From a63f6b245aadb6e20fed4ed353a4f3a62cfdffa6 Mon Sep 17 00:00:00 2001 From: KrishnaRani Date: Wed, 22 Oct 2025 23:16:08 +0200 Subject: [PATCH 1/7] add rwth_dbis learner models --- ...llm_learner_rwthdbis_taxonomy_discovery.py | 57 ++ examples/llm_learner_rwthdbis_term_typing.py | 50 ++ ontolearner/__init__.py | 6 +- ontolearner/learner/__init__.py | 2 + .../learner/taxonomy_discovery/__init__.py | 15 + .../learner/taxonomy_discovery/rwthdbis.py | 792 ++++++++++++++++++ ontolearner/learner/term_typing/__init__.py | 15 + ontolearner/learner/term_typing/rwthdbis.py | 255 ++++++ requirements.txt | 3 + 9 files changed, 1194 insertions(+), 1 deletion(-) create mode 100644 examples/llm_learner_rwthdbis_taxonomy_discovery.py create mode 100644 examples/llm_learner_rwthdbis_term_typing.py create mode 100644 ontolearner/learner/taxonomy_discovery/__init__.py create mode 100644 ontolearner/learner/taxonomy_discovery/rwthdbis.py create mode 100644 ontolearner/learner/term_typing/__init__.py create mode 100644 ontolearner/learner/term_typing/rwthdbis.py diff --git a/examples/llm_learner_rwthdbis_taxonomy_discovery.py b/examples/llm_learner_rwthdbis_taxonomy_discovery.py new file mode 100644 index 0000000..fea5539 --- /dev/null +++ b/examples/llm_learner_rwthdbis_taxonomy_discovery.py @@ -0,0 +1,57 @@ +# Import core modules from the OntoLearner library +from ontolearner import LearnerPipeline, train_test_split +from ontolearner import ChordOntology, RWTHDBISTaxonomyLearner + +# Load the Chord ontology, which exposes hierarchical (parent, child) relations for taxonomy discovery +ontology = ChordOntology() +ontology.load() # Read entities, type system, and taxonomic edges into memory + +# Extract typed taxonomic edges and split into train/test while preserving the structured shape +train_data, test_data = train_test_split( + ontology.extract(), + test_size=0.2, + random_state=42 +) + +# Initialize a supervised taxonomy classifier (encoder-based fine-tuning) +# Negative sampling controls the number of non-edge examples; bidirectional templates create both (p→c) and (c→p) views +# Context features are optional and can be enabled with with_context=True and a JSON path of type descriptions +learner = RWTHDBISTaxonomyLearner( + model_name="microsoft/deberta-v3-small", + output_dir="./results/", + num_train_epochs=1, + per_device_train_batch_size=8, + gradient_accumulation_steps=4, + learning_rate=2e-5, + max_length=256, + seed=42, + negative_ratio=5, + bidirectional_templates=True, + context_json_path=None, + ontology_name=ontology.ontology_full_name, +) + +# Build the pipeline +pipeline = LearnerPipeline( + llm=learner, + llm_id=learner.model_name, + ontologizer_data=False, +) + +# # Run the full learning pipeline on the taxonomy-discovery task +outputs = pipeline( + train_data=train_data, + test_data=test_data, + task="taxonomy-discovery", + evaluate=True, + ontologizer_data=False, +) + +# Display the evaluation results +print("Metrics:", outputs['metrics']) # Shows {'precision': ..., 'recall': ..., 'f1_score': ...} + +# Display total elapsed time for training + prediction + evaluation +print("Elapsed time:", outputs['elapsed_time']) + +# Print all returned outputs (include predictions) +print(outputs) diff --git a/examples/llm_learner_rwthdbis_term_typing.py b/examples/llm_learner_rwthdbis_term_typing.py new file mode 100644 index 0000000..67d207f --- /dev/null +++ b/examples/llm_learner_rwthdbis_term_typing.py @@ -0,0 +1,50 @@ +# Import core modules from the OntoLearner library +from ontolearner import 
LearnerPipeline, train_test_split, AgrO +from ontolearner import RWTHDBISTermTypingLearner + +#load the AgrO ontology. +# AgrO provides term-typing supervision where each term can be annotated with one or more types. +ontology = AgrO() +ontology.load() +data = ontology.extract() + +# Split the labeled term-typing data into train and test sets +train_data, test_data = train_test_split(data, test_size=0.2, random_state=42) + +# Configure a supervised encoder-based classifier for term typing. +# This fine-tunes DeBERTa v3 on (term → type) signals; increase epochs for stronger results. +learner = RWTHDBISTermTypingLearner( + model_name="microsoft/deberta-v3-small", + output_dir="./results/deberta-v3", + num_train_epochs=30, + per_device_train_batch_size=16, + gradient_accumulation_steps=2, + learning_rate=2e-5, + max_length=64, + seed=42, +) + +# Build the pipeline and pass raw structured objects end-to-end. +pipeline = LearnerPipeline( + llm=learner, + llm_id=learner.model_name, + ontologizer_data=False, +) + +# Run the full learning pipeline on the term-typing task +outputs = pipeline( + train_data=train_data, + test_data=test_data, + task="term-typing", + evaluate=True, + ontologizer_data=False, +) + +# Display the evaluation results +print("Metrics:", outputs['metrics']) # Shows {'precision': ..., 'recall': ..., 'f1_score': ...} + +# Display total elapsed time for training + prediction + evaluation +print("Elapsed time:", outputs['elapsed_time']) + +# Print all returned outputs (include predictions) +print(outputs) diff --git a/ontolearner/__init__.py b/ontolearner/__init__.py index 237bee8..0b6fd26 100644 --- a/ontolearner/__init__.py +++ b/ontolearner/__init__.py @@ -29,7 +29,9 @@ AutoRetrieverLearner, AutoRAGLearner, StandardizedPrompting, - LabelMapper) + LabelMapper, + RWTHDBISTaxonomyLearner, + RWTHDBISTermTypingLearner) from ._learner import LearnerPipeline from .processor import Processor @@ -47,6 +49,8 @@ "LabelMapper", "LearnerPipeline", "Processor", + "RWTHDBISTaxonomyLearner", + "RWTHDBISTermTypingLearner", "data_structure", "text2onto", "ontology", diff --git a/ontolearner/learner/__init__.py b/ontolearner/learner/__init__.py index 0baf580..ad38f0b 100644 --- a/ontolearner/learner/__init__.py +++ b/ontolearner/learner/__init__.py @@ -17,3 +17,5 @@ from .rag import AutoRAGLearner from .prompt import StandardizedPrompting from .label_mapper import LabelMapper +from .taxonomy_discovery.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTaxonomyLearner +from .term_typing.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTermTypingLearner diff --git a/ontolearner/learner/taxonomy_discovery/__init__.py b/ontolearner/learner/taxonomy_discovery/__init__.py new file mode 100644 index 0000000..ab5b4f8 --- /dev/null +++ b/ontolearner/learner/taxonomy_discovery/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
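+
+# The concrete taxonomy-discovery learner is implemented in rwthdbis.py; it is
+# re-exported from ontolearner.learner (and the top-level ontolearner package)
+# under the public alias RWTHDBISTaxonomyLearner.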
+ +from .rwthdbis import RWTHDBISSFTLearner diff --git a/ontolearner/learner/taxonomy_discovery/rwthdbis.py b/ontolearner/learner/taxonomy_discovery/rwthdbis.py new file mode 100644 index 0000000..47989c5 --- /dev/null +++ b/ontolearner/learner/taxonomy_discovery/rwthdbis.py @@ -0,0 +1,792 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import random +import re +import time +import platform +import multiprocessing +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Callable +from functools import partial +from tqdm.auto import tqdm +import g4f +from g4f.client import Client as _G4FClient +import torch +from datasets import Dataset, DatasetDict +from transformers import ( + AutoTokenizer, + AutoModelForSequenceClassification, + DataCollatorWithPadding, + Trainer, + TrainingArguments, + set_seed, +) + +from ...base import AutoLearner + +class RWTHDBISSFTLearner(AutoLearner): + """ + Supervised classifier for (parent, child) taxonomy edges. + + Model input format: + " ## " + + If no `context_json_path` is provided, the class precomputes a + context file ({ontology_name}_processed.json) directly from the ontology + object. + """ + + # Sentences containing any of these phrases are pruned from term_info. 
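+    # For example, a generated sentence like "I'm sorry, I couldn't find any
+    # information on this term" is dropped entirely when shards are merged; the
+    # remaining sentences are re-joined with '.' (the ontology/dataset name is also
+    # added to this removal list at merge time, see _merge_part_files).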
+ _CONTEXT_REMOVALS = [ + "couldn't find any", + "does not require", + "assist you further", + "feel free to", + "I'm currently unable", + "the search results", + "I'm unable to", + "recommend referring directly", + "bear with me", + "searching for the most relevant information", + "I'm currently checking the most relevant", + "already in English", + "require further", + "any additional information", + "already an English", + "don't have information", + "I'm sorry,", + "For further exploration", + "For more detailed information", + ] + + def __init__( + self, + min_predictions: int = 1, + model_name: str = "distilroberta-base", + output_dir: str = "./results/{model_name}", + max_length: int = 256, + per_device_train_batch_size: int = 8, + gradient_accumulation_steps: int = 4, + num_train_epochs: int = 1, + learning_rate: float = 2e-5, + weight_decay: float = 0.01, + logging_steps: int = 25, + save_strategy: str = "epoch", + save_total_limit: int = 1, + fp16: bool = True, + bf16: bool = False, + seed: int = 42, + negative_ratio: int = 5, + bidirectional_templates: bool = True, + context_json_path: Optional[str] = None, + ontology_name: str = "Geonames" + ) -> None: + super().__init__() + + self.model_name = model_name + self.safe_model_name = model_name.replace("/", "__") + + resolved_output = output_dir.format(model_name=self.safe_model_name) + self.output_dir = str(Path(resolved_output)) + Path(self.output_dir).mkdir(parents=True, exist_ok=True) + + self.min_predictions = int(min_predictions) + self.max_length = int(max_length) + self.per_device_train_batch_size = int(per_device_train_batch_size) + self.gradient_accumulation_steps = int(gradient_accumulation_steps) + self.num_train_epochs = float(num_train_epochs) + self.learning_rate = float(learning_rate) + self.weight_decay = float(weight_decay) + self.logging_steps = int(logging_steps) + self.save_strategy = str(save_strategy) + self.save_total_limit = int(save_total_limit) + self.fp16 = bool(fp16) + self.bf16 = bool(bf16) + self.seed = int(seed) + + self.negative_ratio = int(negative_ratio) + self.bidirectional_templates = bool(bidirectional_templates) + self.context_json_path = context_json_path + + self.ontology_name = ontology_name + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.model: Optional[AutoModelForSequenceClassification] = None + self.tokenizer: Optional[AutoTokenizer] = None + + os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") + os.environ.setdefault("WANDB_DISABLED", "true") + os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1") + + self._context_exact: Dict[str, str] = {} # lower(term) -> info + self._context_rows: List[Dict[str, str]] = [] # [{'term': str, 'term_info': str}, ...] 
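+
+    # For illustration (hypothetical terms, not taken from a real context file),
+    # _format_input("MusicalWork", "Song") yields roughly:
+    #   "MusicalWork is the superclass / parent / supertype / ancestor class of Song
+    #    ## Context. 'MusicalWork': <term_info> 'Song': <term_info>"
+    # With bidirectional_templates=True, a second subclass-phrased view of the same
+    # pair is added with the same label; if no context is found for either term,
+    # only the relation sentence is used.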
+ + def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]: + return self._predict_pairs(data) if test else self._train_from_pairs(data) + + def _train_from_pairs(self, train_data: Any) -> None: + # Always (re)build context from ontology unless an explicit file is provided + if not self.context_json_path: + context_dir = Path(self.output_dir) / "context" + context_dir.mkdir(parents=True, exist_ok=True) + processed_context_file = context_dir / f"{self.ontology_name}_processed.json" + + # Remove stale file then regenerate + if processed_context_file.exists(): + try: + processed_context_file.unlink() + except Exception: + pass + + self.preprocess_context_from_ontology( + ontology=train_data, + processed_dir=context_dir, + dataset_name=self.ontology_name, + num_workers=max(1, min(os.cpu_count() or 2, 4)), + provider=partial(self._default_gpt_inference_with_dataset, dataset_name=self.ontology_name), + max_retries=5, + ) + + self.context_json_path = str(processed_context_file) + + # Reproducibility + set_seed(self.seed) + random.seed(self.seed) + torch.manual_seed(self.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(self.seed) + + # Build labeled pairs from ontology; context comes from preprocessed map + positive_pairs = self._extract_positive_pairs(train_data) + if not positive_pairs: + raise ValueError("No positive (parent, child) pairs found in train_data.") + + entity_names = sorted({parent for parent, _ in positive_pairs} | {child for _, child in positive_pairs}) + negative_pairs = self._generate_negatives( + positives=positive_pairs, + entities=entity_names, + ratio=self.negative_ratio, + ) + + labels, texts = self._build_text_dataset(positive_pairs, negative_pairs) + + + datasets = DatasetDict({"train": Dataset.from_dict({"label": labels, "text": texts})}) + + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = ( + getattr(self.tokenizer, "eos_token", None) + or getattr(self.tokenizer, "sep_token", None) + or getattr(self.tokenizer, "cls_token", None) + ) + + def tokenize_batch(batch: Dict[str, List[str]]): + return self.tokenizer(batch["text"], truncation=True, max_length=self.max_length) + + tokenized = datasets.map(tokenize_batch, batched=True, remove_columns=["text"]) + collator = DataCollatorWithPadding(self.tokenizer) + + self.model = AutoModelForSequenceClassification.from_pretrained( + self.model_name, + num_labels=2, + id2label={0: "incorrect", 1: "correct"}, + label2id={"incorrect": 0, "correct": 1}, + ) + if getattr(self.model.config, "pad_token_id", None) is None and self.tokenizer.pad_token_id is not None: + self.model.config.pad_token_id = self.tokenizer.pad_token_id + + train_args = TrainingArguments( + output_dir=self.output_dir, + learning_rate=self.learning_rate, + per_device_train_batch_size=self.per_device_train_batch_size, + gradient_accumulation_steps=self.gradient_accumulation_steps, + num_train_epochs=self.num_train_epochs, + weight_decay=self.weight_decay, + save_strategy=self.save_strategy, + save_total_limit=self.save_total_limit, + logging_steps=self.logging_steps, + dataloader_pin_memory = bool(torch.cuda.is_available()), + fp16=self.fp16, + bf16=self.bf16, + report_to="none", + save_safetensors=True, + ) + + trainer = Trainer( + model=self.model, + args=train_args, + train_dataset=tokenized["train"], + tokenizer=self.tokenizer, + data_collator=collator, + ) + trainer.train() + trainer.save_model(self.output_dir) + 
self.tokenizer.save_pretrained(self.output_dir) + + def _predict_pairs(self, eval_data: Any) -> List[Dict[str, str]]: + import torch.nn.functional as F + + self._ensure_loaded_for_inference() + + candidate_pairs = self._extract_pairs_for_eval(eval_data) + if not candidate_pairs: + return [] + + accepted: List[Dict[str, str]] = [] + scored_candidates: List[Tuple[float, str, str, int]] = [] + + self.model.eval() + with torch.no_grad(): + for parent_term, child_term in candidate_pairs: + input_text = self._format_input(parent_term, child_term) + inputs = self.tokenizer(input_text, return_tensors="pt", truncation=True, max_length=self.max_length) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + logits = self.model(**inputs).logits + probs = F.softmax(logits, dim=-1).squeeze(0) + p_positive = float(probs[1].item()) + predicted_label = int(torch.argmax(logits, dim=-1).item()) + scored_candidates.append((p_positive, parent_term, child_term, predicted_label)) + if predicted_label == 1: + accepted.append({"parent": parent_term, "child": child_term}) + + if accepted: + return accepted + + top_k = max(0, int(self.min_predictions)) + if top_k == 0: + return [] + scored_candidates.sort(key=lambda item: item[0], reverse=True) + return [{"parent": parent_term, "child": child_term} + for (_prob, parent_term, child_term, _pred) in scored_candidates[:top_k]] + + def _ensure_loaded_for_inference(self) -> None: + if self.model is not None and self.tokenizer is not None: + return + self.model = AutoModelForSequenceClassification.from_pretrained(self.output_dir).to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(self.output_dir) + if self.tokenizer.pad_token_id is None and getattr(self.model.config, "pad_token_id", None) is not None: + self.tokenizer.pad_token_id = self.model.config.pad_token_id + + def _load_context_map(self) -> None: + """Build exact and fuzzy maps from {ontology_name}_processed.json.""" + if not (self.context_json_path): + self._context_exact = {} + self._context_rows = [] + return + try: + rows = json.load(open(self.context_json_path, "r", encoding="utf-8")) + self._context_exact = { + str(row.get("term", "")).strip().lower(): str(row.get("term_info", "")).strip() + for row in rows + } + self._context_rows = [ + {"term": str(row.get("term", "")), "term_info": str(row.get("term_info", ""))} + for row in rows + ] + except Exception: + self._context_exact = {} + self._context_rows = [] + + def _lookup_context_info(self, raw_term: str) -> str: + """ + Loose context lookup: split by commas, strip whitespace, case-insensitive + substring match against any row['term']. Join hits with '.'. + """ + if not raw_term: + return "" + term_key = raw_term.strip().lower() + if term_key in self._context_exact: + return self._context_exact[term_key] + + subterms = [re.sub(r"\s+", "", piece) for piece in raw_term.split(",")] + matched_infos: List[str] = [] + for subterm in subterms: + if not subterm: + continue + lower_subterm = subterm.lower() + for row in self._context_rows: + if lower_subterm in row["term"].lower(): + info = row.get("term_info", "") + if info: + matched_infos.append(info) + break # one hit per subterm + return ".".join(matched_infos) + + def _extract_positive_pairs(self, ontology_obj: Any) -> List[Tuple[str, str]]: + """ + Read pairs from ontology_obj.type_taxonomies.taxonomies (or fallback to .taxonomies). + Each item must provide 'parent' and 'child' attributes/keys. 
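+        For illustration, an item {"parent": "Place", "child": "City"} (or an object
+        with .parent == "Place" and .child == "City") yields the pair ("Place", "City").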
+ """ + type_taxonomies = getattr(ontology_obj, "type_taxonomies", None) + items = getattr(type_taxonomies, "taxonomies", None) if type_taxonomies is not None else getattr(ontology_obj, "taxonomies", None) + pairs: List[Tuple[str, str]] = [] + if items: + for item in items: + parent_term = getattr(item, "parent", None) if not isinstance(item, dict) else item.get("parent") + child_term = getattr(item, "child", None) if not isinstance(item, dict) else item.get("child") + if parent_term and child_term: + pairs.append((str(parent_term), str(child_term))) + return pairs + + def _extract_pairs_for_eval(self, ontology_obj: Any) -> List[Tuple[str, str]]: + candidate_pairs = getattr(ontology_obj, "pairs", None) + if candidate_pairs: + pairs: List[Tuple[str, str]] = [] + for item in candidate_pairs: + parent_term = getattr(item, "parent", None) if not isinstance(item, dict) else item.get("parent") + child_term = getattr(item, "child", None) if not isinstance(item, dict) else item.get("child") + if parent_term and child_term: + pairs.append((str(parent_term), str(child_term))) + return pairs + return self._extract_positive_pairs(ontology_obj) + + def _generate_negatives( + self, + positives: List[Tuple[str, str]], + entities: List[str], + ratio: int, + ) -> List[Tuple[str, str]]: + positive_set = set(positives) + all_possible = {(parent_term, child_term) for parent_term in entities for child_term in entities if parent_term != child_term} + negative_candidates = list(all_possible - positive_set) + + target_count = max(len(positive_set) * max(1, ratio), len(positive_set)) + sample_count = min(target_count, len(negative_candidates)) + return random.sample(negative_candidates, k=sample_count) if sample_count > 0 else [] + + def _build_text_dataset( + self, + positives: List[Tuple[str, str]], + negatives: List[Tuple[str, str]], + ) -> Tuple[List[int], List[str]]: + self._load_context_map() + + labels: List[int] = [] + input_texts: List[str] = [] + + def add_example(parent_term: str, child_term: str, label_value: int) -> None: + input_texts.append(self._format_input(parent_term, child_term)) + labels.append(label_value) + if self.bidirectional_templates: + input_texts.append(self._format_input(child_term, parent_term, reverse=True)) + labels.append(label_value) + + for parent_term, child_term in positives: + add_example(parent_term, child_term, 1) + for parent_term, child_term in negatives: + add_example(parent_term, child_term, 0) + + return labels, input_texts + + def _format_input(self, parent_term: str, child_term: str, reverse: bool = False) -> str: + relation_text = ( + f"{child_term} is a subclass / child / subtype / descendant class of {parent_term}" + if reverse + else f"{parent_term} is the superclass / parent / supertype / ancestor class of {child_term}" + ) + + parent_info = self._lookup_context_info(parent_term) + child_info = self._lookup_context_info(child_term) + if not parent_info and not child_info: + return relation_text + + context_text = f"## Context. '{parent_term}': {parent_info} '{child_term}': {child_info}" + return f"{relation_text} {context_text}" + + @staticmethod + def _is_windows() -> bool: + return (os.name == "nt") or (platform.system().lower() == "windows") + + @staticmethod + def _default_gpt_inference_with_dataset(term: str, dataset_name: str) -> str: + """ + Generate a plain-text description for `term`, tailored by `dataset_name`. + Uses g4f if available; otherwise returns an empty string. 
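+        The raw response is post-processed below: markdown characters ('*', '-', '#')
+        and citation-style links are stripped, whitespace is collapsed, and sentences
+        containing '?' are dropped before the text is returned.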
+ """ + prompt = ( + f"Here is a: {term}, which is of domain name :{dataset_name}, translate it into english, " + "Provide as detailed a definition of this term as possible in plain text.without any markdown format." + "No reference link in result. " + "- Focus on intrinsic properties; do not name other entities or explicit relationships.\n" + "- Include classification/type, defining features, scope/scale, roles/functions, and measurable attributes when applicable.\n" + "Output: Plain text paragraphs only, neutral and factual." + f"Make sure all provided information can be used for discovering implicit relation of other {dataset_name} term, but don't mention the relation in result." + ) + + try: + client = _G4FClient() + response = client.chat.completions.create( + model=g4f.models.default, + messages=[{"role": "user", "content": prompt}], + ) + raw_text = response.choices[0].message.content if response and response.choices else "" + except Exception: + raw_text = "" # or some deterministic fallback + + # Clean up + cleaned = re.sub(r"[\*\-\#]", "", raw_text) + cleaned = re.sub(r"\n\s*\n", " ", cleaned) + cleaned = cleaned.replace("\n", " ") + cleaned = re.sub(r"\s{2,}", " ", cleaned) + cleaned = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", cleaned) + sentences = [sentence for sentence in cleaned.split(".") if "?" not in sentence] + return ".".join(sentences).strip() + + @staticmethod + def _clean_term_info(raw_text: str) -> str: + """Normalize whitespace and remove link artifacts.""" + cleaned = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", str(raw_text)) + cleaned = re.sub(r"\s+", " ", cleaned).strip() + return cleaned + + @classmethod + def _merge_part_files(cls, dataset_name: str, merged_path: Path, part_paths: List[Path]) -> None: + merged_rows: List[dict] = [] + for part_path in part_paths: + try: + if not part_path.is_file(): + continue + part_content = json.load(open(part_path, "r", encoding="utf-8")) + if isinstance(part_content, list): + merged_rows.extend(part_content) + elif isinstance(part_content, dict): + merged_rows.append(part_content) + except Exception: + continue + + removal_markers = list(cls._CONTEXT_REMOVALS) + [dataset_name] + for row in merged_rows: + term_info_raw = str(row.get("term_info", "")) + kept_sentences: List[str] = [] + for sentence in term_info_raw.split("."): + sentence_no_links = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", sentence) + if any(marker in sentence_no_links for marker in removal_markers): + continue + kept_sentences.append(sentence_no_links) + row["term_info"] = cls._clean_term_info(".".join(kept_sentences)) + + merged_path.parent.mkdir(parents=True, exist_ok=True) + json.dump(merged_rows, open(merged_path, "w", encoding="utf-8"), ensure_ascii=False, indent=4) + + # best-effort cleanup + for part_path in part_paths: + try: + os.remove(part_path) + except Exception: + pass + + @staticmethod + def _fill_bucket_threaded(bucket_rows: List[dict], output_path: Path, provider: Callable[[str], str]) -> None: + start_index = 0 + try: + if output_path.is_file(): + existing_rows = json.load(open(output_path, "r", encoding="utf-8")) + if isinstance(existing_rows, list) and existing_rows: + bucket_rows[: len(existing_rows)] = existing_rows + start_index = len(existing_rows) + except Exception: + pass + + for row_index in range(start_index, len(bucket_rows)): + try: + bucket_rows[row_index]["term_info"] = provider(bucket_rows[row_index]["term"]) + except Exception: + bucket_rows[row_index]["term_info"] = "" + if row_index % 10 == 1: + 
json.dump(bucket_rows[: row_index + 1], open(output_path, "w", encoding="utf-8"), ensure_ascii=False, indent=2) + + json.dump(bucket_rows, open(output_path, "w", encoding="utf-8"), ensure_ascii=False, indent=2) + + @staticmethod + def _fill_bucket_process( + worker_id: int, + bucket_rows: List[dict], + output_path: Path, + provider: Callable[[str], str], + progress_map: "multiprocessing.managers.DictProxy", + ) -> None: + current_index = 0 + try: + if output_path.is_file(): + existing_rows = json.load(open(output_path, "r", encoding="utf-8")) + if isinstance(existing_rows, list) and existing_rows: + bucket_rows[: len(existing_rows)] = existing_rows + current_index = len(existing_rows) + except Exception: + pass + + progress_map[worker_id] = current_index + + for row_index in range(current_index, len(bucket_rows)): + try: + bucket_rows[row_index]["term_info"] = provider(bucket_rows[row_index]["term"]) + except Exception: + bucket_rows[row_index]["term_info"] = "" + progress_map[worker_id] = row_index + 1 + if row_index % 10 == 1: + json.dump(bucket_rows[: row_index + 1], open(output_path, "w", encoding="utf-8"), ensure_ascii=False, indent=2) + + json.dump(bucket_rows, open(output_path, "w", encoding="utf-8"), ensure_ascii=False, indent=2) + progress_map[worker_id] = len(bucket_rows) + + @classmethod + def _execute_for_terms( + cls, + terms: List[str], + merged_path: Path, + part_paths: List[Path], + provider: Callable[[str], str], + dataset_name: str, + num_workers: int = 2, + ) -> None: + """ + Generate context for `terms`, writing shards to `part_paths`, then merge. + Threads on Windows; processes on POSIX. + """ + worker_count = max(1, min(num_workers, os.cpu_count() or 2, 4)) + all_rows = [{"id": row_index, "term": term, "term_info": ""} for row_index, term in enumerate(terms)] + + buckets: List[List[dict]] = [[] for _ in range(worker_count)] + for reversed_index, row in enumerate(reversed(all_rows)): + buckets[reversed_index % worker_count].append(row) + + if cls._is_windows(): + total_rows = len(terms) + progress_bar = tqdm(total=total_rows, desc=f"{dataset_name} generation (threads)") + + def run_bucket(bucket_rows: List[dict], out_path: Path) -> int: + cls._fill_bucket_threaded(bucket_rows, out_path, provider) + return len(bucket_rows) + + with ThreadPoolExecutor(max_workers=worker_count) as pool: + futures = [pool.submit(run_bucket, buckets[bucket_index], part_paths[bucket_index]) + for bucket_index in range(worker_count)] + for future in as_completed(futures): + completed_count = future.result() + if progress_bar: + progress_bar.update(completed_count) + if progress_bar: + progress_bar.close() + else: + manager = multiprocessing.Manager() + progress_map = manager.dict({worker_index: 0 for worker_index in range(worker_count)}) + + processes: List[multiprocessing.Process] = [] + for worker_index, bucket_rows in enumerate(buckets): + process = multiprocessing.Process( + target=cls._fill_bucket_process, + args=(worker_index, bucket_rows, part_paths[worker_index], provider, progress_map), + ) + processes.append(process) + process.start() + + total_rows = len(terms) + with tqdm(total=total_rows, desc=f"{dataset_name} generation") as progress_bar: + previous_total = 0 + while any(process.is_alive() for process in processes): + current_total = int(sum(progress_map.values())) + progress_bar.update(current_total - previous_total) + previous_total = current_total + time.sleep(0.5) + current_total = int(sum(progress_map.values())) + if current_total > previous_total: + 
progress_bar.update(current_total - previous_total) + + for process in processes: + process.join() + + cls._merge_part_files(dataset_name, merged_path, part_paths) + + @classmethod + def _re_infer_short_entries( + cls, + merged_path: Path, + re_part_paths: List[Path], + re_merged_path: Path, + provider: Callable[[str], str], + dataset_name: str, + num_workers: int, + ) -> int: + """ + Re-query terms with too-short term_info (< 50 chars). Returns remaining count. + """ + merged_rows = json.load(open(merged_path, "r", encoding="utf-8")) + + removal_markers = list(cls._CONTEXT_REMOVALS) + [dataset_name] + short_rows: List[dict] = [] + long_rows: List[dict] = [] + + for row in merged_rows: + term_info_raw = str(row.get("term_info", "")) + sentences = term_info_raw.split(".") + for marker in removal_markers: + sentences = [sentence if marker not in sentence else "" for sentence in sentences] + filtered_info = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", ".".join(sentences)) + row["term_info"] = filtered_info + (short_rows if len(filtered_info) < 50 else long_rows).append(row) + + worker_count = max(1, min(num_workers, os.cpu_count() or 2, 4)) + buckets: List[List[dict]] = [[] for _ in range(worker_count)] + for row_index, row in enumerate(short_rows): + buckets[row_index % worker_count].append(row) + + # clean old re-inference shards + for path in re_part_paths: + try: + os.remove(path) + except Exception: + pass + + total_candidates = len(short_rows) + if cls._is_windows(): + progress_bar = tqdm(total=total_candidates, desc=f"{dataset_name} re-inference (threads)") + + def run_bucket(bucket_rows: List[dict], out_path: Path) -> int: + cls._fill_bucket_threaded(bucket_rows, out_path, provider) + return len(bucket_rows) + + with ThreadPoolExecutor(max_workers=worker_count) as pool: + futures = [pool.submit(run_bucket, buckets[bucket_index], re_part_paths[bucket_index]) + for bucket_index in range(worker_count)] + for future in as_completed(futures): + completed_count = future.result() + if progress_bar: + progress_bar.update(completed_count) + if progress_bar: + progress_bar.close() + else: + manager = multiprocessing.Manager() + progress_map = manager.dict({worker_index: 0 for worker_index in range(worker_count)}) + + processes: List[multiprocessing.Process] = [] + for worker_index, bucket_rows in enumerate(buckets): + process = multiprocessing.Process( + target=cls._fill_bucket_process, + args=(worker_index, bucket_rows, re_part_paths[worker_index], provider, progress_map), + ) + processes.append(process) + process.start() + + with tqdm(total=total_candidates, desc=f"{dataset_name} re-inference") as progress_bar: + previous_total = 0 + while any(process.is_alive() for process in processes): + current_total = int(sum(progress_map.values())) + progress_bar.update(current_total - previous_total) + previous_total = current_total + time.sleep(1) + if progress_bar.n < total_candidates: + progress_bar.update(total_candidates - progress_bar.n) + + for process in processes: + process.join() + + # merge and write back + cls._merge_part_files(dataset_name, re_merged_path, re_part_paths) + new_rows = json.load(open(re_merged_path, "r", encoding="utf-8")) if re_merged_path.is_file() else [] + final_rows = long_rows + new_rows + json.dump(final_rows, open(merged_path, "w", encoding="utf-8"), ensure_ascii=False, indent=4) + + remaining_short = sum(1 for row in final_rows if len(str(row.get("term_info", ""))) < 50) + return remaining_short + + @staticmethod + def _extract_terms_from_ontology(ontology: Any) -> 
List[str]: + """ + Collect unique term names from ontology.type_taxonomies.taxonomies. + """ + type_taxonomies = getattr(ontology, "type_taxonomies", None) + taxonomies = getattr(type_taxonomies, "taxonomies", None) if type_taxonomies is not None else getattr(ontology, "taxonomies", None) + unique_terms: set[str] = set() + if taxonomies: + for row in taxonomies: + parent_term = getattr(row, "parent", None) if not isinstance(row, dict) else row.get("parent") + child_term = getattr(row, "child", None) if not isinstance(row, dict) else row.get("child") + if parent_term: + unique_terms.add(str(parent_term)) + if child_term: + unique_terms.add(str(child_term)) + return sorted(unique_terms) + + def preprocess_context_from_ontology( + self, + ontology: Any, + processed_dir: str | Path, + dataset_name: str = "GeoNames", + num_workers: int = 2, + provider: Optional[Callable[[str], str]] = None, + max_retries: int = 5, + ) -> Path: + """ + Build {id, term, term_info} from an ontology object. + Always regenerates {dataset_name}_processed.json. + """ + provider = provider or provider or partial(self._default_gpt_inference_with_dataset, dataset_name=dataset_name) + + processed_dir = Path(processed_dir) + processed_dir.mkdir(parents=True, exist_ok=True) + + merged_path = processed_dir / f"{dataset_name}_processed.json" + if merged_path.exists(): + try: + merged_path.unlink() + except Exception: + pass + + worker_count = max(1, min(num_workers, os.cpu_count() or 2, 4)) + shard_paths = [processed_dir / f"{dataset_name}_type_part{shard_index}.json" for shard_index in range(worker_count)] + reinf_paths = [processed_dir / f"{dataset_name}_re_inference{shard_index}.json" for shard_index in range(worker_count)] + reinf_merged_path = processed_dir / f"{dataset_name}_Types_re_inference.json" + + # remove any leftover shards + for path in shard_paths + reinf_paths + [reinf_merged_path]: + try: + if path.exists(): + path.unlink() + except Exception: + pass + + unique_terms = self._extract_terms_from_ontology(ontology) + print(f"[Preprocess] Unique terms from ontology: {len(unique_terms)}") + + self._execute_for_terms( + terms=unique_terms, + merged_path=merged_path, + part_paths=shard_paths, + provider=provider, + dataset_name=dataset_name, + num_workers=worker_count, + ) + + retry_round = 0 + while retry_round < max_retries: + remaining_count = self._re_infer_short_entries( + merged_path=merged_path, + re_part_paths=reinf_paths, + re_merged_path=reinf_merged_path, + provider=provider, + dataset_name=dataset_name, + num_workers=worker_count, + ) + print(f"[Preprocess] Re-infer round {retry_round + 1} done. Remaining short entries: {remaining_count}") + retry_round += 1 + if remaining_count == 0: + break + + print(f"[Preprocess] Done. Merged context at: {merged_path}") + self.context_json_path = str(merged_path) + return merged_path diff --git a/ontolearner/learner/term_typing/__init__.py b/ontolearner/learner/term_typing/__init__.py new file mode 100644 index 0000000..ab5b4f8 --- /dev/null +++ b/ontolearner/learner/term_typing/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .rwthdbis import RWTHDBISSFTLearner diff --git a/ontolearner/learner/term_typing/rwthdbis.py b/ontolearner/learner/term_typing/rwthdbis.py new file mode 100644 index 0000000..f27fd56 --- /dev/null +++ b/ontolearner/learner/term_typing/rwthdbis.py @@ -0,0 +1,255 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import random +from typing import Any, Dict, List, Optional, Tuple + +import torch +from datasets import Dataset, DatasetDict +from tqdm.auto import tqdm +from transformers import ( + AutoTokenizer, + AutoModelForSequenceClassification, + DataCollatorWithPadding, + Trainer, + TrainingArguments, + set_seed, +) +from transformers import DebertaV2Tokenizer + +from ...base import AutoLearner + +class RWTHDBISSFTLearner(AutoLearner): + """ + Supervised term-typing + + Training expands multi-label examples into multiple single-label rows. + Inference returns: [{"term": "", "types": [""]}, ...] + """ + + def __init__( + self, + model_name: str = "microsoft/deberta-v3-small", + trained_model_path: Optional[str] = None, + output_dir: Optional[str] = None, + max_length: int = 64, + per_device_train_batch_size: int = 16, + gradient_accumulation_steps: int = 2, + num_train_epochs: int = 3, + learning_rate: float = 2e-5, + weight_decay: float = 0.01, + logging_steps: int = 50, + save_strategy: str = "epoch", + save_total_limit: int = 1, + fp16: bool = False, + bf16: bool = False, + seed: int = 42 + ) -> None: + super().__init__() + self.model_name = model_name + self.trained_model_path = trained_model_path + self.output_dir = output_dir or "./term_typing" + os.makedirs(self.output_dir, exist_ok=True) + + self.max_length = max_length + self.per_device_train_batch_size = per_device_train_batch_size + self.gradient_accumulation_steps = gradient_accumulation_steps + self.num_train_epochs = num_train_epochs + self.learning_rate = learning_rate + self.weight_decay = weight_decay + self.logging_steps = logging_steps + self.save_strategy = save_strategy + self.save_total_limit = save_total_limit + self.fp16 = fp16 + self.bf16 = bf16 + self.seed = seed + + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.model: Optional[AutoModelForSequenceClassification] = None + self.tokenizer: Optional[AutoTokenizer] = None + self.id2label: Dict[int, str] = {} + self.label2id: Dict[str, int] = {} + + def _term_typing(self, data: Any, test: bool = False) -> Optional[Any]: + """ + train: expects ontology-like object with .term_typings + test: returns List[{"term": str, "types": [str]}] (for evaluator) + """ + if not test: + return self._train_from_term_typings(train_data=data) + + terms = self._collect_eval_terms(data) + return self._predict_structured_output(terms) + + def _load_robust_tokenizer(self, backbone: str) -> AutoTokenizer: + try: + return AutoTokenizer.from_pretrained(backbone, use_fast=True) + except Exception as fast_err: + print(f"[tokenizer] Fast 
tokenizer failed: {fast_err}. Trying DebertaV2Tokenizer (slow)...") + + try: + return DebertaV2Tokenizer.from_pretrained(backbone) + except Exception as slow_err: + print(f"[tokenizer] DebertaV2Tokenizer failed: {slow_err}. Trying AutoTokenizer(use_fast=False)...") + + try: + return AutoTokenizer.from_pretrained(backbone, use_fast=False) + except Exception as final_err: + raise RuntimeError( + "Failed to load a tokenizer for this DeBERTa model.\n" + "Try:\n" + " - pip install --upgrade sentencepiece\n" + " - ensure network access for model files\n" + " - clear your HF cache and retry\n" + " - pin versions: transformers==4.43.*, tokenizers<0.20\n" + f"Original error: {final_err}" + ) + + def _expand_multilabel_training_rows( + self, term_typings: List[Any] + ) -> Tuple[List[str], List[int], Dict[int, str], Dict[str, int]]: + """ + From multi-label instances -> (texts, label_ids), and label maps. + """ + label_strings: List[str] = [] + for instance in term_typings: + label_strings.extend([str(label) for label in instance.types]) + + unique_labels = sorted(set(label_strings)) + id2label = {i: label for i, label in enumerate(unique_labels)} + label2id = {label: i for i, label in enumerate(unique_labels)} + + texts: List[str] = [] + label_ids: List[int] = [] + for instance in term_typings: + term_text = str(instance.term) + for label in instance.types: + texts.append(term_text) + label_ids.append(label2id[str(label)]) + + return texts, label_ids, id2label, label2id + + def _collect_eval_terms(self, eval_data: Any) -> List[str]: + """ + Accepts List[str] OR object with .term_typings; returns list of term texts. + """ + if isinstance(eval_data, list) and all(isinstance(x, str) for x in eval_data): + terms = eval_data + else: + term_typings = getattr(eval_data, "term_typings", None) + if term_typings is None: + raise ValueError("Provide a List[str] OR an object with .term_typings for test=True.") + terms = [str(instance.term) for instance in term_typings] + return terms + + def _train_from_term_typings(self, train_data: Any) -> None: + set_seed(self.seed) + random.seed(self.seed) + torch.manual_seed(self.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(self.seed) + + term_typings: List[Any] = getattr(train_data, "term_typings", None) + if term_typings is None: + raise ValueError("train_data must provide .term_typings for term-typing.") + + texts, label_ids, self.id2label, self.label2id = self._expand_multilabel_training_rows(term_typings) + + dataset = DatasetDict({"train": Dataset.from_dict({"labels": label_ids, "text": texts})}) + + backbone = self.trained_model_path or self.model_name + self.tokenizer = self._load_robust_tokenizer(backbone) + + def tokenize_batch(batch: Dict[str, List[str]]): + return self.tokenizer(batch["text"], truncation=True, max_length=self.max_length) + + tokenized = dataset.map(tokenize_batch, batched=True, remove_columns=["text"]) + data_collator = DataCollatorWithPadding(self.tokenizer) + + self.model = AutoModelForSequenceClassification.from_pretrained( + backbone, + num_labels=len(self.id2label), + id2label=self.id2label, + label2id=self.label2id, + ) + + if getattr(self.model.config, "pad_token_id", None) is None and self.tokenizer.pad_token_id is not None: + self.model.config.pad_token_id = self.tokenizer.pad_token_id + + training_args = TrainingArguments( + output_dir=self.output_dir, + learning_rate=self.learning_rate, + per_device_train_batch_size=self.per_device_train_batch_size, + gradient_accumulation_steps=self.gradient_accumulation_steps, 
+ num_train_epochs=self.num_train_epochs, + weight_decay=self.weight_decay, + save_strategy=self.save_strategy, + save_total_limit=self.save_total_limit, + logging_steps=self.logging_steps, + fp16=self.fp16, + bf16=self.bf16, + report_to=[], + ) + + trainer = Trainer( + model=self.model, + args=training_args, + train_dataset=tokenized["train"], + tokenizer=self.tokenizer, + data_collator=data_collator, + ) + + trainer.train() + trainer.save_model(self.output_dir) + self.tokenizer.save_pretrained(self.output_dir) + + def _ensure_loaded_for_inference(self) -> None: + if self.model is not None and self.tokenizer is not None: + return + model_path = self.trained_model_path or self.output_dir + self.model = AutoModelForSequenceClassification.from_pretrained(model_path) + self.tokenizer = self._load_robust_tokenizer(model_path) + + cfg = self.model.config + if hasattr(cfg, "id2label") and hasattr(cfg, "label2id"): + self.id2label = dict(cfg.id2label) + self.label2id = dict(cfg.label2id) + + self.model.to(self.device).eval() + + def _predict_label_ids(self, terms: List[str]) -> List[int]: + self._ensure_loaded_for_inference() + predictions: List[int] = [] + for term_text in tqdm(terms, desc="Inference", bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}"): + inputs = self.tokenizer(term_text, return_tensors="pt", truncation=True, max_length=self.max_length) + inputs = {name: tensor.to(self.device) for name, tensor in inputs.items()} + with torch.no_grad(): + logits = self.model(**inputs).logits + predictions.append(int(torch.argmax(logits, dim=-1).item())) + return predictions + + def _predict_structured_output(self, terms: List[str]) -> List[Dict[str, List[str]]]: + """ + Convert predicted IDs into evaluator format: + [{"term": "", "types": [""]}, ...] + """ + label_ids = self._predict_label_ids(terms) + id2label_map = self.id2label or {} # fallback handled below + + results: List[Dict[str, List[str]]] = [] + for term_text, label_id in zip(terms, label_ids): + label_str = id2label_map.get(int(label_id), str(int(label_id))) + results.append({"term": term_text, "types": [label_str]}) + return results diff --git a/requirements.txt b/requirements.txt index 3ce19f7..6d71bd5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,3 +20,6 @@ sentence-transformers~=5.1.0 scikit-learn~=1.6.1 bitsandbytes~=0.45.1 mistral-common[sentencepiece]~=1.8.5 +g4f +protobuf<5 +accelerate>=0.26.0 From 16457094ce35731b67f55b7f1bc27b5621242b20 Mon Sep 17 00:00:00 2001 From: KrishnaRani Date: Wed, 29 Oct 2025 15:34:23 +0100 Subject: [PATCH 2/7] added skhnlp learner models --- ..._learner_skhnlp_sft_taxonomoy_discovery.py | 64 ++ ...m_learner_skhnlp_zs_taxonomoy_discovery.py | 50 ++ ontolearner/__init__.py | 6 +- ontolearner/learner/__init__.py | 1 + .../learner/taxonomy_discovery/__init__.py | 1 + .../learner/taxonomy_discovery/skhnlp.py | 761 ++++++++++++++++++ requirements.txt | 1 + 7 files changed, 883 insertions(+), 1 deletion(-) create mode 100644 examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py create mode 100644 examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py create mode 100644 ontolearner/learner/taxonomy_discovery/skhnlp.py diff --git a/examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py b/examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py new file mode 100644 index 0000000..3661a5b --- /dev/null +++ b/examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py @@ -0,0 +1,64 @@ +# Import core modules from the OntoLearner library +from ontolearner import GeoNames, train_test_split, LearnerPipeline 
+from ontolearner import SKHNLPSequentialFTLearner + +# Load ontology and split +# Load the GeoNames ontology for taxonomy discovery. +# GeoNames provides geographic parent-child relationships (is-a hierarchy). +ontology = GeoNames() +ontology.load() +data = ontology.extract() + +# Split the taxonomic relationships into train and test sets +train_data, test_data = train_test_split( + data, + test_size=0.2, + random_state=42 +) + +# Configure the learner with user-defined training args + device +# Configure the supervised BERT SFT Learner for taxonomy discovery. +# This fine-tunes BERT-Large using Sequential Prompts on (Parent, Child) pairs. +bert_learner = SKHNLPSequentialFTLearner( + model_name="bert-large-uncased", + n_prompts=2, + random_state=1403, + device="cpu", # Note: CPU training for BERT-Large is very slow. + output_dir="./results/", + num_train_epochs=1, + per_device_train_batch_size=8, + per_device_eval_batch_size=8, + warmup_steps=500, + weight_decay=0.01, + logging_dir="./logs/", + logging_steps=50, + eval_strategy="epoch", + save_strategy="epoch", + load_best_model_at_end=True, +) + +# Build pipeline and run +# Build the pipeline, passing the BERT Learner. +pipeline = LearnerPipeline( + llm=bert_learner, + llm_id="bert-large-uncased", + ontologizer_data=False, +) + +# Run the full learning pipeline on the taxonomy-discovery task +outputs = pipeline( + train_data=train_data, + test_data=test_data, + task="taxonomy-discovery", + evaluate=True, + ontologizer_data=False, +) + +# Display the evaluation results +print("Metrics:", outputs.get("metrics")) + +# Display total elapsed time for training + prediction + evaluation +print("Elapsed time:", outputs["elapsed_time"]) + +# Print all returned outputs (include predictions) +print(outputs) diff --git a/examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py b/examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py new file mode 100644 index 0000000..90391f5 --- /dev/null +++ b/examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py @@ -0,0 +1,50 @@ +# Import core modules from the OntoLearner library +from ontolearner import GeoNames, train_test_split, LearnerPipeline, SKHNLPZSLearner + +#Load ontology and split data +# The GeoNames ontology provides geographic term types and relationships. +ontology = GeoNames() +ontology.load() +train_data, test_data = train_test_split( + ontology.extract(), + test_size=0.2, + random_state=42, +) + +# Configure the learner with user-defined generation and normalization settings +# Configure the Zero-Shot Qwen Learner for taxonomy discovery. +# This model uses a fixed prompt and string normalization (Levenshtein) to classify terms. 
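+# With normalize_mode="levenshtein", the model's free-text answer is presumably
+# snapped to the closest candidate label by edit distance; "substring" and "auto"
+# are alternative matching strategies (see SKHNLPZSLearner for the exact behavior).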
+llm_learner = SKHNLPZSLearner( + model_name="Qwen/Qwen2.5-0.5B-Instruct", + device="cpu", # use "cuda" if you have a GPU + max_new_tokens=16, + save_path="./outputs/", # directory or full file path for CSV + verbose=True, + normalize_mode="levenshtein", # "none" | "substring" | "levenshtein" | "auto" +) + +# Build pipeline and run +pipe = LearnerPipeline( + llm=llm_learner, + llm_id="Qwen/Qwen2.5-0.5B-Instruct", + ontologizer_data=False, + device="cpu", +) + +# Run the full learning pipeline on the taxonomy-discovery task +outputs = pipe( + train_data=train_data, # zero-shot; ignored by the LLM learner + test_data=test_data, + task="taxonomy-discovery", + evaluate=True, + ontologizer_data=False, +) + +# Display the evaluation results +print("Metrics:", outputs.get("metrics")) + +# Display total elapsed time for training + prediction + evaluation +print("Elapsed time:", outputs["elapsed_time"]) + +# Print all returned outputs (include predictions) +print(outputs) diff --git a/ontolearner/__init__.py b/ontolearner/__init__.py index 0b6fd26..d9ba608 100644 --- a/ontolearner/__init__.py +++ b/ontolearner/__init__.py @@ -31,7 +31,9 @@ StandardizedPrompting, LabelMapper, RWTHDBISTaxonomyLearner, - RWTHDBISTermTypingLearner) + RWTHDBISTermTypingLearner + SKHNLPZSLearner, + SKHNLPSequentialFTLearner) from ._learner import LearnerPipeline from .processor import Processor @@ -51,6 +53,8 @@ "Processor", "RWTHDBISTaxonomyLearner", "RWTHDBISTermTypingLearner", + "SKHNLPZSLearner", + "SKHNLPSequentialFTLearner", "data_structure", "text2onto", "ontology", diff --git a/ontolearner/learner/__init__.py b/ontolearner/learner/__init__.py index ad38f0b..3c56154 100644 --- a/ontolearner/learner/__init__.py +++ b/ontolearner/learner/__init__.py @@ -19,3 +19,4 @@ from .label_mapper import LabelMapper from .taxonomy_discovery.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTaxonomyLearner from .term_typing.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTermTypingLearner +from .taxonomy_discovery.skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner diff --git a/ontolearner/learner/taxonomy_discovery/__init__.py b/ontolearner/learner/taxonomy_discovery/__init__.py index ab5b4f8..2c6b452 100644 --- a/ontolearner/learner/taxonomy_discovery/__init__.py +++ b/ontolearner/learner/taxonomy_discovery/__init__.py @@ -13,3 +13,4 @@ # limitations under the License. from .rwthdbis import RWTHDBISSFTLearner +from .skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner diff --git a/ontolearner/learner/taxonomy_discovery/skhnlp.py b/ontolearner/learner/taxonomy_discovery/skhnlp.py new file mode 100644 index 0000000..fbe53b4 --- /dev/null +++ b/ontolearner/learner/taxonomy_discovery/skhnlp.py @@ -0,0 +1,761 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
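+
+# This module implements the SKHNLP taxonomy-discovery learners re-exported from
+# ontolearner.learner: SKHNLPSequentialFTLearner (BERT fine-tuned on sequential
+# taxonomy prompts) and SKHNLPZSLearner (zero-shot prompting with answer
+# normalization), together with the SKHNLPTaxonomyPrompts template helper.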
+ +import os +import re +import random + +import pandas as pd +import torch +import Levenshtein +from datasets import Dataset +from typing import Any, Optional, List, Tuple, Dict +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + BertTokenizer, + BertForSequenceClassification, + pipeline, + Trainer, + TrainingArguments, +) + +from ...base import AutoLearner, AutoPrompt +from ...utils import taxonomy_split, train_test_split as ontology_split +from ...data_structure import OntologyData, TaxonomicRelation + +class SKHNLPTaxonomyPrompts(AutoPrompt): + """Builds the 7 taxonomy prompts used during fine-tuning / inference.""" + def __init__(self) -> None: + super().__init__(prompt_template="{parent} is the superclass of {child}. This statement is [MASK].") + self.templates: List[str] = [ + "{parent} is the superclass of {child}. This statement is [MASK].", + "{child} is a subclass of {parent}. This statement is [MASK].", + "{parent} is the parent class of {child}. This statement is [MASK].", + "{child} is a child class of {parent}. This statement is [MASK].", + "{parent} is a supertype of {child}. This statement is [MASK].", + "{child} is a subtype of {parent}. This statement is [MASK].", + "{parent} is an ancestor class of {child}. This statement is [MASK].", + ] + + def make(self, parent: str, child: str, template_idx: int) -> str: + return self.templates[template_idx].format(parent=parent, child=child) + + +class SKHNLPSequentialFTLearner(AutoLearner): + """ + BERT-based classifier for taxonomy discovery. + + With OntologyData: + * TRAIN: ontology-aware split; create balanced train/eval with negatives. + * PREDICT/TEST: notebook-style parent selection -> list[{'parent', 'child'}]. + + With DataFrame/list: + * TRAIN: taxonomy_split + negatives; build prompts and fine-tune. + * PREDICT/TEST: pairwise binary classification (returns label + score). + """ + + # Fixed constants defining data split size and the proportional mix of + # negative sample types (reversed vs. manipulated) for balancing. + _EVAL_FRACTION: float = 0.16 + _NEG_RATIO_REVERSED: float = 1/3 + _NEG_RATIO_MANIPULATED: float = 2/3 + + def __init__( + self, + # core + model_name: str = "bert-large-uncased", + n_prompts: int = 7, + random_state: int = 1403, + device: Optional[str] = None, # "cuda" | "cpu" | None (auto) + + # ---- expose TrainingArguments as individual user-defined args ---- + output_dir: str = "./results/", + num_train_epochs: int = 1, + per_device_train_batch_size: int = 4, + per_device_eval_batch_size: int = 4, + warmup_steps: int = 500, + weight_decay: float = 0.01, + logging_dir: str = "./logs/", + logging_steps: int = 50, + eval_strategy: str = "epoch", + save_strategy: str = "epoch", + load_best_model_at_end: bool = True, + ) -> None: + super().__init__() + self.model_name = model_name + self.n_prompts = n_prompts + self.random_state = random_state + self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") + + self.tokenizer: Optional[BertTokenizer] = None + self.model: Optional[BertForSequenceClassification] = None + self.prompter = SKHNLPTaxonomyPrompts() + + # Candidate parents (unique parent list) for multi-class parent selection. 
+ self._candidate_parents: Optional[List[str]] = None + + # Keep last train/eval tables for inspection + self._last_train: Optional[pd.DataFrame] = None + self._last_eval: Optional[pd.DataFrame] = None + + random.seed(self.random_state) + + # Build TrainingArguments from the individual user-defined values + self.training_args = TrainingArguments( + output_dir=output_dir, + num_train_epochs=num_train_epochs, + per_device_train_batch_size=per_device_train_batch_size, + per_device_eval_batch_size=per_device_eval_batch_size, + warmup_steps=warmup_steps, + weight_decay=weight_decay, + logging_dir=logging_dir, + logging_steps=logging_steps, + eval_strategy=eval_strategy, + save_strategy=save_strategy, + load_best_model_at_end=load_best_model_at_end, + ) + + def load(self, model_id: Optional[str] = None, **_: Any) -> None: + """Load tokenizer and model; move model to the requested device.""" + model_id = model_id or self.model_name + self.tokenizer = BertTokenizer.from_pretrained(model_id) + self.model = BertForSequenceClassification.from_pretrained(model_id, num_labels=2) + self.model.config.problem_type = "single_label_classification" + + # place on device chosen by user (or auto) + target_device = self.device + if target_device not in {"cuda", "cpu"}: + target_device = "cuda" if torch.cuda.is_available() else "cpu" + self.model.to(target_device) + + def tasks_ground_truth_former(self, data: Any, task: str) -> Any: + if task != "taxonomy-discovery": + return super().tasks_ground_truth_former(data, task) + + if isinstance(data, pd.DataFrame): + if "label" in data.columns: + return [ + {"parent": p, "child": c, "label": bool(lbl)} + for p, c, lbl in zip(data["parent"], data["child"], data["label"]) + ] + return [{"parent": p, "child": c} for p, c in zip(data["parent"], data["child"])] + + if isinstance(data, list): + return data + + return super().tasks_ground_truth_former(data, task) + + def _make_negatives(self, positives_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: + """Return (reversed_df, manipulated_df).""" + unique_parents = positives_df["parent"].unique().tolist() + + def as_reversed(df: pd.DataFrame) -> pd.DataFrame: + out = df.copy() + out[["parent", "child"]] = out[["child", "parent"]].values + out["label"] = False + return out + + def with_random_parent(df: pd.DataFrame) -> pd.DataFrame: + def pick_other_parent(p: str) -> str: + pool = [x for x in unique_parents if x != p] + return random.choice(pool) if pool else p + out = df.copy() + out["parent"] = out["parent"].apply(pick_other_parent) + out["label"] = False + return out + + return as_reversed(positives_df), with_random_parent(positives_df) + + def _balance_with_negatives( + self, + positives_df: pd.DataFrame, + reversed_df: pd.DataFrame, + manipulated_df: pd.DataFrame, + ) -> pd.DataFrame: + """Combine positives and negatives with the same ratios as before.""" + n_pos = len(positives_df) + n_rev = int(n_pos * self._NEG_RATIO_REVERSED) + n_man = int(n_pos * self._NEG_RATIO_MANIPULATED) + + combined = pd.concat( + [ + positives_df.sample(n_pos, random_state=self.random_state), + reversed_df.sample(n_rev, random_state=self.random_state), + manipulated_df.sample(n_man, random_state=self.random_state), + ], + ignore_index=True, + ) + combined = combined.drop_duplicates(subset=["parent", "child", "label"]).reset_index(drop=True) + return combined + + def _add_prompt_columns(self, df: pd.DataFrame) -> pd.DataFrame: + out = df.copy() + for i in range(self.n_prompts): + out[f"prompt_{i+1}"] = out.apply( + lambda r, k=i: 
self.prompter.make(r["parent"], r["child"], k), axis=1
+            )
+        return out
+
+    @staticmethod
+    def _df_from_relations(relations: List[TaxonomicRelation], label: bool = True) -> pd.DataFrame:
+        if not relations:
+            return pd.DataFrame(columns=["parent", "child", "label"])
+        return pd.DataFrame([{"parent": r.parent, "child": r.child, "label": label} for r in relations])
+
+    @staticmethod
+    def _relations_from_df(df: pd.DataFrame) -> List[TaxonomicRelation]:
+        return [TaxonomicRelation(parent=p, child=c) for p, c in zip(df["parent"], df["child"])]
+
+    def _build_masked_prompt(self, parent: str, child: str, index_1_based: int, mask_token: str = "[MASK]") -> str:
+        prompts_1based = [
+            f"{parent} is the superclass of {child}. This statement is {mask_token}.",
+            f"{child} is a subclass of {parent}. This statement is {mask_token}.",
+            f"{parent} is the parent class of {child}. This statement is {mask_token}.",
+            f"{child} is a child class of {parent}. This statement is {mask_token}.",
+            f"{parent} is a supertype of {child}. This statement is {mask_token}.",
+            f"{child} is a subtype of {parent}. This statement is {mask_token}.",
+            f"{parent} is an ancestor class of {child}. This statement is {mask_token}.",
+            f"{child} is a descendant class of {parent}. This statement is {mask_token}.",
+            f"\"{parent}\" is the superclass of \"{child}\". This statement is {mask_token}.",
+        ]
+        return prompts_1based[index_1_based - 1]
+
+    @torch.no_grad()
+    def _predict_prompt_true_false(self, sentence: str) -> bool:
+        enc = self.tokenizer(sentence, return_tensors="pt").to(self.model.device)
+        logits = self.model(**enc).logits
+        predicted_label = torch.argmax(logits, dim=1).item()
+        return predicted_label == 1
+
+    def _select_parent_via_prompts(self, child: str) -> str:
+        assert self._candidate_parents, "Candidate parents not initialized."
+        scores: dict[str, int] = {p: 0 for p in self._candidate_parents}
+
+        def prompt_indices_for_level(level: int) -> List[int]:
+            if level == 0:
+                return [1]
+            return [2 * level, 2 * level + 1]
+
+        def recurse(active_parents: List[str], level: int) -> str:
+            idxs = [i for i in prompt_indices_for_level(level) if 1 <= i <= self.n_prompts]
+            if idxs:
+                for parent in active_parents:
+                    votes = sum(
+                        1
+                        for idx in idxs
+                        if self._predict_prompt_true_false(
+                            self._build_masked_prompt(parent=parent, child=child, index_1_based=idx)
+                        )
+                    )
+                    scores[parent] += votes
+
+            max_score = max(scores[p] for p in active_parents)
+            tied = [p for p in active_parents if scores[p] == max_score]
+            if len(tied) == 1:
+                return tied[0]
+            if level < 4:
+                return recurse(tied, level + 1)
+            return random.choice(tied)
+
+        return recurse(list(scores.keys()), level=0)
+
+    def _taxonomy_discovery(self, data: Any, test: bool = False):
+        """
+        TRAIN:
+          - OntologyData -> ontology-aware split; negatives per split; balanced sets.
+          - DataFrame/list -> taxonomy_split for positives; negatives proportional.
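+          (Illustrative: after negative sampling, each balanced train/eval row holds
+          "parent", "child", a boolean "label", and prompt_1..prompt_7 columns rendered
+          from the SKHNLPTaxonomyPrompts templates.)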
+ TEST: + - OntologyData -> parent selection: [{'parent': predicted, 'child': child}] + - DataFrame/list -> binary pair classification with 'label' + 'score' + """ + is_ontology_object = isinstance(data, OntologyData) + + # Normalize input + if isinstance(data, pd.DataFrame): + pairs_df = data.copy() + elif isinstance(data, list): + pairs_df = pd.DataFrame(data) + else: + gt_pairs = super().tasks_ground_truth_former(data, "taxonomy-discovery") + pairs_df = pd.DataFrame(gt_pairs) + if "label" not in pairs_df.columns: + pairs_df["label"] = True + + # Maintain candidate parents across calls + if "parent" in pairs_df.columns: + parents_in_call = sorted(pd.unique(pairs_df["parent"]).tolist()) + if test: + if self._candidate_parents is None: + self._candidate_parents = parents_in_call + else: + self._candidate_parents = sorted(set(self._candidate_parents).union(parents_in_call)) + else: + if self._candidate_parents is None: + self._candidate_parents = parents_in_call + + if test: + if is_ontology_object and self._candidate_parents: + predictions: List[dict[str, str]] = [] + for _, row in pairs_df.iterrows(): + child_term = row["child"] + chosen_parent = self._select_parent_via_prompts(child_term) + predictions.append({"parent": chosen_parent, "child": child_term}) + return predictions + + # pairwise binary classification + prompts_df = self._add_prompt_columns(pairs_df.copy()) + true_probs_by_prompt: List[torch.Tensor] = [] + + for i in range(self.n_prompts): + col = f"prompt_{i+1}" + enc = self.tokenizer( + prompts_df[col].tolist(), + return_tensors="pt", + padding=True, + truncation=True, + ).to(self.model.device) + with torch.no_grad(): + logits = self.model(**enc).logits + true_probs_by_prompt.append(torch.softmax(logits, dim=1)[:, 1]) + + avg_true_prob = torch.stack(true_probs_by_prompt, dim=0).mean(0) + predicted_bool = (avg_true_prob >= 0.5).cpu().tolist() + + results: List[dict[str, Any]] = [] + for p, c, s, yhat in zip( + pairs_df["parent"], pairs_df["child"], avg_true_prob.tolist(), predicted_bool + ): + results.append({"parent": p, "child": c, "label": int(bool(yhat)), "score": float(s)}) + return results + + if isinstance(data, OntologyData): + train_onto, eval_onto = ontology_split( + data, test_size=self._EVAL_FRACTION, random_state=self.random_state, verbose=False + ) + + train_pos_rel: List[TaxonomicRelation] = getattr(train_onto.type_taxonomies, "taxonomies", []) or [] + eval_pos_rel: List[TaxonomicRelation] = getattr(eval_onto.type_taxonomies, "taxonomies", []) or [] + + train_pos_df = self._df_from_relations(train_pos_rel, label=True) + eval_pos_df = self._df_from_relations(eval_pos_rel, label=True) + + tr_rev_df, tr_man_df = self._make_negatives(train_pos_df) + ev_rev_df, ev_man_df = self._make_negatives(eval_pos_df) + + train_df = self._balance_with_negatives(train_pos_df, tr_rev_df, tr_man_df) + eval_df = self._balance_with_negatives(eval_pos_df, ev_rev_df, ev_man_df) + + train_df = self._add_prompt_columns(train_df) + eval_df = self._add_prompt_columns(eval_df) + + else: + if "label" not in pairs_df.columns or pairs_df["label"].nunique() == 1: + positives_df = pairs_df[pairs_df.get("label", True)][["parent", "child"]].copy() + pos_rel = self._relations_from_df(positives_df) + + tr_rel, ev_rel = taxonomy_split( + pos_rel, train_terms=None, test_size=self._EVAL_FRACTION, random_state=self.random_state, verbose=False + ) + train_pos_df = self._df_from_relations(tr_rel, label=True) + eval_pos_df = self._df_from_relations(ev_rel, label=True) + + tr_rev_df, tr_man_df = 
self._make_negatives(train_pos_df)
+                ev_rev_df, ev_man_df = self._make_negatives(eval_pos_df)
+
+                train_df = self._balance_with_negatives(train_pos_df, tr_rev_df, tr_man_df)
+                eval_df = self._balance_with_negatives(eval_pos_df, ev_rev_df, ev_man_df)
+
+                train_df = self._add_prompt_columns(train_df)
+                eval_df = self._add_prompt_columns(eval_df)
+
+            else:
+                positives_df = pairs_df[pairs_df["label"]][["parent", "child"]].copy()
+                pos_rel = self._relations_from_df(positives_df)
+
+                tr_rel, ev_rel = taxonomy_split(
+                    pos_rel, train_terms=None, test_size=self._EVAL_FRACTION, random_state=self.random_state, verbose=False
+                )
+                train_pos_df = self._df_from_relations(tr_rel, label=True)
+                eval_pos_df = self._df_from_relations(ev_rel, label=True)
+
+                # Use the explicitly labeled negative pairs (label == False) as negatives
+                negatives_df = pairs_df[~pairs_df["label"]][["parent", "child"]].copy()
+                negatives_df = negatives_df.sample(frac=1.0, random_state=self.random_state).reset_index(drop=True)
+
+                n_eval_neg = max(1, int(len(negatives_df) * self._EVAL_FRACTION)) if len(negatives_df) > 0 else 0
+                eval_neg_df = negatives_df.iloc[:n_eval_neg].copy() if n_eval_neg > 0 else negatives_df.iloc[:0].copy()
+                train_neg_df = negatives_df.iloc[n_eval_neg:].copy()
+
+                train_neg_df["label"] = False
+                eval_neg_df["label"] = False
+
+                train_df = pd.concat([train_pos_df, train_neg_df], ignore_index=True)
+                eval_df = pd.concat([eval_pos_df, eval_neg_df], ignore_index=True)
+
+                train_df = self._add_prompt_columns(train_df)
+                eval_df = self._add_prompt_columns(eval_df)
+
+        # Ensure labels are int64
+        train_df["label"] = train_df["label"].astype("int64")
+        eval_df["label"] = eval_df["label"].astype("int64")
+
+        # Sequential fine-tuning across prompts
+        for i in range(self.n_prompts):
+            prompt_col = f"prompt_{i+1}"
+            train_ds = Dataset.from_pandas(train_df[[prompt_col, "label"]].reset_index(drop=True))
+            eval_ds = Dataset.from_pandas(eval_df[[prompt_col, "label"]].reset_index(drop=True))
+
+            train_ds = train_ds.rename_column("label", "labels")
+            eval_ds = eval_ds.rename_column("label", "labels")
+
+            def tokenize_batch(batch):
+                return self.tokenizer(batch[prompt_col], padding="max_length", truncation=True)
+
+            train_ds = train_ds.map(tokenize_batch, batched=True, remove_columns=[prompt_col])
+            eval_ds = eval_ds.map(tokenize_batch, batched=True, remove_columns=[prompt_col])
+
+            train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
+            eval_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
+
+            trainer = Trainer(
+                model=self.model,
+                args=self.training_args,
+                train_dataset=train_ds,
+                eval_dataset=eval_ds,
+            )
+            trainer.train()
+
+        self._last_train = train_df
+        self._last_eval = eval_df
+        return None
+
+
+class SKHNLPZSLearner(AutoLearner):
+    """
+    Zero-shot taxonomy learner using an instruction-tuned causal LLM.
+
+    Behavior
+    --------
+    - Builds a fixed classification prompt listing 9 GeoNames parent classes.
+    - For each input row (child term), generates a short completion and parses
+      the predicted class from a strict '#[ ... ]#' format.
+    - Optionally normalizes the raw prediction to one of the valid 9 labels via:
+        * "none" : keep the parsed text as-is
+        * "substring" : snap to a label if either is a substring of the other
+        * "levenshtein" : snap to the closest label by edit distance
+        * "auto" : substring, then Levenshtein if needed
+    - Saves raw and normalized predictions to CSV if `save_path` is provided.
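+      For example (illustrative), a parsed prediction of "mountain" snaps to
+      "mountain, hill, rock" under "substring"/"auto", while a near-miss such as
+      "stream, lak" is recovered as "stream, lake" under "levenshtein".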
+ + Inputs the learner accepts (via `_to_dataframe`): + - pandas.DataFrame with columns: ['child', 'parent'] or ['child', 'parent', 'label'] + - list[dict] with keys: 'child', 'parent' (and optionally 'label') + - list of tuples/lists: (child, parent) or (child, parent, label) + - OntoLearner-style object exposing .type_taxonomies.taxonomies iterable with (child, parent) + """ + + # Fixed class inventory (GeoNames parents) + CLASS_LIST = [ + "city, village", + "country, state, region", + "forest, heath", + "mountain, hill, rock", + "parks, area", + "road, railroad", + "spot, building, farm", + "stream, lake", + "undersea", + ] + + # Strict format: #[ ... ]# + _PREDICTION_PATTERN = re.compile(r"#\[\s*([^\]]+?)\s*\]#") + + def __init__( + self, + model_name: str = "Qwen/Qwen2.5-0.5B-Instruct", + device: Optional[str] = None, # "cuda" | "cpu" | None (auto) + max_new_tokens: int = 16, + save_path: Optional[str] = None, # directory or full path + verbose: bool = True, + normalize_mode: str = "none", # "none" | "substring" | "levenshtein" | "auto" + random_state: int = 1403, + ) -> None: + super().__init__() + self.model_name = model_name + self.verbose = verbose + self.max_new_tokens = max_new_tokens + self.save_path = save_path + self.normalize_mode = (normalize_mode or "none").lower().strip() + self.random_state = random_state + + random.seed(self.random_state) + + # Device: auto-detect CUDA if not specified + if device is None: + self._has_cuda = torch.cuda.is_available() + else: + self._has_cuda = (device == "cuda") + self._pipe_device = 0 if self._has_cuda else -1 + self._model_device_map = {"": "cuda"} if self._has_cuda else None + + self._tokenizer = None + self._model = None + self._pipeline = None + + # Prompt template used for every example + self._classification_prompt = ( + "My task is classification. My classes are as follows: " + "(city, village), (country, state, region), (forest, heath), " + "(mountain, hill, rock), (parks, area), (road, railroad), " + "(spot, building, farm), (stream, lake), (undersea). " + 'I will provide you with a phrase like "wadi mouth". ' + "The name of each class is placed within a pair of parentheses. " + "I want you to choose the most appropriate class from those mentioned above " + "based on the given phrase and present it in a format like #[parks, area]#. " + "So, the general format for each response will be #[class name]#. " + "Pay attention to the format of the response. Start with a '#' character, " + "include the class name inside it, and end with another '#' character. " + "Additionally, make sure to include a '#' character at the end to indicate " + "that the answer is complete. I don't need any additional explanations." + ) + + def load(self, model_id: str = "") -> None: + """ + Load tokenizer, model, and text-generation pipeline. 
+ """ + model_id = model_id or self.model_name + if self.verbose: + print(f"[ZeroShotTaxonomyLearner] Loading {model_id}") + + self._tokenizer = AutoTokenizer.from_pretrained(model_id) + + # Ensure a pad token is set for generation + if self._tokenizer.pad_token_id is None and self._tokenizer.eos_token_id is not None: + self._tokenizer.pad_token = self._tokenizer.eos_token + + self._model = AutoModelForCausalLM.from_pretrained( + model_id, + device_map=self._model_device_map, + torch_dtype="auto", + ) + + self._pipeline = pipeline( + task="text-generation", + model=self._model, + tokenizer=self._tokenizer, + device=self._pipe_device, # 0 for GPU, -1 for CPU + ) + + if self.verbose: + print("Device set to use", "cuda" if self._has_cuda else "cpu") + print("[ZeroShotTaxonomyLearner] Model loaded.") + + def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[List[Dict[str, str]]]: + """ + Zero-shot prediction over all incoming rows (no filtering/augmentation). + Returns a list of dictionaries: [{'parent': predicted_label, 'child': child}, ...] + """ + if not test: + if self.verbose: + print("[ZeroShot] Training skipped (zero-shot).") + return None + + df = self._to_dataframe(data) + + if self.verbose: + print(f"[ZeroShot] Incoming rows: {len(df)}; columns: {list(df.columns)}") + + eval_df = pd.DataFrame(df).reset_index(drop=True) + if eval_df.empty: + return [] + + # Prepare columns for inspection and saving + eval_df["prediction_raw"] = "" + eval_df["prediction_sub"] = "" + eval_df["prediction_lvn"] = "" + eval_df["prediction_auto"] = "" + eval_df["prediction"] = "" # final (per normalize_mode) + + # Generate predictions row by row + for idx, row in eval_df.iterrows(): + child_term = str(row["child"]) + raw_text, parsed_raw = self._generate_and_parse(child_term) + + # Choose a string to normalize (parsed token if matched, otherwise whole output) + basis = parsed_raw if parsed_raw != "unknown" else raw_text + + # Compute all normalization variants + sub_norm = self._normalize_substring_only(basis) + lvn_norm = self._normalize_levenshtein_only(basis) + auto_norm = self._normalize_auto(basis) + + # Final selection by mode + if self.normalize_mode == "none": + final_label = parsed_raw + elif self.normalize_mode == "substring": + final_label = sub_norm + elif self.normalize_mode == "levenshtein": + final_label = lvn_norm + elif self.normalize_mode == "auto": + final_label = auto_norm + else: + final_label = parsed_raw # fallback + + # Persist to DataFrame for inspection/export + eval_df.at[idx, "prediction_raw"] = parsed_raw + eval_df.at[idx, "prediction_sub"] = sub_norm + eval_df.at[idx, "prediction_lvn"] = lvn_norm + eval_df.at[idx, "prediction_auto"] = auto_norm + eval_df.at[idx, "prediction"] = final_label + + # Return in the format expected by the pipeline + return [{"parent": p, "child": c} for p, c in zip(eval_df["prediction"], eval_df["child"])] + + def _generate_and_parse(self, child_term: str) -> (str, str): + """ + Generate a completion for the given child term and extract the raw predicted class + using the strict '#[ ... ]#' pattern. 
+ + Returns + ------- + (raw_generation_text, parsed_prediction_or_unknown) + """ + messages = [ + {"role": "system", "content": "You are a helpful classifier."}, + {"role": "user", "content": f"{self._classification_prompt} {child_term}"}, + ] + + prompt = self._tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + generation = self._pipeline( + prompt, + max_new_tokens=self.max_new_tokens, + do_sample=False, + temperature=0.0, + top_p=1.0, + eos_token_id=self._tokenizer.eos_token_id, + pad_token_id=self._tokenizer.eos_token_id, + return_full_text=False, + )[0]["generated_text"] + + match = self._PREDICTION_PATTERN.search(generation) + parsed = match.group(1).strip() if match else "unknown" + return generation, parsed + + # ------------------------------------------------------------------------- + # Normalization helpers + # ------------------------------------------------------------------------- + + def _normalize_substring_only(self, text: str) -> str: + """ + Snap to a label if the string is equal to / contained in / contains a valid label (case-insensitive). + """ + if not isinstance(text, str): + return "unknown" + lowered = text.strip().lower() + if not lowered: + return "unknown" + + for label in self.CLASS_LIST: + label_lower = label.lower() + if lowered == label_lower or lowered in label_lower or label_lower in lowered: + return label + return "unknown" + + def _normalize_levenshtein_only(self, text: str) -> str: + """ + Snap to the nearest label by Levenshtein (edit) distance. + """ + if not isinstance(text, str): + return "unknown" + lowered = text.strip().lower() + if not lowered: + return "unknown" + + best_label = None + best_distance = 10**9 + for label in self.CLASS_LIST: + label_lower = label.lower() + distance = Levenshtein.distance(lowered, label_lower) + if distance < best_distance: + best_distance = distance + best_label = label + return best_label or "unknown" + + def _normalize_auto(self, text: str) -> str: + """ + Cascade: try substring-first; if no match, fall back to Levenshtein snapping. + """ + snapped = self._normalize_substring_only(text) + return snapped if snapped != "unknown" else self._normalize_levenshtein_only(text) + + def _to_dataframe(data: Any) -> pd.DataFrame: + """ + Normalize various input formats into a DataFrame with columns: + ['child', 'parent'] or ['child', 'parent', 'label']. 
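+        For example, [("wadi", "stream, lake")] and
+        [{"child": "wadi", "parent": "stream, lake"}] both normalize to a
+        single-row frame with 'child' and 'parent' columns.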
+ """ + # Already a DataFrame + if isinstance(data, pd.DataFrame): + df = data.copy() + df.columns = [str(c).lower() for c in df.columns] + return df.reset_index(drop=True) + + # List[dict] + if isinstance(data, list) and data and isinstance(data[0], dict): + rows = [{str(k).lower(): v for k, v in d.items()} for d in data] + return pd.DataFrame(rows).reset_index(drop=True) + + # Iterable of tuples/lists: (child, parent[, label]) + if isinstance(data, (list, tuple)) and data: + first = data[0] + if isinstance(first, (list, tuple)) and not isinstance(first, dict): + n = len(first) + if n >= 3: + return pd.DataFrame(data, columns=["child", "parent", "label"]).reset_index(drop=True) + if n == 2: + return pd.DataFrame(data, columns=["child", "parent"]).reset_index(drop=True) + + # OntoLearner-style object (with .type_taxonomies.taxonomies) + try: + type_taxonomies = getattr(data, "type_taxonomies", None) + if type_taxonomies is not None: + taxonomies = getattr(type_taxonomies, "taxonomies", None) + if taxonomies is not None: + rows = [] + for rel in taxonomies: + parent = getattr(rel, "parent", None) + child = getattr(rel, "child", None) + label = getattr(rel, "label", None) if hasattr(rel, "label") else None + if parent is not None and child is not None: + rows.append({"child": child, "parent": parent, "label": label}) + if rows: + return pd.DataFrame(rows).reset_index(drop=True) + except Exception: + pass + + raise ValueError( + "Unsupported data format. Provide a DataFrame, a list of dicts, " + "a list of (child, parent[, label]) tuples/lists, or an object with " + ".type_taxonomies.taxonomies." + ) + + def _resolve_save_path(save_path: str, default_filename: str) -> str: + """ + If `save_path` is a directory, join it with `default_filename`. + If it's a file path, return as-is. 
+ """ + base = os.path.basename(save_path) + has_ext = os.path.splitext(base)[1] != "" + return save_path if has_ext else os.path.join(save_path, default_filename) diff --git a/requirements.txt b/requirements.txt index 6d71bd5..28a92bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ mistral-common[sentencepiece]~=1.8.5 g4f protobuf<5 accelerate>=0.26.0 +Levenshtein From 844de4f0a0b6a2aa1240941fe6283fca1f0c52ed Mon Sep 17 00:00:00 2001 From: KrishnaRani Date: Wed, 29 Oct 2025 16:07:24 +0100 Subject: [PATCH 3/7] adding sbunlp learner models --- ...lm_learner_sbunlp_fs_taxonomy_discovery.py | 66 +++ examples/llm_learner_sbunlp_text2onto.py | 81 +++ examples/llm_learner_sbunlp_zs_term_typing.py | 55 ++ ontolearner/__init__.py | 10 +- ontolearner/learner/__init__.py | 3 + .../learner/taxonomy_discovery/__init__.py | 1 + .../learner/taxonomy_discovery/sbunlp.py | 317 +++++++++++ ontolearner/learner/term_typing/__init__.py | 1 + ontolearner/learner/term_typing/sbunlp.py | 400 +++++++++++++ ontolearner/learner/text2onto/__init__.py | 15 + ontolearner/learner/text2onto/sbunlp.py | 525 ++++++++++++++++++ 11 files changed, 1472 insertions(+), 2 deletions(-) create mode 100644 examples/llm_learner_sbunlp_fs_taxonomy_discovery.py create mode 100644 examples/llm_learner_sbunlp_text2onto.py create mode 100644 examples/llm_learner_sbunlp_zs_term_typing.py create mode 100644 ontolearner/learner/taxonomy_discovery/sbunlp.py create mode 100644 ontolearner/learner/term_typing/sbunlp.py create mode 100644 ontolearner/learner/text2onto/__init__.py create mode 100644 ontolearner/learner/text2onto/sbunlp.py diff --git a/examples/llm_learner_sbunlp_fs_taxonomy_discovery.py b/examples/llm_learner_sbunlp_fs_taxonomy_discovery.py new file mode 100644 index 0000000..19797a9 --- /dev/null +++ b/examples/llm_learner_sbunlp_fs_taxonomy_discovery.py @@ -0,0 +1,66 @@ +# Import core modules from the OntoLearner library +from ontolearner import GeoNames, train_test_split, LearnerPipeline +# Import the specific Few-Shot Learner implementation +from ontolearner import SBUNLPFewShotLearner + +# Load ontology and split +# Load the GeoNames ontology for taxonomy discovery. +# GeoNames provides geographic parent-child relationships (is-a hierarchy). +ontology = GeoNames() +ontology.load() +data = ontology.extract() # Extract the list of taxonomic relationships from the ontology object + +# Split the taxonomic relationships into train and test sets +train_data, test_data = train_test_split( + data, + test_size=0.6, # 60% of data used for testing (terms to find relations for) + random_state=42, +) + +# Configure the learner with user-defined inference args + device +# Configure the SBUNLP Few-Shot Learner using the Qwen model. +# This performs in-context learning via N x M batch prompting. 
+llm_learner = SBUNLPFewShotLearner(
+    # Model / decoding
+    model_name="Qwen/Qwen2.5-0.5B-Instruct", # The Qwen model to load
+    try_4bit=True, # uses 4-bit if bitsandbytes + CUDA available for memory efficiency
+    max_new_tokens=140, # limit the length of the model's response (for JSON output)
+    max_input_tokens=1500, # limit the total prompt length (context window)
+    temperature=0.0, # set to 0.0 for deterministic output (best for structured JSON)
+    top_p=1.0, # top-p sampling disabled with temperature=0.0
+
+    # Grid settings (N x M prompts)
+    num_train_chunks=7, # N: split training examples (few-shot context) into 7 chunks
+    num_test_chunks=7, # M: split test terms (vocabulary) into 7 chunks (total 49 prompts)
+
+    # Run controls
+    limit_num_prompts=None, # None runs all N x M prompts; set to an integer for a dry-run
+    output_dir="./outputs/taskC_batches", # Optional: dump per-prompt JSON results for debugging
+)
+
+# Build pipeline and run
+# Build the pipeline, passing the Few-Shot Learner.
+pipe = LearnerPipeline(
+    llm=llm_learner,
+    llm_id=llm_learner.model_name,
+    ontologizer_data=True, # Let the learner flatten structured ontology objects via its tasks_* helpers
+    device="auto", # automatically select CUDA or CPU
+)
+
+# Run the full learning pipeline on the taxonomy-discovery task
+outputs = pipe(
+    train_data=train_data,
+    test_data=test_data,
+    task="taxonomy-discovery",
+    evaluate=True,
+    ontologizer_data=True,
+)
+
+# Display the evaluation results
+print("Metrics:", outputs.get("metrics"))
+
+# Display total elapsed time for training + prediction + evaluation
+print("Elapsed time:", outputs["elapsed_time"])
+
+# Print all returned outputs (include predictions)
+print(outputs)
diff --git a/examples/llm_learner_sbunlp_text2onto.py b/examples/llm_learner_sbunlp_text2onto.py
new file mode 100644
index 0000000..564f641
--- /dev/null
+++ b/examples/llm_learner_sbunlp_text2onto.py
@@ -0,0 +1,81 @@
+import os
+import torch
+# Import all the required classes
+from ontolearner import SBUNLPText2OntoLearner
+from ontolearner.learner.text2onto.sbunlp import LocalAutoLLM
+
+# Local folder where the dataset is stored
+# This path is relative to the directory where the script is executed
+# (e.g., E:\OntoLearner\examples)
+LOCAL_DATA_DIR = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology"
+
+# Ensure the base directories exist
+# Creates the train and test subdirectories if they don't already exist.
+os.makedirs(os.path.join(LOCAL_DATA_DIR, 'train'), exist_ok=True)
+os.makedirs(os.path.join(LOCAL_DATA_DIR, 'test'), exist_ok=True)
+
+# Define local file paths (pointing to already saved files)
+# These files are used as input for the Fit and Predict phases.
+DOCS_ALL_PATH = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/train/documents.jsonl"
+TERMS2DOC_PATH = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/train/terms2docs.json"
+DOCS_TEST_PATH = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/test/text2onto_ecology_test_documents.jsonl"
+
+# Output files for predictions (saved directly under LOCAL_DATA_DIR/test)
+# These files will be created by the predict_terms/types methods.
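+# Each file holds one JSON prediction object per line (JSONL).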
+TERMS_PRED_OUT = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/test/extracted_terms_ecology.jsonl" +TYPES_PRED_OUT = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/test/extracted_types_ecology.jsonl" + +#Initialize and Load Learner --- +MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" +# Determine the device for inference (GPU or CPU) +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" + +# Instantiate the underlying LLM helper +# (LocalAutoLLM handles model loading and generation) +llm_model_helper = LocalAutoLLM(device=DEVICE) + +# Instantiate the main learner class, passing the LLM helper to its constructor +learner = SBUNLPText2OntoLearner(model=llm_model_helper, device=DEVICE) + +# Load the model (This calls llm_model_helper.load) +LOAD_IN_4BIT = torch.cuda.is_available() +learner.model.load(MODEL_ID, load_in_4bit=LOAD_IN_4BIT) + +# Build Few-Shot Exemplars (Fit Phase) +# The fit method uses the local data paths to build the in-context learning prompts. +learner.fit( + train_docs_jsonl=DOCS_ALL_PATH, + terms2doc_json=TERMS2DOC_PATH, + sample_size=28, + seed=123 # Seed for stratified random sampling stability +) + +MAX_NEW_TOKENS = 100 + +terms_written = learner.predict_terms( + docs_test_jsonl=DOCS_TEST_PATH, + out_jsonl=TERMS_PRED_OUT, + max_new_tokens=MAX_NEW_TOKENS +) +print(f"✅ Term Extraction Complete. Wrote {terms_written} prediction lines.") + +# Type Extraction subtask +types_written = learner.predict_types( + docs_test_jsonl=DOCS_TEST_PATH, + out_jsonl=TYPES_PRED_OUT, + max_new_tokens=MAX_NEW_TOKENS +) +print(f"✅ Type Extraction Complete. Wrote {types_written} prediction lines.") + +try: + # Evaluate Term Extraction using the custom F1 function and gold data + f1_term = learner.evaluate_extraction_f1(TERMS2DOC_PATH, TERMS_PRED_OUT, key="term") + print(f"Final Term Extraction F1: {f1_term:.4f}") + + # Evaluate Type Extraction + f1_type = learner.evaluate_extraction_f1(TERMS2DOC_PATH, TYPES_PRED_OUT, key="type") + print(f"Final Type Extraction F1: {f1_type:.4f}") + +except Exception as e: + # Catches errors like missing sklearn (ImportError) or missing prediction files (FileNotFoundError) + print(f"❌ Evaluation Error: {e}. Ensure sklearn is installed and prediction files were created.") diff --git a/examples/llm_learner_sbunlp_zs_term_typing.py b/examples/llm_learner_sbunlp_zs_term_typing.py new file mode 100644 index 0000000..75d01da --- /dev/null +++ b/examples/llm_learner_sbunlp_zs_term_typing.py @@ -0,0 +1,55 @@ +# Import core modules from the OntoLearner library +from ontolearner import AgrO, train_test_split, LearnerPipeline +# Import the specific Zero-Shot Learner implementation for Term Typing +from ontolearner import SBUNLPZSLearner + +# Load ontology and split +# Load the AgrO ontology for type inventory and test data. +ontology = AgrO() +ontology.load() +data = ontology.extract() # Extract the full set of relationships/terms + +# Split the data into train (to learn type inventory) and test (terms to predict) +train_data, test_data = train_test_split( + data, + test_size=0.6, # 60% of data used for testing + random_state=42, +) + +# Configure the Qwen Zero-Shot learner (inference-only) +# This learner's 'fit' phase learns the vocabulary of allowed type labels. 
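+# During fit() it collects the unique type labels present in the training split; at
+# predict() time each test term is prompted against that fixed inventory, and any
+# generated labels outside the inventory are filtered out.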
+llm_learner = SBUNLPZSLearner( + # Model / decoding + model_id="Qwen/Qwen2.5-0.5B-Instruct", # The Qwen model to load + # device= is auto-detected + max_new_tokens=64, # Sufficient length for JSON list of types + temperature=0.0, # Ensures deterministic (greedy) output + # token= None, # Assuming public model access +) + +# Build pipeline and run +# Build the pipeline, passing the Zero-Shot Learner. +pipe = LearnerPipeline( + llm=llm_learner, + llm_id=llm_learner.model_id, + ontologizer_data=False, + device="cpu", # select CUDA or CPU +) + +# Run the full learning pipeline on the Term-Typing task +outputs = pipe( + train_data=train_data, + test_data=test_data, + task="term-typing", + evaluate=True, + ontologizer_data=False, +) + +# Display the evaluation results +print("Metrics:", outputs.get("metrics")) + +# Display total elapsed time for learning (type inventory) + prediction + evaluation +print("Elapsed time:", outputs.get("elapsed_time")) + +# Print all returned outputs (include predictions) +print(outputs) diff --git a/ontolearner/__init__.py b/ontolearner/__init__.py index d9ba608..49b94c4 100644 --- a/ontolearner/__init__.py +++ b/ontolearner/__init__.py @@ -31,9 +31,12 @@ StandardizedPrompting, LabelMapper, RWTHDBISTaxonomyLearner, - RWTHDBISTermTypingLearner + RWTHDBISTermTypingLearner, SKHNLPZSLearner, - SKHNLPSequentialFTLearner) + SKHNLPSequentialFTLearner, + SBUNLPFewShotLearner, + SBUNLPZSLearner, + SBUNLPText2OntoLearner) from ._learner import LearnerPipeline from .processor import Processor @@ -55,6 +58,9 @@ "RWTHDBISTermTypingLearner", "SKHNLPZSLearner", "SKHNLPSequentialFTLearner", + "SBUNLPFewShotLearner", + "SBUNLPZSLearner", + "SBUNLPText2OntoLearner", "data_structure", "text2onto", "ontology", diff --git a/ontolearner/learner/__init__.py b/ontolearner/learner/__init__.py index 3c56154..4f41586 100644 --- a/ontolearner/learner/__init__.py +++ b/ontolearner/learner/__init__.py @@ -20,3 +20,6 @@ from .taxonomy_discovery.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTaxonomyLearner from .term_typing.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTermTypingLearner from .taxonomy_discovery.skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner +from .taxonomy_discovery.sbunlp import SBUNLPFewShotLearner +from .term_typing.sbunlp import SBUNLPZSLearner +from .text2onto import SBUNLPFewShotLearner as SBUNLPText2OntoLearner diff --git a/ontolearner/learner/taxonomy_discovery/__init__.py b/ontolearner/learner/taxonomy_discovery/__init__.py index 2c6b452..d52513b 100644 --- a/ontolearner/learner/taxonomy_discovery/__init__.py +++ b/ontolearner/learner/taxonomy_discovery/__init__.py @@ -14,3 +14,4 @@ from .rwthdbis import RWTHDBISSFTLearner from .skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner +from .sbunlp import SBUNLPFewShotLearner diff --git a/ontolearner/learner/taxonomy_discovery/sbunlp.py b/ontolearner/learner/taxonomy_discovery/sbunlp.py new file mode 100644 index 0000000..9fc520d --- /dev/null +++ b/ontolearner/learner/taxonomy_discovery/sbunlp.py @@ -0,0 +1,317 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import json +import importlib.util +from typing import Any, Dict, List, Optional, Tuple + +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM + +from ...base import AutoLearner + +class SBUNLPFewShotLearner(AutoLearner): + """ + Taxonomy-discovery via N×M batch prompting with a small Qwen model. + + Lifecycle + --------- + fit(): + Cache + clean training parent–child pairs. + predict(): + Chunk (train pairs × test terms), prompt per chunk pair, parse, merge, + and deduplicate predicted relations. + """ + + def __init__( + self, + model_name: str = "Qwen/Qwen2.5-0.5B-Instruct", + try_4bit: bool = True, + num_train_chunks: int = 7, + num_test_chunks: int = 7, + max_new_tokens: int = 140, + max_input_tokens: int = 1500, + temperature: float = 0.0, + top_p: float = 1.0, + limit_num_prompts: Optional[int] = None, + output_dir: Optional[str] = None, + **kwargs: Any, + ) -> None: + super().__init__(**kwargs) + self.model_name = model_name + self.try_4bit = try_4bit + + self.num_train_chunks = num_train_chunks + self.num_test_chunks = num_test_chunks + + self.max_new_tokens = max_new_tokens + self.max_input_tokens = max_input_tokens + self.temperature = temperature + self.top_p = top_p + self.limit_num_prompts = limit_num_prompts + + self.output_dir = output_dir + + self.tokenizer: Optional[AutoTokenizer] = None + self.model: Optional[AutoModelForCausalLM] = None + self.device = "cuda" if torch.cuda.is_available() else "cpu" + + self.train_pairs_clean: List[Dict[str, str]] = [] + + # ----------------------- small helpers ---------------------- + def _clean_pairs(pair_rows: List[Dict[str, str]]) -> List[Dict[str, str]]: + """ + Normalize, drop empty or self-relations, and deduplicate by (parent, child). + """ + cleaned_pairs: List[Dict[str, str]] = [] + seen_parent_child: set[Tuple[str, str]] = set() + + for pair_record in pair_rows or []: + if not isinstance(pair_record, dict): + continue + + parent_label = str(pair_record.get("parent", "")).strip() + child_label = str(pair_record.get("child", "")).strip() + if not parent_label or not child_label: + continue + + normalized_key = (parent_label.lower(), child_label.lower()) + if normalized_key[0] == normalized_key[1]: # parent==child + continue + if normalized_key in seen_parent_child: + continue + + seen_parent_child.add(normalized_key) + cleaned_pairs.append({"parent": parent_label, "child": child_label}) + + return cleaned_pairs + + def _chunk_list(items: List[Any], num_chunks: int) -> List[List[Any]]: + """ + Split `items` into `num_chunks` near-equal parts. Some chunks may be empty. + """ + if num_chunks <= 0: + return [items] + total_items = len(items) + base_size, remainder = divmod(total_items, num_chunks) + + chunks: List[List[Any]] = [] + start_index = 0 + for chunk_index in range(num_chunks): + current_size = base_size + (1 if chunk_index < remainder else 0) + end_index = start_index + current_size + chunks.append(items[start_index:end_index]) + start_index = end_index + return chunks + + def _ensure_dir(self, maybe_path: Optional[str]) -> None: + if maybe_path: + os.makedirs(maybe_path, exist_ok=True) + + # ---------------------- model load/gen ---------------------- + def load(self, **_: Any) -> None: + """ + Load tokenizer/model; use 4-bit nf4 on CUDA if available + requested. 
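+        Falls back to plain fp16 (CUDA) or fp32 (CPU) weights without quantization
+        when bitsandbytes is unavailable or no GPU is present.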
+ """ + bnb_available = importlib.util.find_spec("bitsandbytes") is not None + use_4bit_quant = bool(self.try_4bit and bnb_available and self.device == "cuda") + + quant_config = None + if use_4bit_quant: + from transformers import BitsAndBytesConfig + quant_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + ) + + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + self.model = AutoModelForCausalLM.from_pretrained( + self.model_name, + device_map=("auto" if self.device == "cuda" else None), + torch_dtype=(torch.float16 if self.device == "cuda" else torch.float32), + quantization_config=quant_config, + ) + + def _format_chat(self, user_text: str) -> str: + """ + Wrap user text with the model's chat template (if present). + """ + if hasattr(self.tokenizer, "apply_chat_template") and getattr(self.tokenizer, "chat_template", None): + return self.tokenizer.apply_chat_template( + [{"role": "user", "content": user_text}], + tokenize=False, + add_generation_prompt=True, + ) + return user_text + + @torch.no_grad() + def _generate(self, prompt_text: str) -> str: + """ + Single prompt → model text. Clips *input* tokens to avoid overflow. + """ + formatted_prompt = self._format_chat(prompt_text) + prompt_token_ids = self.tokenizer(formatted_prompt, add_special_tokens=False, return_tensors=None)["input_ids"] + if len(prompt_token_ids) > self.max_input_tokens: + prompt_token_ids = prompt_token_ids[-self.max_input_tokens:] + + prompt_tensor = torch.tensor([prompt_token_ids]).to(self.model.device) + + generation = self.model.generate( + input_ids=prompt_tensor, + max_new_tokens=self.max_new_tokens, + do_sample=(self.temperature > 0.0), + temperature=self.temperature, + top_p=self.top_p, + pad_token_id=self.tokenizer.pad_token_id, + eos_token_id=getattr(self.tokenizer, "eos_token_id", None), + use_cache=True, + ) + + decoded_full = self.tokenizer.decode(generation[0], skip_special_tokens=True) + decoded_prompt = self.tokenizer.decode(prompt_tensor[0], skip_special_tokens=True) + return decoded_full[len(decoded_prompt):].strip() if decoded_full.startswith(decoded_prompt) else decoded_full.strip() + + # ------------------ prompt build & parsing ------------------ + def _build_prompt(train_pairs_chunk: List[Dict[str, str]], + test_terms_chunk: List[str]) -> str: + """ + Few-shot with JSON examples + a block of test terms. + The model must return ONLY a JSON array of {parent, child}. + """ + examples_json = json.dumps(train_pairs_chunk, ensure_ascii=False, indent=2) + test_types_block = "\n".join(test_terms_chunk) + return ( + "From this file, extract all parent–child relations like in the examples.\n" + "Return ONLY a JSON array of objects with keys 'parent' and 'child'.\n" + "Output format:\n" + "[\n" + ' {"parent": "parent1", "child": "child1"},\n' + ' {"parent": "parent2", "child": "child2"}\n' + "]\n\n" + "EXAMPLES (JSON):\n" + f"{examples_json}\n\n" + "TEST TYPES (between [PAIR] tags):\n" + "[PAIR]\n" + f"{test_types_block}\n" + "[PAIR]\n" + "Return only JSON." + ) + + def _parse_pairs(model_text: str) -> List[Dict[str, str]]: + """ + Parse a model response into a list of {'parent','child'} dicts. 
+ """ + def deduplicate_and_normalize(dict_list: List[Dict[str, str]]) -> List[Dict[str, str]]: + return SBUNLPFewShotLearner._clean_pairs(dict_list) + + response_text = model_text.strip() + + # 1) Direct JSON list + try: + maybe_json = json.loads(response_text) + if isinstance(maybe_json, list): + return deduplicate_and_normalize(maybe_json) + except Exception: + pass + + # 2) Find outermost [ ... ] and parse that + outer_list_match = re.search(r"\[\s*(?:\{[\s\S]*?\}\s*,?\s*)*\]", response_text) + if outer_list_match: + try: + array_json = json.loads(outer_list_match.group(0)) + if isinstance(array_json, list): + return deduplicate_and_normalize(array_json) + except Exception: + pass + + # 3) Nothing parsable + return [] + + # --------------------- AutoLearner hooks -------------------- + def fit(self, train_data: Any, task: str, ontologizer: bool = True): + """ + Build the training example bank (parent–child pairs). + """ + if task != "taxonomy-discovery": + return super().fit(train_data, task, ontologizer) + + if ontologizer: + # Convert ontology object → list of {"parent","child"} gold pairs + gold_pairs_from_ontology = self.tasks_ground_truth_former( + train_data, task="taxonomy-discovery" + ) + self.train_pairs_clean = self._clean_pairs(gold_pairs_from_ontology) + else: + # Already a Python list of dicts + self.train_pairs_clean = self._clean_pairs(train_data) + + def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]: + """ + Main prediction path. Returns a deduplicated list of relations. + """ + if not test: + return None + + if self.model is None or self.tokenizer is None: + self.load() + + # Build test vocabulary of types/terms + if isinstance(data, list) and (len(data) == 0 or isinstance(data[0], str)): + test_type_list: List[str] = data + else: + test_type_list = super().tasks_data_former( + data=data, task="taxonomy-discovery", test=True + ) + + # Create N×M grid + train_chunks = self._chunk_list(self.train_pairs_clean, self.num_train_chunks) + test_chunks = self._chunk_list(test_type_list, self.num_test_chunks) + + self._ensure_dir(self.output_dir) + + merged_predicted_pairs: List[Dict[str, str]] = [] + issued_prompt_count = 0 + + for train_chunk_index, train_pairs_chunk in enumerate(train_chunks, start=1): + for test_chunk_index, test_terms_chunk in enumerate(test_chunks, start=1): + issued_prompt_count += 1 + if self.limit_num_prompts and issued_prompt_count > self.limit_num_prompts: + break + + prompt_text = self._build_prompt(train_pairs_chunk, test_terms_chunk) + model_response = self._generate(prompt_text) + parsed_relation_pairs = self._parse_pairs(model_response) + + # Optional per-batch dump for debugging + if self.output_dir: + batch_json_path = os.path.join( + self.output_dir, f"pairs_T{train_chunk_index}_S{test_chunk_index}.json" + ) + with open(batch_json_path, "w", encoding="utf-8") as fp: + json.dump(parsed_relation_pairs, fp, ensure_ascii=False, indent=2) + + merged_predicted_pairs.extend(parsed_relation_pairs) + + if self.limit_num_prompts and issued_prompt_count >= (self.limit_num_prompts or 0): + break + + # Deduplicate final list + return self._clean_pairs(merged_predicted_pairs) diff --git a/ontolearner/learner/term_typing/__init__.py b/ontolearner/learner/term_typing/__init__.py index ab5b4f8..ebd8cd9 100644 --- a/ontolearner/learner/term_typing/__init__.py +++ b/ontolearner/learner/term_typing/__init__.py @@ -13,3 +13,4 @@ # limitations under the License. 
from .rwthdbis import RWTHDBISSFTLearner +from .sbunlp import SBUNLPZSLearner diff --git a/ontolearner/learner/term_typing/sbunlp.py b/ontolearner/learner/term_typing/sbunlp.py new file mode 100644 index 0000000..f838bd0 --- /dev/null +++ b/ontolearner/learner/term_typing/sbunlp.py @@ -0,0 +1,400 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, List, Optional +import re + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +from ...base import AutoLearner + +class SBUNLPZSLearner(AutoLearner): + """ + Qwen-based blind term typing learner (Task B), implemented as an AutoLearner. + + This class reproduces the notebook logic: + - Fit phase learns the *allowed type inventory* from training data. + - Predict phase performs blind prompting per term using the learned type list. + - Outputs are restricted to the allowed types and returned as [{"id", "types"}]. + + Expected I/O (recommended): + - fit(train_data, task="term-typing", ontologizer=True): + The framework's AutoLearner.tasks_data_former() provides a unique list of + type labels; we store it to `self.allowed_types`. + - predict(eval_data, task="term-typing", ontologizer=False): + Pass a list of dicts with keys {"id": str, "term": str} so IDs are preserved. + Returns a list of dicts [{"id": ..., "types": [...] }]. + """ + + def __init__( + self, + model_id: str = "Qwen/Qwen2.5-0.5B-Instruct", + device: Optional[str] = None, + max_new_tokens: int = 64, + temperature: float = 0.0, + token: Optional[str] = None, + ) -> None: + """ + Args: + model_id: HF model id for Qwen. + device: "cuda", "mps", or "cpu". Auto-detected if None. + max_new_tokens: Generation cap per prompt. + temperature: Not used for greedy decoding (kept for future). + token: HF token if the model is gated. + """ + super().__init__() + + # Basic configuration + self.model_id = model_id + # default device detection: prefer CUDA if available + self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") + self.max_new_tokens = max_new_tokens + self.temperature = temperature + self.token = token + + # Model/tokenizer placeholders (populated by load()) + self.tokenizer: Optional[AutoTokenizer] = None + self.model: Optional[AutoModelForCausalLM] = None + + # Learned inventory of allowed type labels (populated by fit()) + self.allowed_types: List[str] = [] + + # Regex used to extract quoted strings from model output (e.g. "type") + self._quoted_re = re.compile(r'"([^"]+)"') + + def load(self, **kwargs: Any): + """ + Load Qwen model and tokenizer. + + NOTE: + - The HF arguments used here mirror your original code (`token=...`). + You may see a deprecation warning for `torch_dtype` (older transformers); + switching to `dtype=` is recommended but I did not change behavior here. + """ + # Respect overrides from kwargs if provided + model_id = kwargs.get("model_id", self.model_id) + token = kwargs.get("token", self.token) + + # Load tokenizer. If the model is gated, pass token (original code uses `token`). 
+ # If your environment requires `use_auth_token=` replace here. + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=token) + + # Ensure tokenizer has a pad token (some models omit it) + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + # Device mapping for from_pretrained -> keep same behavior as original code + device_map = "auto" if self.device != "cpu" else "cpu" + # original code used torch_dtype; left as-is to avoid behavioral change + torch_dtype = torch.float16 if self.device != "cpu" else torch.float32 + + # Load the model weights. This can be heavy; keep same params as original. + self.model = AutoModelForCausalLM.from_pretrained( + model_id, + device_map=device_map, + torch_dtype=torch_dtype, + token=token, + ) + return self + + # ------------------------------------------------------------------------- + # Fit / Predict interface + # ------------------------------------------------------------------------- + def fit(self, train_data: Any, task: str, ontologizer: bool = True): + """ + Learn the allowed type inventory from the training data. + + Expected behavior: + - If `tasks_data_former(..., test=False)` returns a list of strings, + set allowed_types to that list (deduped & sorted). + - If it returns a list of dicts (relationships), extract unique 'parent' + fields and use those as the allowed type inventory. + + This method contains a tolerant branch for the framework's custom container: + If the returned `train_fmt` is not a list but has a `.term_typings` attribute + (e.g., OntologyData object used by the framework), iterate that attribute + and collect any `types` values found. + """ + train_fmt = self.tasks_data_former(data=train_data, task=task, test=False) if ontologizer else train_data + if task != "term-typing": + raise ValueError("SBUNLPZSLearner only implements 'term-typing'.") + + # If framework passed a container with `.term_typings`, extract types from there + if not isinstance(train_fmt, list): + # handle OntologyData-like object with attribute 'term_typings' + if hasattr(train_fmt, "term_typings"): + try: + # term_typings is expected to be an iterable of objects with attribute `types` + collected = set() + for tt in getattr(train_fmt, "term_typings") or []: + # tt.types could be list[str] or a single str + if hasattr(tt, "types"): + tvals = tt.types + elif isinstance(tt, dict) and "types" in tt: + tvals = tt["types"] + else: + tvals = None + + # Normalize both list and single-string cases + if isinstance(tvals, (list, tuple, set)): + for x in tvals: + if isinstance(x, str): + collected.add(x) + elif isinstance(tvals, str): + collected.add(tvals) + + # If we successfully collected types, set allowed_types and return + if collected: + self.allowed_types = sorted(collected) + return self + # else fall through to error below (no types found) + except Exception: + # If anything unexpected occurs while iterating term_typings, + # gracefully fall through and raise the original TypeError below. + pass + + # not a supported non-list type -> keep original behavior (raise) + raise TypeError("For term-typing, expected a list of type labels at fit().") + + # At this point train_fmt is a list (original logic preserved) + if train_fmt and isinstance(train_fmt[0], dict) and "parent" in train_fmt[0]: + # Case A: Received raw relationships/pairs (e.g., from train_test_split). + # Extract unique parent types from the relationship records. 
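+            # e.g. relationship rows such as {"parent": "measurement unit", "child": "kilogram"}
+            # would contribute "measurement unit" to the inventory (values here are illustrative).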
+ unique_types = set(r.get("parent") for r in train_fmt if r.get("parent")) + self.allowed_types = sorted(unique_types) + elif all(isinstance(x, str) for x in train_fmt): + # Case B: Received a clean list of type labels (List[str]). + self.allowed_types = sorted(set(train_fmt)) + else: + # The input is a list but not in either expected format -> raise + raise TypeError("For term-typing, input data format for fit() is invalid. Expected list of strings (types) or list of relationships (dicts).") + + return self + + def predict(self, eval_data: Any, task: str, ontologizer: bool = True) -> Any: + """ + Predict types for each term. + + Expected inputs: + - With ontologizer=True: a list[str] of term strings (IDs are autogenerated). + - With ontologizer=False: a list[dict] where each dict has keys {'id','term'}. + + This method tolerantly converts common framework containers (e.g., an + OntologyData object exposing `.term_typings`) into the expected list[dict] + shape so that the internal _term_typing() can run unchanged. + """ + if task != "term-typing": + # Delegate to base for other tasks (not implemented here) + return super().predict(eval_data, task, ontologizer=ontologizer) + + def _extract_list_of_dicts_from_term_typings(obj) -> Optional[List[Dict[str, str]]]: + """ + Helper: try to produce a list of {"id","term"} dicts from objects + exposing a `term_typings` iterable. Supports either object-like + TermTyping (attributes) or dict-style entries. + """ + tts = getattr(obj, "term_typings", None) + if tts is None: + return None + out = [] + for tt in tts: + # support object-style TermTyping (attributes) and dict-style + if isinstance(tt, dict): + # try several common key names for ID + tid = tt.get("ID") or tt.get("id") or tt.get("Id") or tt.get("ID_") + tterm = tt.get("term") or tt.get("label") or tt.get("name") + else: + # object-style access + tid = getattr(tt, "ID", None) or getattr(tt, "id", None) or getattr(tt, "Id", None) + tterm = getattr(tt, "term", None) or getattr(tt, "label", None) or getattr(tt, "name", None) + if tid is None or tterm is None: + # skip malformed entry - this is defensive so downstream code has valid inputs + continue + out.append({"id": str(tid), "term": str(tterm)}) + return out if out else None + + # Case A: ontologizer=True -> framework often provides list[str] + if ontologizer: + if isinstance(eval_data, list) and all(isinstance(x, str) for x in eval_data): + # Simple case: convert list of terms to list of dicts with generated IDs + eval_pack = [{"id": f"TT_{i:06d}", "term": t} for i, t in enumerate(eval_data)] + else: + # Try to extract from a framework container (e.g., OntologyData) + maybe = _extract_list_of_dicts_from_term_typings(eval_data) + if maybe is not None: + eval_pack = maybe + else: + # Last resort: if eval_data is some iterable of strings, convert it + try: + if hasattr(eval_data, "__iter__") and not isinstance(eval_data, (str, bytes)): + lst = list(eval_data) + if all(isinstance(x, str) for x in lst): + eval_pack = [{"id": f"TT_{i:06d}", "term": t} for i, t in enumerate(lst)] + else: + raise TypeError("With ontologizer=True, eval_data must be list[str] of terms.") + else: + raise TypeError("With ontologizer=True, eval_data must be list[str] of terms.") + except TypeError: + # re-raise to preserve original error semantics + raise + # Delegate to internal inference routine + return self._term_typing(eval_pack, test=True) + + # Case B: ontologizer=False -> we expect list[dict], but tolerate common containers + else: + if isinstance(eval_data, 
list) and all(isinstance(x, dict) for x in eval_data): + eval_pack = eval_data + else: + # Try to extract from framework container (term_typings) + maybe = _extract_list_of_dicts_from_term_typings(eval_data) + if maybe is not None: + eval_pack = maybe + else: + # As a final attempt, allow eval_data to be a dict with a list under some known keys + if isinstance(eval_data, dict): + for key in ("term_typings", "terms", "items"): + if key in eval_data and isinstance(eval_data[key], (list, tuple)): + converted = [] + for x in eval_data[key]: + # Accept dict-style entries that include id and term/name + if isinstance(x, dict) and ("id" in x or "ID" in x) and ("term" in x or "name" in x): + tid = x.get("ID") or x.get("id") + tterm = x.get("term") or x.get("name") + converted.append({"id": str(tid), "term": str(tterm)}) + if converted: + eval_pack = converted + break + else: + # Could not convert; raise same TypeError as before + raise TypeError("With ontologizer=False, eval_data must be a list of dicts with keys {'id','term'}.") + else: + # Not a supported container -> raise + raise TypeError("With ontologizer=False, eval_data must be a list of dicts with keys {'id','term'}.") + # Delegate to internal inference routine + return self._term_typing(eval_pack, test=True) + + + # ------------------------------------------------------------------------- + # Internal task implementations (AutoLearner hooks) + # ------------------------------------------------------------------------- + def _term_typing(self, data: Any, test: bool = False) -> Optional[Any]: + """ + Core implementation: + - training mode (test=False): `data` is a list of allowed type labels -> store them. + - inference mode (test=True): `data` is a list of {"id","term"} -> produce [{"id","types"}]. + """ + if not test: + # training: expect a list of strings (type labels) + if not isinstance(data, list): + raise TypeError("Expected a list of type labels at training time.") + self.allowed_types = sorted(set(data)) + return None + + # Inference path + if not isinstance(data, list) or not all(isinstance(x, dict) for x in data): + raise TypeError("At prediction time, expected a list of {'id','term'} dicts.") + + # Ensure model and tokenizer are loaded + if self.model is None or self.tokenizer is None: + raise RuntimeError("Model/tokenizer not loaded. Call .load() before predict().") + + results = [] + for item in data: + # preserve incoming IDs and terms + term_id = item["id"] + term_text = item["term"] + + # build the blind JSON-prompt that instructs the model to output types + prompt = self._build_blind_prompt(term_id, term_text, self.allowed_types) + + # generate and parse model output into allowed types + types = self._generate_and_parse_types(prompt) + + # append result for this term (keep original id) + # include the original term so downstream evaluation (and any consumers) can match by term + results.append({"id": term_id, "term": term_text, "types": types}) + + return results + + # ------------------------------------------------------------------------- + # Prompting + parsing + # ------------------------------------------------------------------------- + + def _format_types_inline(allowed: List[str]) -> str: + """ + Format allowed types as comma-separated quoted strings for insertion into the prompt. + Example: '"type1", "type2", "type3"' + """ + return ", ".join(f'"{t}"' for t in allowed) + + def _build_blind_prompt(self, term_id: str, term: str, allowed_types: List[str]) -> str: + """ + Construct the prompt given a single term. 
The prompt: + - Instructs the model to produce a JSON array of {id, types} objects. + - Provides the allowed types list (so the model should only use those). + - Includes the single input item for which the model must decide types. + + Note: This is the same blind-prompting approach used in the original notebook. + """ + allowed_str = self._format_types_inline(allowed_types) + return ( + "Identify the type(s) of the term in a second JSON file.\n" + "A term can have more than one type.\n" + "Output file must be in this format:\n" + "[\n" + '{ "id": "TT_465e8904", "types": [ "type1" ] },\n' + '{ "id": "TT_01c7707e", "types": [ "type2", "type3" ] },\n' + '{ "id": "TT_b20cb478", "types": [ "type4" ] }\n' + "]\n" + "The id must be taken from the input JSON file.\n" + "You must find the type(s) for each term in the JSON file.\n" + "Types must be selected only from the types list.\n\n" + f"Types list: {allowed_str}\n\n" + f'{{ "id": "{term_id}", "term": "{term}" }}' + ) + + def _generate_and_parse_types(self, prompt: str) -> List[str]: + """ + Greedy generate, then extract quoted strings and filter by allowed types. + + Important details: + - We assert model/tokenizer presence before calling. + - Tokenized inputs are moved to the model device (original code uses .to(self.model.device)). + - The decoded text is scanned for quoted substrings using self._quoted_re. + - Only quoted strings that are present in self.allowed_types are kept. + - Returned list is unique & sorted for deterministic ordering. + """ + assert self.model is not None and self.tokenizer is not None + + # Tokenize prompt and move tensors to model device to avoid device mismatch + inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device) + + with torch.no_grad(): + outputs = self.model.generate( + **inputs, + max_new_tokens=self.max_new_tokens, + do_sample=False, # deterministic (greedy) decoding + pad_token_id=self.tokenizer.eos_token_id, + ) + + # Decode full generated sequence (prompt + generation). Then extract quoted strings. + text = self.tokenizer.decode(outputs[0], skip_special_tokens=True) + candidates = self._quoted_re.findall(text) + + # Filter candidates to the allowed inventory + filtered = [c for c in candidates if c in self.allowed_types] + + # Return unique & sorted for stability across runs + return sorted(set(filtered)) diff --git a/ontolearner/learner/text2onto/__init__.py b/ontolearner/learner/text2onto/__init__.py new file mode 100644 index 0000000..30e8372 --- /dev/null +++ b/ontolearner/learner/text2onto/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
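# Illustrative sketch: the `_generate_and_parse_types` step in rwthdbis.py above only needs a
# precompiled regex for double-quoted substrings plus the allowed-type inventory. A minimal
# standalone version of the same idea, assuming `self._quoted_re` is equivalent to
# re.compile(r'"([^"]+)"') (the compiled pattern is defined earlier in that file, outside this hunk):

import re
from typing import List

QUOTED_RE = re.compile(r'"([^"]+)"')  # assumed stand-in for self._quoted_re

def parse_types(generated_text: str, allowed_types: List[str]) -> List[str]:
    """Extract quoted strings from the decoded output and keep only allowed type labels."""
    candidates = QUOTED_RE.findall(generated_text)
    return sorted({c for c in candidates if c in allowed_types})

# Quoted non-type tokens such as "id" or "TT_..." are dropped by the allowed-list filter.
raw = '[ { "id": "TT_000001", "types": [ "Crop", "Practice" ] } ]'
print(parse_types(raw, ["Crop", "Practice", "Trait"]))  # -> ['Crop', 'Practice']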
+ +from .sbunlp import SBUNLPFewShotLearner diff --git a/ontolearner/learner/text2onto/sbunlp.py b/ontolearner/learner/text2onto/sbunlp.py new file mode 100644 index 0000000..8ab617d --- /dev/null +++ b/ontolearner/learner/text2onto/sbunlp.py @@ -0,0 +1,525 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +#      https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import random +import re +import ast +import gc +from typing import Any, Dict, List, Optional, Set, Tuple +from collections import defaultdict + +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig + +from ...base import AutoLearner, AutoLLM + +# ----------------------------------------------------------------------------- +# Concrete AutoLLM: local HF wrapper that follows the AutoLLM interface +# ----------------------------------------------------------------------------- +class LocalAutoLLM(AutoLLM): + """ + Handles loading and generation for a Hugging Face Causal Language Model (Qwen/TinyLlama). + Uses 4-bit quantization for efficiency and greedy decoding by default. + """ + + def __init__(self, label_mapper: Any = None, device: str = "cpu", token: str = "") -> None: + super().__init__(label_mapper=label_mapper, device=device, token=token) + self.model = None + self.tokenizer = None + + def load(self, model_id: str, load_in_4bit: bool = False, dtype: str = "auto", trust_remote_code: bool = True): + """Load tokenizer + model, applying 4-bit quantization if specified and possible.""" + + # Determine the target data type (default to float32 for CPU, float16 for GPU) + torch_dtype_val = (torch.float16 if torch.cuda.is_available() else torch.float32) + + # Load the tokenizer + self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code) + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + quant_config = None + if load_in_4bit: + # Configure BitsAndBytes for 4-bit loading + quant_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + ) + if torch_dtype_val is None: + torch_dtype_val = torch.float16 + + # Set device mapping (auto for multi-GPU or single GPU, explicit CPU otherwise) + device_map = "auto" if (self.device != "cpu") else {"": "cpu"} + + # Load the Causal Language Model + self.model = AutoModelForCausalLM.from_pretrained( + model_id, + device_map=device_map, + torch_dtype=torch_dtype_val, + quantization_config=quant_config, + trust_remote_code=trust_remote_code, + ) + + # Ensure model is on the correct device (redundant if device_map="auto" but safe) + if self.device == "cpu": + self.model.to("cpu") + + def generate(self, inputs: List[str], max_new_tokens: int = 64, temperature: float = 0.0, top_p: float = 1.0) -> List[str]: + """Generate continuations for a list of prompts, returning only the generated part.""" + if self.model is None or self.tokenizer is None: + raise RuntimeError("Model/tokenizer not loaded. 
Call .load() first.") + + # --- Generation Setup --- + # Tokenize batch (padding is essential for batch inference) + enc = self.tokenizer(inputs, return_tensors="pt", padding=True, truncation=True) + input_ids = enc["input_ids"] + attention_mask = enc["attention_mask"] + + # Move tensors to the model's device (e.g., cuda:0) + model_device = next(self.model.parameters()).device + input_ids = input_ids.to(model_device) + attention_mask = attention_mask.to(model_device) + + # --- Generate --- + with torch.no_grad(): + outputs = self.model.generate( + input_ids=input_ids, + attention_mask=attention_mask, + max_new_tokens=max_new_tokens, + do_sample=(temperature > 0.0), # Use greedy decoding if temperature is 0.0 + temperature=temperature, + top_p=top_p, + pad_token_id=self.tokenizer.eos_token_id, + ) + + # --- Post-processing: Extract only the generated tail --- + decoded_outputs: List[str] = [] + for i, output_ids in enumerate(outputs): + full_decoded_text = self.tokenizer.decode(output_ids, skip_special_tokens=True) + prompt_text = self.tokenizer.decode(input_ids[i], skip_special_tokens=True) + + # Safely strip the prompt text from the full output + if full_decoded_text.startswith(prompt_text): + generated_tail = full_decoded_text[len(prompt_text):].strip() + else: + # Fallback extraction (less robust if padding affects token indices) + prompt_len = input_ids.shape[1] + generated_tail = self.tokenizer.decode(output_ids[prompt_len:], skip_special_tokens=True).strip() + decoded_outputs.append(generated_tail) + + return decoded_outputs + +# ----------------------------------------------------------------------------- +# Main Learner: SBUNLPFewShotLearner (Task A Text2Onto) +# ----------------------------------------------------------------------------- +class SBUNLPFewShotLearner(AutoLearner): + """ + Concrete learner implementing the Task A Text2Onto pipeline (Term and Type Extraction). + It uses Few-Shot prompts generated from training data for inference. + """ + + def __init__(self, model: Optional[AutoLLM] = None, device: str = "cpu"): + super().__init__() + # self.model is an instance of LocalAutoLLM + self.model = model or LocalAutoLLM(device=device) + self.device = device + # Cached in-memory prompt blocks built during the fit phase + self.fewshot_terms_block: str = "" + self.fewshot_types_block: str = "" + + # --- Few-shot construction (terms) --- + def build_stratified_fewshot_prompt( + self, + documents_path: str, + terms_path: str, + sample_size: int = 28, + seed: int = 123, + max_chars_per_text: int = 1200, + ) -> str: + """ + Builds the few-shot exemplar block for Term Extraction using stratified sampling. 
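# Illustrative usage sketch of the LocalAutoLLM wrapper defined above (prompt text is a placeholder;
# 4-bit loading only takes effect when bitsandbytes and a CUDA GPU are available):

import torch
from ontolearner.learner.text2onto.sbunlp import LocalAutoLLM  # module added by this patch

device = "cuda" if torch.cuda.is_available() else "cpu"
llm = LocalAutoLLM(device=device)
llm.load("Qwen/Qwen2.5-0.5B-Instruct", load_in_4bit=(device == "cuda"))

# generate() returns only the continuation: the prompt text is stripped from each decoded output.
tails = llm.generate(
    ["Extract ontology terms from: 'Coral reefs host diverse fish species.'"],
    max_new_tokens=32,
    temperature=0.0,  # greedy decoding
)
print(tails[0])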
+ """ + random.seed(seed) + + # Read documents (JSONL) into a list + corpus_documents: List[Dict[str, Any]] = [] + with open(documents_path, "r", encoding="utf-8") as file_handle: + for line in file_handle: + if line.strip(): + corpus_documents.append(json.loads(line)) + + num_total_docs = len(corpus_documents) + num_sample_docs = min(sample_size, num_total_docs) + + # Load the map of term -> [list of document IDs] + with open(terms_path, "r", encoding="utf-8") as file_handle: + term_to_doc_map = json.load(file_handle) + + # Invert map: document ID -> [list of terms] + doc_id_to_terms_map = defaultdict(list) + for term, doc_ids in term_to_doc_map.items(): + for doc_id in doc_ids: + doc_id_to_terms_map[doc_id].append(term) + + # Define strata (groups of documents associated with specific terms) + strata_map = defaultdict(list) + for doc in corpus_documents: + doc_id = doc.get("id", "") + associated_terms = doc_id_to_terms_map.get(doc_id, ["no_term"]) + for term in associated_terms: + strata_map[term].append(doc) + + # Perform proportional sampling across strata + sampled_documents: List[Dict[str, Any]] = [] + for term_str, stratum_docs in strata_map.items(): + num_stratum_docs = len(stratum_docs) + if num_stratum_docs == 0: + continue + + # Calculate proportional sample size + proportion = num_stratum_docs / num_total_docs + num_to_sample_from_stratum = int(num_sample_docs * proportion) + + if num_to_sample_from_stratum > 0: + sampled_documents.extend(random.sample(stratum_docs, min(num_to_sample_from_stratum, num_stratum_docs))) + + # Deduplicate sampled documents by ID and adjust count to exactly 'sample_size' + unique_docs_by_id = {} + for doc in sampled_documents: + unique_docs_by_id[doc.get("id", "")] = doc + + final_sample_docs = list(unique_docs_by_id.values()) + + if len(final_sample_docs) > num_sample_docs: + final_sample_docs = random.sample(final_sample_docs, num_sample_docs) + elif len(final_sample_docs) < num_sample_docs: + remaining_docs = [d for d in corpus_documents if d.get("id", "") not in unique_docs_by_id] + needed_count = min(num_sample_docs - len(final_sample_docs), len(remaining_docs)) + final_sample_docs.extend(random.sample(remaining_docs, needed_count)) + + # Format the few-shot exemplar text block + prompt_lines: List[str] = [] + for doc in final_sample_docs: + doc_id = doc.get("id", "") + title = doc.get("title", "") + text = doc.get("text", "") + + # Truncate text if it exceeds the maximum character limit + if max_chars_per_text and len(text) > max_chars_per_text: + text = text[:max_chars_per_text] + "…" + + associated_terms = doc_id_to_terms_map.get(doc_id, []) + prompt_lines.append( + f"Document ID: {doc_id}\nTitle: {title}\nText: {text}\nAssociated Terms: {associated_terms}\n----------------------------------------" + ) + + prompt_block = "\n".join(prompt_lines) + self.fewshot_terms_block = prompt_block + return prompt_block + + # --- Few-shot construction (types) --- + def build_types_fewshot_block( + self, + docs_jsonl: str, + terms2doc_json: str, + sample_per_term: int = 1, + full_word: bool = True, + case_sensitive: bool = True, + max_chars_per_text: int = 800, + ) -> str: + """ + Builds the few-shot block for Type Extraction. + This method samples documents based on finding an associated term/type within the text. 
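# The allocation above is plain proportional sampling: each stratum contributes roughly
# sample_size * |stratum| / |corpus| exemplars, after which the method deduplicates by id and
# tops up because int() rounding can leave the total short. A toy sketch of the allocation rule
# (simplified data shapes, not the real document format):

import random
from typing import Dict, List

def allocate_proportionally(strata: Dict[str, List[dict]], sample_size: int, seed: int = 123) -> List[dict]:
    """Pick ~sample_size docs, each stratum contributing in proportion to its size."""
    random.seed(seed)
    total = sum(len(docs) for docs in strata.values())
    picked: List[dict] = []
    for docs in strata.values():
        quota = int(sample_size * len(docs) / total)  # proportional share, rounded down
        if quota > 0:
            picked.extend(random.sample(docs, min(quota, len(docs))))
    return picked

strata = {"coral": [{"id": f"c{i}"} for i in range(8)], "reef": [{"id": f"r{i}"} for i in range(2)]}
print(len(allocate_proportionally(strata, sample_size=5)))  # -> 5 (4 from 'coral', 1 from 'reef')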
+ """ + # Load documents into dict by ID + docs_by_id = {} + with open(docs_jsonl, "r", encoding="utf-8") as file_handle: + for line in file_handle: + line_stripped = line.strip() + if line_stripped: + try: + doc = json.loads(line_stripped) + doc_id = doc.get("id", "") + if doc_id: + docs_by_id[doc_id] = doc + except json.JSONDecodeError: + continue + + # Load term -> [doc_id,...] map + with open(terms2doc_json, "r", encoding="utf-8") as file_handle: + term_to_doc_map = json.load(file_handle) + + flags = 0 if case_sensitive else re.IGNORECASE + prompt_lines: List[str] = [] + + # Iterate over terms (which act as types in this context) + for term, doc_ids in term_to_doc_map.items(): + escaped_term = re.escape(term) + # Create regex pattern for matching the term in the text + pattern = rf"\b{escaped_term}\b" if full_word else escaped_term + term_regex = re.compile(pattern, flags=flags) + + picked_count = 0 + for doc_id in doc_ids: + doc = docs_by_id.get(doc_id) + if not doc: + continue + + title = doc.get("title", "") + text = doc.get("text", "") + + # Check if the term/type is actually present in the document text/title + if term_regex.search(f"{title} {text}"): + text_content = text + + # Truncate text if necessary + if max_chars_per_text and len(text_content) > max_chars_per_text: + text_content = text_content[:max_chars_per_text] + "…" + + # Escape single quotes in the term for Python list formatting in the prompt + term_for_prompt = term.replace("'", "\\'") + + prompt_lines.append( + f"Document ID: {doc_id}\nTitle: {title}\nText: {text_content}\nAssociated Types: ['{term_for_prompt}']\n----------------------------------------" + ) + picked_count += 1 + + if picked_count >= sample_per_term: + break # Move to the next term + + prompt_block = "\n".join(prompt_lines) + self.fewshot_types_block = prompt_block + return prompt_block + + def fit(self, train_docs_jsonl: str, terms2doc_json: str, sample_size: int = 28, seed: int = 123) -> None: + """ + Fit phase: Builds and caches the few-shot prompt blocks from the training files. + No model training occurs (Few-Shot/In-Context Learning). + """ + # Build prompt block for Term extraction + _ = self.build_stratified_fewshot_prompt(train_docs_jsonl, terms2doc_json, sample_size=sample_size, seed=seed) + # Build prompt block for Type extraction + _ = self.build_types_fewshot_block(train_docs_jsonl, terms2doc_json, sample_per_term=1) + + # ------------------------- + # Inference helpers (prompt construction and output parsing) + # ------------------------- + def _build_term_prompt(self, example_block: str, title: str, text: str) -> str: + """Constructs the full prompt for Term Extraction.""" + return f"""{example_block} + [var] + Title: {title} + Text: {text} + [var] + Extract all relevant terms that could form the basis of an ontology from the above document. + Return ONLY a Python list like ['term1', 'term2', ...] and nothing else. + If no terms are found, return []. + """ + + def _build_type_prompt(self, example_block: str, title: str, text: str) -> str: + """Constructs the full prompt for Type Extraction.""" + return f"""{example_block} + [var] + Title: {title} + Text: {text} + [var] + Extract all relevant TYPES mentioned in the above document that could serve as ontology classes. + Only consider content inside the [var] ... [var] block. + Return ONLY a valid Python list like ['type1', 'type2'] and nothing else. If none, return []. 
+ """ + + def _parse_list_like(self, raw_string: str) -> List[str]: + """Try to extract a Python list of strings from model output robustly.""" + processed_string = raw_string.strip() + if processed_string in ("[]", ""): + return [] + + # 1. Try direct evaluation + try: + parsed_value = ast.literal_eval(processed_string) + if isinstance(parsed_value, list): + # Filter to ensure only strings are returned + return [item for item in parsed_value if isinstance(item, str)] + except Exception: + pass + + # 2. Try finding and evaluating text within outermost brackets [ ... ] + bracket_match = re.search(r"\[[\s\S]*?\]", processed_string) + if bracket_match: + try: + parsed_value = ast.literal_eval(bracket_match.group(0)) + if isinstance(parsed_value, list): + return [item for item in parsed_value if isinstance(item, str)] + except Exception: + pass + + # 3. Fallback: Find comma-separated quoted substrings (less robust, but catches errors) + # Finds content inside either single quotes ('...') or double quotes ("...") + quoted_matches = re.findall(r"'([^']+)'|\"([^\"]+)\"", processed_string) + flattened_list = [a_match or b_match for a_match, b_match in quoted_matches] + return flattened_list + + def _call_model_one(self, prompt: str, max_new_tokens: int = 120) -> str: + """Calls the underlying LocalAutoLLM for a single prompt. Returns the raw tail output.""" + # self.model is an instance of LocalAutoLLM + model_output = self.model.generate([prompt], max_new_tokens=max_new_tokens, temperature=0.0, top_p=1.0) + return model_output[0] if model_output else "" + + def predict_terms(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = -1, max_new_tokens: int = 120) -> int: + """ + Runs Term Extraction on the test documents and saves results to a JSONL file. + Returns: The count of individual terms written. + """ + if not self.fewshot_terms_block: + raise RuntimeError("Few-shot block for terms is empty. Call fit() first.") + + num_written_terms = 0 + with open(docs_test_jsonl, "r", encoding="utf-8") as file_in, open(out_jsonl, "w", encoding="utf-8") as file_out: + for line_index, line in enumerate(file_in, start=1): + if 0 < max_lines < line_index: + break + + try: + document = json.loads(line.strip()) + except Exception: + continue # Skip malformed JSON lines + + doc_id = document.get("id", "unknown") + title = document.get("title", "") + text = document.get("text", "") + + # Construct and call model + prompt = self._build_term_prompt(self.fewshot_terms_block, title, text) + raw_output = self._call_model_one(prompt, max_new_tokens=max_new_tokens) + predicted_terms = self._parse_list_like(raw_output) + + # Write extracted terms + for term_or_type in predicted_terms: + if isinstance(term_or_type, str) and term_or_type.strip(): + file_out.write(json.dumps({"doc_id": doc_id, "term": term_or_type.strip()}) + "\n") + num_written_terms += 1 + + # Lightweight memory management for long runs + if line_index % 50 == 0: + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + return num_written_terms + + def predict_types(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = -1, max_new_tokens: int = 120) -> int: + """ + Runs Type Extraction on the test documents and saves results to a JSONL file. + Returns: The count of individual types written. + """ + if not self.fewshot_types_block: + raise RuntimeError("Few-shot block for types is empty. 
Call fit() first.") + + num_written_types = 0 + with open(docs_test_jsonl, "r", encoding="utf-8") as file_in, open(out_jsonl, "w", encoding="utf-8") as file_out: + for line_index, line in enumerate(file_in, start=1): + if 0 < max_lines < line_index: + break + + try: + document = json.loads(line.strip()) + except Exception: + continue # Skip malformed JSON lines + + doc_id = document.get("id", "unknown") + title = document.get("title", "") + text = document.get("text", "") + + # Construct and call model using the dedicated type prompt block + prompt = self._build_type_prompt(self.fewshot_types_block, title, text) + raw_output = self._call_model_one(prompt, max_new_tokens=max_new_tokens) + predicted_types = self._parse_list_like(raw_output) + + # Write extracted types + for term_or_type in predicted_types: + if isinstance(term_or_type, str) and term_or_type.strip(): + file_out.write(json.dumps({"doc_id": doc_id, "type": term_or_type.strip()}) + "\n") + num_written_types += 1 + + if line_index % 50 == 0: + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + return num_written_types + + # --- Evaluation utilities (unchanged from prior definition, added docstrings) --- + def load_gold_pairs(self, terms2doc_path: str) -> Set[Tuple[str, str]]: + """Convert terms2docs JSON into a set of unique (doc_id, term) pairs, lowercased.""" + gold_pairs = set() + with open(terms2doc_path, "r", encoding="utf-8") as file_handle: + term_to_doc_map = json.load(file_handle) + + for term, doc_ids in term_to_doc_map.items(): + clean_term = term.strip().lower() + for doc_id in doc_ids: + gold_pairs.add((doc_id, clean_term)) + return gold_pairs + + def load_predicted_pairs(self, predicted_jsonl_path: str, key: str = "term") -> Set[Tuple[str, str]]: + """Load predicted (doc_id, term/type) pairs from a JSONL file, lowercased.""" + predicted_pairs = set() + with open(predicted_jsonl_path, "r", encoding="utf-8") as file_handle: + for line in file_handle: + try: + entry = json.loads(line.strip()) + except Exception: + continue + doc_id = entry.get("doc_id") + value = entry.get(key) + if doc_id and value: + predicted_pairs.add((doc_id, value.strip().lower())) + return predicted_pairs + + def evaluate_extraction_f1(self, terms2doc_path: str, predicted_jsonl: str, key: str = "term") -> float: + """ + Computes set-based binary Precision, Recall, and F1 score against the gold pairs. 
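# Concretely, the metric treats every (doc_id, term) pair in the union of gold and predicted sets
# as one binary decision. A tiny worked sketch with toy pairs:

from sklearn.metrics import precision_recall_fscore_support

gold = {("d1", "coral reef"), ("d1", "fish"), ("d2", "mangrove")}
pred = {("d1", "coral reef"), ("d2", "mangrove"), ("d2", "swamp")}

universe = sorted(gold | pred)                       # every pair seen in either set
y_true = [1 if p in gold else 0 for p in universe]   # 1 = pair is in the gold standard
y_pred = [1 if p in pred else 0 for p in universe]   # 1 = pair was predicted

precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="binary", zero_division=0
)
print(round(precision, 3), round(recall, 3), round(f1, 3))  # 0.667 0.667 0.667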
+ """ + # Load the ground truth and predictions + gold_set = self.load_gold_pairs(terms2doc_path) + predicted_set = self.load_predicted_pairs(predicted_jsonl, key=key) + + # Build combined universe of all pairs for score calculation + all_pairs = sorted(gold_set | predicted_set) + + # Create binary labels (1=present, 0=absent) + y_true = [1 if pair in gold_set else 0 for pair in all_pairs] + y_pred = [1 if pair in predicted_set else 0 for pair in all_pairs] + + # Use scikit-learn for metric calculation + from sklearn.metrics import precision_recall_fscore_support + precision, recall, f1, _ = precision_recall_fscore_support( + y_true, y_pred, average="binary", zero_division=0 + ) + + # Display results + num_true_positives = len(gold_set & predicted_set) + + print("\n📊 Evaluation Results:") + print(f" ✅ Precision: {precision:.4f}") + print(f" ✅ Recall: {recall:.4f}") + print(f" ✅ F1 Score: {f1:.4f}") + print(f" 📌 Gold pairs: {len(gold_set)}") + print(f" 📌 Predicted pairs:{len(predicted_set)}") + print(f" 🎯 True Positives: {num_true_positives}") + + return float(f1) From be80e735b2de7cbc48c2c5bfcb0c34b065c537a0 Mon Sep 17 00:00:00 2001 From: KrishnaRani Date: Mon, 3 Nov 2025 23:09:36 +0100 Subject: [PATCH 4/7] alexbek learner models --- .../llm_learner_alexbek_rag_term_typing.py | 50 + .../llm_learner_alexbek_rf_term_typing.py | 54 + ...er_alexbek_self_attn_taxonomy_discovery.py | 41 + examples/llm_learner_alexbek_text2onto.py | 74 ++ ontolearner/__init__.py | 10 +- ontolearner/learner/__init__.py | 3 + .../learner/taxonomy_discovery/__init__.py | 1 + .../learner/taxonomy_discovery/alexbek.py | 305 +++++ ontolearner/learner/term_typing/__init__.py | 1 + ontolearner/learner/term_typing/alexbek.py | 809 ++++++++++++ ontolearner/learner/text2onto/__init__.py | 1 + ontolearner/learner/text2onto/alexbek.py | 1084 +++++++++++++++++ 12 files changed, 2432 insertions(+), 1 deletion(-) create mode 100644 examples/llm_learner_alexbek_rag_term_typing.py create mode 100644 examples/llm_learner_alexbek_rf_term_typing.py create mode 100644 examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py create mode 100644 examples/llm_learner_alexbek_text2onto.py create mode 100644 ontolearner/learner/taxonomy_discovery/alexbek.py create mode 100644 ontolearner/learner/term_typing/alexbek.py create mode 100644 ontolearner/learner/text2onto/alexbek.py diff --git a/examples/llm_learner_alexbek_rag_term_typing.py b/examples/llm_learner_alexbek_rag_term_typing.py new file mode 100644 index 0000000..5723e36 --- /dev/null +++ b/examples/llm_learner_alexbek_rag_term_typing.py @@ -0,0 +1,50 @@ +# Import core modules from the OntoLearner library +from ontolearner import GeoNames, train_test_split, LearnerPipeline +from ontolearner import AlexbekRAGLearner + +# Load the GeoNames ontology. +ontology = GeoNames() +ontology.load() + +# Extract labeled items and split into train/test sets for evaluation +train_data, test_data = train_test_split(ontology.extract(), test_size=0.2, random_state=42) + +# Configure a Retrieval-Augmented Generation (RAG) term-typing classifier. 
+# - llm_model_id: generator used to predict types from the prompt + retrieved examples +# - retriever_model_id: encoder used to embed items and fetch top-k similar (RAG) examples +# - device: "cuda" for GPU or "cpu" +# - top_k: number of nearest examples to retrieve per query term +# - max_new_tokens: decoding budget of the LLM during prediction +# - output_dir: where intermediate artifacts / logs can be stored +rag_learner = AlexbekRAGLearner( + llm_model_id="Qwen/Qwen2.5-0.5B-Instruct", + retriever_model_id="sentence-transformers/all-MiniLM-L6-v2", + device="cuda", + top_k=3, + max_new_tokens=256, + output_dir="./results/", +) + +# Build the pipeline and pass raw structured objects end-to-end. +# We place the RAG learner in the llm slot and set llm_id accordingly. +pipe = LearnerPipeline( + llm=rag_learner, + llm_id="Qwen/Qwen2.5-0.5B-Instruct", + ontologizer_data=True, +) + +# Run the full learning pipeline on the term-typing task +# - task="term-typing" (Task B) +# - evaluate=True computes precision/recall/F1 on the held-out test split +# - ontologizer_data=True must match the pipeline flag above +outputs = pipe( + train_data=train_data, + test_data=test_data, + task="term-typing", + evaluate=True, + ontologizer_data=True, +) + +# Display the evaluation results and runtime +print("Metrics:", outputs.get("metrics")) # e.g., {'precision': ..., 'recall': ..., 'f1_micro': ..., ...} +print("Elapsed time (s):", outputs.get("elapsed_time")) diff --git a/examples/llm_learner_alexbek_rf_term_typing.py b/examples/llm_learner_alexbek_rf_term_typing.py new file mode 100644 index 0000000..c5c7454 --- /dev/null +++ b/examples/llm_learner_alexbek_rf_term_typing.py @@ -0,0 +1,54 @@ +# Import core modules from the OntoLearner library +from ontolearner import GeoNames, train_test_split, LearnerPipeline +from ontolearner import AlexbekRFLearner # A random-forest term-typing learner over text+graph features + +# Load the GeoNames ontology and extract labeled term-typing data + +ontology = GeoNames() +ontology.load() + +data = ontology.extract() + +# Split the labeled term-typing data into train and test sets +train_data, test_data = train_test_split( + data, + test_size=0.2, + random_state=42 +) + +# Configure the RF-based learner (embeddings + optional graph features) +# - device: "cpu" or "cuda" +# - threshold: decision threshold for multi-label assignment +# - use_graph_features: include ontology-graph-derived features if available +rf_learner = AlexbekRFLearner( + device="cpu", # switch to "cuda" if you have a GPU + batch_size=16, + max_length=512, # max tokenizer length for embedding model inputs + threshold=0.30, # probability cutoff for assigning each type + use_graph_features=True # set False for pure RF on text embeddings only +) + +# Build the pipeline and pass raw structured objects end-to-end. 
+pipe = LearnerPipeline( + retriever=rf_learner, + retriever_id="intfloat/e5-base-v2", # or "Qwen/Qwen3-Embedding-4B" if you have sufficient GPU memory + ontologizer_data=True, # True if data is already {"term": ..., "types": [...], ...} + device="cpu", + batch_size=16 +) + +# Run the full learning pipeline on the term-typing task +outputs = pipe( + train_data=train_data, + test_data=test_data, + task="term-typing", + evaluate=True, + ontologizer_data=True, +) + +# Display evaluation summary and runtime +print("Metrics:", outputs.get("metrics")) + +print("Elapsed time:", outputs["elapsed_time"]) + +print(ontology) diff --git a/examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py b/examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py new file mode 100644 index 0000000..b78976f --- /dev/null +++ b/examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py @@ -0,0 +1,41 @@ +from ontolearner import GeoNames, train_test_split, LearnerPipeline +from ontolearner import AlexbekCrossAttnLearner +# 1) Load & split +ontology = GeoNames() +ontology.load() +data = ontology.extract() +train_data, test_data = train_test_split(data, test_size=0.2, random_state=42) + +# 2) Configure the cross-attention learner +cross_learner = AlexbekCrossAttnLearner( + embedding_model="sentence-transformers/all-MiniLM-L6-v2", # or "Qwen/Qwen2.5-1.5B-... (if wrapped as ST)" + device="cpu", + num_heads=8, + lr=5e-5, + weight_decay=0.01, + num_epochs=1, + batch_size=256, + neg_ratio=1.0, + output_dir="./results/crossattn/", + seed=42, +) + +# 3) Build pipeline +pipeline = LearnerPipeline( + llm=cross_learner, # <- our learner + llm_id="cross-attn", # label for bookkeeping + ontologizer_data=False # pass raw ontology objects as in your example +) + +# 4) Train + predict + evaluate +outputs = pipeline( + train_data=train_data, + test_data=test_data, + task="taxonomy-discovery", + evaluate=True, + ontologizer_data=False, +) + +print("Metrics:", outputs.get("metrics")) +print("Elapsed time:", outputs["elapsed_time"]) +print(outputs) diff --git a/examples/llm_learner_alexbek_text2onto.py b/examples/llm_learner_alexbek_text2onto.py new file mode 100644 index 0000000..caf4c5b --- /dev/null +++ b/examples/llm_learner_alexbek_text2onto.py @@ -0,0 +1,74 @@ +import os +import json +import torch + +# LocalAutoLLM handles model loading/generation; AlexbekFewShotLearner provides fit/predict APIs +from ontolearner.learner.text2onto.alexbek import LocalAutoLLM, AlexbekFewShotLearner + +# Local folder where the dataset is stored (relative to this script) +DATA_DIR = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology" + +# Input paths (already saved) +TRAIN_DOCS_PATH = os.path.join(DATA_DIR, "train", "documents.jsonl") +TRAIN_TERMS2DOCS_PATH = os.path.join(DATA_DIR, "train", "terms2docs.json") +TEST_DOCS_FULL_PATH = os.path.join(DATA_DIR, "test", "text2onto_ecology_test_documents.jsonl") + +# Output paths +DOC_TERMS_OUT_PATH = os.path.join(DATA_DIR, "test", "extracted_terms_ecology.fast.jsonl") +TERMS2TYPES_OUT_PATH = os.path.join(DATA_DIR, "test", "terms2types_pred_ecology.fast.json") +TYPES2DOCS_OUT_PATH = os.path.join(DATA_DIR, "test", "types2docs_pred_ecology.fast.json") + +# Device selection +DEVICE = ( + "cuda" + if torch.cuda.is_available() + else ("mps" if torch.backends.mps.is_available() else "cpu") +) + +# Model config +MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct" +LOAD_IN_4BIT = (DEVICE == "cuda") # 4-bit helps on GPU + +# 1) Load LLM +llm = LocalAutoLLM(device=DEVICE) +llm.load(MODEL_ID, load_in_4bit=LOAD_IN_4BIT) + +# 
2) Build few-shot exemplars from training split +learner = AlexbekFewShotLearner(model=llm, device=DEVICE) +learner.fit( + train_docs_jsonl=TRAIN_DOCS_PATH, + terms2doc_json=TRAIN_TERMS2DOCS_PATH, + # use defaults for sample size/seed +) + +# 3) Predict terms per test document +os.makedirs(os.path.dirname(DOC_TERMS_OUT_PATH), exist_ok=True) +num_written_doc_terms = learner.predict_terms( + docs_test_jsonl=TEST_DOCS_FULL_PATH, + out_jsonl=DOC_TERMS_OUT_PATH, + # use defaults for max_new_tokens and few_shot_k +) +print(f"[terms] wrote {num_written_doc_terms} lines → {DOC_TERMS_OUT_PATH}") + +# 4) Predict types for extracted terms, using the JSONL we just wrote +typing_summary = learner.predict_types_from_terms( + doc_terms_jsonl=DOC_TERMS_OUT_PATH, # read the predictions directly + doc_terms_list=None, # (not needed when doc_terms_jsonl is provided) + model_id=MODEL_ID, # reuse the same small model + out_terms2types=TERMS2TYPES_OUT_PATH, + out_types2docs=TYPES2DOCS_OUT_PATH, + # use defaults for everything else +) + +print(f"[types] {typing_summary['unique_terms']} unique terms | {typing_summary['types_count']} types") +print(f"[saved] {TERMS2TYPES_OUT_PATH}") +print(f"[saved] {TYPES2DOCS_OUT_PATH}") + +# 5) Small preview of term→types +try: + with open(TERMS2TYPES_OUT_PATH, "r", encoding="utf-8") as fin: + preview = json.load(fin)[:3] + print("[preview] first 3:") + print(json.dumps(preview, ensure_ascii=False, indent=2)) +except Exception as e: + print(f"[preview] skipped: {e}") diff --git a/ontolearner/__init__.py b/ontolearner/__init__.py index 49b94c4..5ebd3f6 100644 --- a/ontolearner/__init__.py +++ b/ontolearner/__init__.py @@ -36,7 +36,11 @@ SKHNLPSequentialFTLearner, SBUNLPFewShotLearner, SBUNLPZSLearner, - SBUNLPText2OntoLearner) + SBUNLPText2OntoLearner, + AlexbekCrossAttnLearner, + AlexbekRFLearner, + AlexbekRAGLearner, + AlexbekFewShotLearner) from ._learner import LearnerPipeline from .processor import Processor @@ -61,6 +65,10 @@ "SBUNLPFewShotLearner", "SBUNLPZSLearner", "SBUNLPText2OntoLearner", + "AlexbekCrossAttnLearner", + "AlexbekRFLearner", + "AlexbekRAGLearner", + "AlexbekFewShotLearner", "data_structure", "text2onto", "ontology", diff --git a/ontolearner/learner/__init__.py b/ontolearner/learner/__init__.py index 4f41586..71020e8 100644 --- a/ontolearner/learner/__init__.py +++ b/ontolearner/learner/__init__.py @@ -23,3 +23,6 @@ from .taxonomy_discovery.sbunlp import SBUNLPFewShotLearner from .term_typing.sbunlp import SBUNLPZSLearner from .text2onto import SBUNLPFewShotLearner as SBUNLPText2OntoLearner +from .taxonomy_discovery.alexbek import AlexbekCrossAttnLearner +from .term_typing.alexbek import AlexbekRFLearner, AlexbekRAGLearner +from .text2onto.alexbek import AlexbekFewShotLearner diff --git a/ontolearner/learner/taxonomy_discovery/__init__.py b/ontolearner/learner/taxonomy_discovery/__init__.py index d52513b..57a845b 100644 --- a/ontolearner/learner/taxonomy_discovery/__init__.py +++ b/ontolearner/learner/taxonomy_discovery/__init__.py @@ -15,3 +15,4 @@ from .rwthdbis import RWTHDBISSFTLearner from .skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner from .sbunlp import SBUNLPFewShotLearner +from .alexbek import AlexbekCrossAttnLearner diff --git a/ontolearner/learner/taxonomy_discovery/alexbek.py b/ontolearner/learner/taxonomy_discovery/alexbek.py new file mode 100644 index 0000000..616d50f --- /dev/null +++ b/ontolearner/learner/taxonomy_discovery/alexbek.py @@ -0,0 +1,305 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, List, Optional, Tuple + +import math +import os +import random +import torch +import torch.nn as nn +import torch.nn.functional as F +from sentence_transformers import SentenceTransformer + +from ...base import AutoLearner + +class RMSNorm(nn.Module): + """Root Mean Square normalization with learnable scale. + + Computes: y = weight * x / sqrt(mean(x^2) + eps) + """ + + def __init__(self, dim: int, eps: float = 1e-6): + super().__init__() + self.eps = eps + self.weight = nn.Parameter(torch.ones(dim)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + rms_inv = torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) + return self.weight * (x * rms_inv) + +class CrossAttentionHead(nn.Module): + """Minimal multi-head *pair* scorer using cross-attention-style projections. + + Given child vector c and parent vector p: + q = Wq * c, k = Wk * p + per-head score = (q_h · k_h) / sqrt(d_head) + aggregate by mean across heads, then sigmoid to get probability. + """ + + def __init__(self, hidden_size: int, num_heads: int = 8, rms_norm_eps: float = 1e-6): + super().__init__() + assert hidden_size % num_heads == 0, "hidden_size must be divisible by num_heads" + self.hidden_size = hidden_size + self.num_heads = num_heads + self.dim_per_head = hidden_size // num_heads + + # Linear projections for queries (child) and keys (parent) + self.query_projection = nn.Linear(hidden_size, hidden_size, bias=False) + self.key_projection = nn.Linear(hidden_size, hidden_size, bias=False) + + # Pre-projection normalization for stability + self.query_norm = RMSNorm(hidden_size, eps=rms_norm_eps) + self.key_norm = RMSNorm(hidden_size, eps=rms_norm_eps) + + # Xavier init helps stabilize training + nn.init.xavier_uniform_(self.query_projection.weight) + nn.init.xavier_uniform_(self.key_projection.weight) + + def forward(self, child_embeddings: torch.Tensor, parent_embeddings: torch.Tensor) -> torch.Tensor: + """Score (child, parent) pairs. + + Args: + child_embeddings: Tensor of shape (batch, hidden_size) + parent_embeddings: Tensor of shape (batch, hidden_size) + Returns: + Tensor of probabilities with shape (batch,) + """ + batch_size, _ = child_embeddings.shape + + # Project and normalize + queries = self.query_norm(self.query_projection(child_embeddings)) + keys = self.key_norm(self.key_projection(parent_embeddings)) + + # Reshape into heads: (batch, heads, dim_per_head) + queries = queries.view(batch_size, self.num_heads, self.dim_per_head) + keys = keys.view(batch_size, self.num_heads, self.dim_per_head) + + # Scaled dot-product similarity per head -> (batch, heads) + per_head_scores = (queries * keys).sum(-1) / math.sqrt(self.dim_per_head) + + # Aggregate across heads -> (batch,) + mean_score = per_head_scores.mean(-1) + + # Map to probability + return torch.sigmoid(mean_score) + +class AlexbekCrossAttnLearner(AutoLearner): + """Cross-Attention Taxonomy Learner (inherits AutoLearner). + + - Encodes type strings with a SentenceTransformer. 
+ - Trains a small cross-attention head to score (parent, child) edges. + - Predicts probabilities for provided pairs. + + Helper functions live in this same module (below), *not* as class methods. + """ + + def __init__( + self, + *, + embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2", + device: str = "cpu", + num_heads: int = 8, + lr: float = 5e-5, + weight_decay: float = 0.01, + num_epochs: int = 1, + batch_size: int = 256, + neg_ratio: float = 1.0, # negatives per positive + output_dir: str = "./results/", + seed: int = 42, + **kwargs: Any, + ): + """Configure the learner. + + All configuration is kept directly on the learner (no separate Config class). + """ + super().__init__(**kwargs) + + # ----- hyperparameters / settings ----- + self.embedding_model_id = embedding_model + self.requested_device = device + self.num_heads = num_heads + self.learning_rate = lr + self.weight_decay = weight_decay + self.num_epochs = num_epochs + self.batch_size = batch_size + self.negative_ratio = neg_ratio + self.output_dir = output_dir + self.seed = seed + + # Prefer requested device but gracefully fall back to CPU + if torch.cuda.is_available() or self.requested_device == "cpu": + self.device = torch.device(self.requested_device) + else: + self.device = torch.device("cpu") + + # Will be set in load() + self.embedder: Optional[SentenceTransformer] = None + self.cross_attn_head: Optional[CrossAttentionHead] = None + self.embedding_dim: Optional[int] = None + + # Cache of term -> embedding tensor (on device) + self.term_to_vector: Dict[str, torch.Tensor] = {} + + os.makedirs(self.output_dir, exist_ok=True) + random.seed(self.seed) + torch.manual_seed(self.seed) + + def load(self, **kwargs: Any): + """Load the sentence embedding model and initialize the cross-attention head.""" + model_id = kwargs.get("embedding_model", self.embedding_model_id) + self.embedder = SentenceTransformer(model_id, trust_remote_code=True, device=str(self.device)) + + # Probe output dimensionality using a dummy encode + probe_embedding = self.embedder.encode(["_dim_probe_"], convert_to_tensor=True, normalize_embeddings=False) + self.embedding_dim = int(probe_embedding.shape[-1]) + + # Initialize the cross-attention head + self.cross_attn_head = CrossAttentionHead(hidden_size=self.embedding_dim, num_heads=self.num_heads).to( + self.device + ) + + def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]: + if self.embedder is None or self.cross_attn_head is None: + self.load() + + if not test: + positive_pairs, unique_terms = self._extract_parent_child_pairs_and_terms(data) + self._ensure_term_embeddings(unique_terms) + negative_pairs = self._sample_negative_pairs( + positive_pairs, unique_terms, ratio=self.negative_ratio, seed=self.seed + ) + self._train_cross_attn_head(positive_pairs, negative_pairs) + return None + else: + candidate_pairs, unique_terms = self._extract_parent_child_pairs_and_terms(data) + self._ensure_term_embeddings(unique_terms, append_only=True) + probabilities = self._score_parent_child_pairs(candidate_pairs) + + predictions = [ + {"parent": parent, "child": child, "score": float(prob), "label": int(prob >= 0.5)} + for (parent, child), prob in zip(candidate_pairs, probabilities) + ] + return predictions + + def _ensure_term_embeddings(self, terms: List[str], append_only: bool = False) -> None: + """Encode terms with the sentence embedder and store in cache. 
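# The pair score above boils down to a per-head scaled dot product between projected child and
# parent vectors, averaged over heads and squashed with a sigmoid. A minimal functional sketch of
# that scoring step (random matrices stand in for the trained Wq/Wk; the RMSNorm is omitted):

import math
import torch

def pair_scores(child: torch.Tensor, parent: torch.Tensor,
                wq: torch.Tensor, wk: torch.Tensor, num_heads: int) -> torch.Tensor:
    """child, parent: (batch, hidden); wq, wk: (hidden, hidden). Returns (batch,) probabilities."""
    batch, hidden = child.shape
    d_head = hidden // num_heads
    q = (child @ wq).view(batch, num_heads, d_head)   # project child -> queries, split into heads
    k = (parent @ wk).view(batch, num_heads, d_head)  # project parent -> keys, split into heads
    per_head = (q * k).sum(-1) / math.sqrt(d_head)    # scaled dot product per head: (batch, heads)
    return torch.sigmoid(per_head.mean(-1))           # average heads, map to a probability

hidden_size, heads = 384, 8                           # 384 = all-MiniLM-L6-v2 embedding width
child = torch.randn(4, hidden_size)
parent = torch.randn(4, hidden_size)
wq, wk = torch.randn(hidden_size, hidden_size), torch.randn(hidden_size, hidden_size)
print(pair_scores(child, parent, wq, wk, heads).shape)  # torch.Size([4])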
+ + Args: + terms: list of unique strings to embed + append_only: if True, only embed terms missing from cache + """ + if self.embedder is None: + raise RuntimeError("Call load() before building term embeddings") + + terms_to_encode = [t for t in terms if t not in self.term_to_vector] if append_only else terms + if not terms_to_encode: + return + + embeddings = self.embedder.encode( + terms_to_encode, + convert_to_tensor=True, + normalize_embeddings=False, + batch_size=256, + show_progress_bar=False, + ) + for term, embedding in zip(terms_to_encode, embeddings): + self.term_to_vector[term] = embedding.detach().to(self.device) + + def _pairs_as_tensors(self, pairs: List[Tuple[str, str]]) -> Tuple[torch.Tensor, torch.Tensor]: + """Turn list of (parent, child) strings into two aligned tensors on device.""" + # child embeddings tensor of shape (batch, dim) + child_tensor = torch.stack([self.term_to_vector[child] for (_, child) in pairs], dim=0).to(self.device) + # parent embeddings tensor of shape (batch, dim) + parent_tensor = torch.stack([self.term_to_vector[parent] for (parent, _) in pairs], dim=0).to(self.device) + return child_tensor, parent_tensor + + def _train_cross_attn_head(self, positive_pairs: List[Tuple[str, str]], negative_pairs: List[Tuple[str, str]]) -> None: + """Train the cross-attention head with BCE loss on labeled pairs.""" + if self.cross_attn_head is None: + raise RuntimeError("Head not initialized. Call load().") + + self.cross_attn_head.train() + optimizer = torch.optim.AdamW( + self.cross_attn_head.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay + ) + + # Build a simple supervised dataset: 1 for positive, 0 for negative + labeled_pairs: List[Tuple[int, Tuple[str, str]]] = [(1, pc) for pc in positive_pairs] + [ + (0, nc) for nc in negative_pairs + ] + random.shuffle(labeled_pairs) + + def iterate_minibatches(items: List[Tuple[int, Tuple[str, str]]], batch_size: int): + for start in range(0, len(items), batch_size): + yield items[start : start + batch_size] + + for epoch in range(self.num_epochs): + epoch_loss_sum = 0.0 + for minibatch in iterate_minibatches(labeled_pairs, self.batch_size): + labels = torch.tensor([y for y, _ in minibatch], dtype=torch.float32, device=self.device) + string_pairs = [pc for _, pc in minibatch] + child_tensor, parent_tensor = self._pairs_as_tensors(string_pairs) + + probs = self.cross_attn_head(child_tensor, parent_tensor) + loss = F.binary_cross_entropy(probs, labels) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + epoch_loss_sum += float(loss.item()) * len(minibatch) + + + def _score_parent_child_pairs(self, pairs: List[Tuple[str, str]]) -> List[float]: + """Compute probability scores for (parent, child) pairs.""" + if self.cross_attn_head is None: + raise RuntimeError("Head not initialized. 
Call load().") + + self.cross_attn_head.eval() + scores: List[float] = [] + with torch.no_grad(): + for start in range(0, len(pairs), self.batch_size): + chunk = pairs[start : start + self.batch_size] + child_tensor, parent_tensor = self._pairs_as_tensors(chunk) + prob = self.cross_attn_head(child_tensor, parent_tensor) + scores.extend(prob.detach().cpu().tolist()) + return scores + + def _extract_parent_child_pairs_and_terms(self, data): + parent_child_pairs = [] + unique_terms = set() + for edge in getattr(data, "type_taxonomies").taxonomies: + parent, child = str(edge.parent), str(edge.child) + parent_child_pairs.append((parent, child)) + unique_terms.add(parent) + unique_terms.add(child) + return parent_child_pairs, sorted(unique_terms) + + def _sample_negative_pairs(self, positive_pairs, terms, ratio: float = 1.0, seed: int = 42): + random.seed(seed) + term_list = list(terms) + positive_set = set(positive_pairs) + negatives = [] + target_negative_count = int(len(positive_pairs) * ratio) + while len(negatives) < target_negative_count: + parent = random.choice(term_list) + child = random.choice(term_list) + if parent == child: + continue + candidate = (parent, child) + if candidate in positive_set: + continue + negatives.append(candidate) + return negatives diff --git a/ontolearner/learner/term_typing/__init__.py b/ontolearner/learner/term_typing/__init__.py index ebd8cd9..a42d716 100644 --- a/ontolearner/learner/term_typing/__init__.py +++ b/ontolearner/learner/term_typing/__init__.py @@ -14,3 +14,4 @@ from .rwthdbis import RWTHDBISSFTLearner from .sbunlp import SBUNLPZSLearner +from .alexbek import AlexbekRFLearner, AlexbekRAGLearner diff --git a/ontolearner/learner/term_typing/alexbek.py b/ontolearner/learner/term_typing/alexbek.py new file mode 100644 index 0000000..7aa6033 --- /dev/null +++ b/ontolearner/learner/term_typing/alexbek.py @@ -0,0 +1,809 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import json +import re +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +import networkx as nx +from tqdm import tqdm +from sklearn.preprocessing import MultiLabelBinarizer +from sklearn.ensemble import RandomForestClassifier +from sklearn.multiclass import OneVsRestClassifier + +from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM +from sentence_transformers import SentenceTransformer + +from ...base import AutoLearner, AutoRetriever + +class AlexbekRFLearner(AutoRetriever): + """ + Embedding-based multi-label classifier for *term typing*. + + Pipeline overview: + 1) Load a Hugging Face encoder (tokenizer + model). + 2) Encode input terms into sentence embeddings. + 3) Optionally augment with simple graph (co-occurrence) features. + 4) Train a One-vs-Rest RandomForest on the concatenated features. + 5) Predict multi-label types with a probability threshold (fallback to top-1). 
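# Training pairs for the cross-attention head above are built by matching each gold edge with
# randomly drawn non-edges, mirroring _sample_negative_pairs. A compact sketch of that rejection
# sampling (toy terms; with a realistically large term inventory the loop terminates quickly):

import random
from typing import List, Set, Tuple

def sample_negatives(positives: Set[Tuple[str, str]], terms: List[str],
                     ratio: float = 1.0, seed: int = 42) -> List[Tuple[str, str]]:
    """Draw random (parent, child) pairs that are neither self-loops nor gold edges."""
    random.seed(seed)
    negatives: List[Tuple[str, str]] = []
    target = int(len(positives) * ratio)
    while len(negatives) < target:
        parent, child = random.choice(terms), random.choice(terms)
        if parent != child and (parent, child) not in positives:
            negatives.append((parent, child))
    return negatives

positives = {("animal", "dog"), ("animal", "cat"), ("plant", "fern")}
print(sample_negatives(positives, ["animal", "plant", "dog", "cat", "fern"]))  # 3 random non-edges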
+ + API expected by LearnerPipeline: + - load(model_id) + - fit(data, task, ontologizer=True) + - predict(data, task, ontologizer=True) + - tasks_ground_truth_former(data, task) + """ + + def __init__( + self, + device: str = "cpu", + batch_size: int = 16, + max_length: int = 256, + threshold: float = 0.30, + use_graph_features: bool = True, + rf_kwargs: Optional[Dict[str, Any]] = None, + ): + # Runtime / inference settings + self.device = torch.device(device) + self.batch_size = batch_size + self.max_length = max_length + self.threshold = threshold # probability cutoff for selecting labels + self.use_graph_features = use_graph_features + + # RandomForest hyperparameters (with sensible defaults) + self.rf_kwargs = rf_kwargs or dict( + n_estimators=200, max_depth=20, class_weight="balanced", random_state=42 + ) + + # Filled during load/fit + self.model_name: Optional[str] = None + self.tokenizer: Optional[AutoTokenizer] = None + self.embedding_model: Optional[AutoModel] = None + + # Label processing / classifier / optional graph + self.label_binarizer = MultiLabelBinarizer() + self.ovr_random_forest: Optional[OneVsRestClassifier] = None + self.term_graph: Optional[nx.Graph] = None + + def load(self, model_id: str, **_: Any) -> None: + """Load a Hugging Face encoder by model id (tokenizer + base model).""" + self.model_name = model_id + self.tokenizer = AutoTokenizer.from_pretrained(model_id) + self.embedding_model = AutoModel.from_pretrained(model_id) + self.embedding_model.eval().to(self.device) + + def fit(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> None: + """Train the One-vs-Rest RandomForest on term embeddings (+ optional graph features).""" + if task != "term-typing": + raise ValueError("OntologyTypeRFClassifier supports only task='term-typing'.") + + # Normalize incoming training data into a list of dicts: {term, types, RAG} + training_rows = self._as_term_types_dicts(data) + if not training_rows: + raise ValueError("No valid training examples found (need 'term' and 'types').") + + # Split out terms and raw labels + training_terms: List[str] = [row["term"] for row in training_rows] + raw_label_lists: List[List[str]] = [row["types"] for row in training_rows] + + # Fit label binarizer to learn label space/order + self.label_binarizer.fit(raw_label_lists) + + # Encode terms to sentence embeddings + term_embeddings_train = self._encode(training_terms) + + # Optionally build a light-weight co-occurrence graph and extract features + if self.use_graph_features: + self.term_graph = self._create_term_graph(training_rows) + graph_features_train = self._extract_graph_features(self.term_graph, training_terms) + X_train = np.hstack([term_embeddings_train, graph_features_train]) + else: + self.term_graph = None + X_train = term_embeddings_train + + # Multi-label targets (multi-hot) + Y_train = self.label_binarizer.transform(raw_label_lists) + + # One-vs-Rest RandomForest (one binary RF per label) + self.ovr_random_forest = OneVsRestClassifier(RandomForestClassifier(**self.rf_kwargs)) + self.ovr_random_forest.fit(X_train, Y_train) + + + def predict(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> List[Dict[str, Any]]: + """Predict multi-label types for input terms. + + Returns a list of dicts with keys: {id, term, types}. 
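# The supervised core of fit() is standard scikit-learn: multi-hot encode the type lists, then
# train one RandomForest per label via One-vs-Rest. A self-contained sketch with random feature
# vectors standing in for the encoded terms (+ optional graph features):

import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier

labels = [["City"], ["River", "Waterbody"], ["City", "Capital"]]
X = np.random.rand(3, 8)                       # stand-in for the term embeddings

mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(labels)                  # multi-hot matrix, one column per type
clf = OneVsRestClassifier(RandomForestClassifier(n_estimators=200, max_depth=20,
                                                 class_weight="balanced", random_state=42))
clf.fit(X, Y)

proba = clf.predict_proba(np.random.rand(2, 8))  # (n_samples, n_labels) probabilities
print(list(mlb.classes_), proba.shape)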
+ """ + if task != "term-typing": + raise ValueError("OntologyTypeRFClassifier supports only task='term-typing'.") + if self.ovr_random_forest is None or self.tokenizer is None or self.embedding_model is None: + raise RuntimeError("Call load() and fit() before predict().") + + # Normalize prediction input into parallel lists of terms and example ids + test_terms, example_ids = self._as_predict_terms_ids(data) + + # Encode terms + term_embeddings_test = self._encode(test_terms) + + # Match feature layout used during training + if self.use_graph_features and self.term_graph is not None: + graph_features_test = self._extract_graph_features(self.term_graph, test_terms) + X_test = np.hstack([term_embeddings_test, graph_features_test]) + else: + X_test = term_embeddings_test + + # Probabilities per label (shape: [n_samples, n_labels]) + probability_matrix = self.ovr_random_forest.predict_proba(X_test) + + predictions: List[Dict[str, Any]] = [] + label_names = self.label_binarizer.classes_ + threshold = float(self.threshold) + + # Select labels above threshold; fallback to argmax if none exceed it + for row_index, label_probabilities in enumerate(probability_matrix): + selected_label_indices = np.where(label_probabilities > threshold)[0] + if len(selected_label_indices) == 0: + selected_label_indices = [int(np.argmax(label_probabilities))] + + predicted_types = [label_names[label_idx] for label_idx in selected_label_indices] + + predictions.append( + { + "id": example_ids[row_index], + "term": test_terms[row_index], + "types": predicted_types, + } + ) + return predictions + + def tasks_ground_truth_former(self, data: Any, task: str) -> List[Dict[str, Any]]: + """Normalize ground-truth into a list of {id, term, types} dicts for evaluation.""" + if task != "term-typing": + raise ValueError("OntologyTypeRFClassifier supports only task='term-typing'.") + return self._as_gold_id_term_types(data) + + def _encode(self, texts: List[str]) -> np.ndarray: + """Encode a list of strings into L2-normalized sentence embeddings (NumPy array). + + If no texts are provided, returns an empty array with width equal to the model hidden size. + """ + assert self.tokenizer is not None and self.embedding_model is not None, "Call load(model_id) first." 
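# Label selection in predict() is a plain probability cutoff with a top-1 fallback so every term
# receives at least one type. A minimal sketch of that decision rule:

import numpy as np

def select_labels(probabilities: np.ndarray, label_names: list, threshold: float = 0.30) -> list:
    """Keep labels above the threshold; if none qualify, fall back to the single best label."""
    chosen = np.where(probabilities > threshold)[0]
    if len(chosen) == 0:
        chosen = [int(np.argmax(probabilities))]
    return [label_names[i] for i in chosen]

names = ["City", "Capital", "River", "Waterbody"]
print(select_labels(np.array([0.55, 0.41, 0.05, 0.02]), names))  # ['City', 'Capital']
print(select_labels(np.array([0.12, 0.09, 0.28, 0.22]), names))  # ['River'] (argmax fallback)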
+ + if not texts: + hidden_size = getattr(getattr(self.embedding_model, "config", None), "hidden_size", 768) + return np.zeros((0, hidden_size), dtype=np.float32) + + batch_embeddings: List[torch.Tensor] = [] + + for start_idx in tqdm(range(0, len(texts), self.batch_size), desc="Embedding"): + end_idx = start_idx + self.batch_size + batch_texts = texts[start_idx:end_idx] + + # Tokenize and move to device + tokenized_batch = self.tokenizer( + batch_texts, + padding=True, + truncation=True, + max_length=self.max_length, + return_tensors="pt", + ).to(self.device) + + # Forward pass without gradients + with torch.no_grad(): + model_output = self.embedding_model(**tokenized_batch) + + # Prefer dedicated pooler if provided; otherwise pool by last valid token + if hasattr(model_output, "pooler_output") and model_output.pooler_output is not None: + sentence_embeddings = model_output.pooler_output + else: + sentence_embeddings = self._last_token_pool( + model_output.last_hidden_state, tokenized_batch["attention_mask"] + ) + + # L2-normalize embeddings for stability + sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) + + # Detach, move to CPU, collect + batch_embeddings.append(sentence_embeddings.detach().cpu()) + + # Best-effort memory cleanup (especially useful on CUDA) + del tokenized_batch, model_output, sentence_embeddings + if self.device.type == "cuda": + torch.cuda.empty_cache() + gc.collect() + + # Concatenate all batches and convert to NumPy + return torch.cat(batch_embeddings, dim=0).numpy() + + def _last_token_pool(self, last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor: + """Select the last *non-padding* token embedding for each sequence in the batch.""" + last_valid_token_idx = attention_mask.sum(dim=1) - 1 # (batch,) + batch_row_idx = torch.arange(last_hidden_states.size(0), device=last_hidden_states.device) + return last_hidden_states[batch_row_idx, last_valid_token_idx] + + def _create_term_graph(self, training_rows: List[Dict[str, Any]]) -> nx.Graph: + """Create a simple undirected co-occurrence graph from training rows. + + Nodes: terms (with node attribute 'types'). + Edges: between a term and each neighbor from its optional RAG list. + Edge weight = number of shared types (or 0.1 if none shared). + """ + graph = nx.Graph() + + for row in training_rows: + term = row["term"] + term_types = row.get("types", []) + graph.add_node(term, types=term_types) + + # RAG may be a list of neighbor dicts like {"term": ..., "types": [...]} + for neighbor in (row.get("RAG", []) or []): + neighbor_term = neighbor.get("term") + neighbor_types = neighbor.get("types", []) + + # Shared-type-based edge weight (weak edge if no overlap) + shared_types = set(term_types).intersection(set(neighbor_types)) + edge_weight = float(len(shared_types)) if shared_types else 0.1 + + graph.add_edge(term, neighbor_term, weight=edge_weight) + + return graph + + def _extract_graph_features(self, term_graph: nx.Graph, terms: List[str]) -> np.ndarray: + """Compute simple per-term graph features. + + For each term we compute a 4-dim vector: + [degree, clustering_coefficient, degree_centrality, pagerank_score] + Returns an array of shape [len(terms), 4]. 
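# For intuition, the four per-term features can be reproduced directly with networkx on a toy
# term graph (values are illustrative; an isolated term falls back to near-zero features):

import networkx as nx

g = nx.Graph()
g.add_edge("lake", "river", weight=1.0)
g.add_edge("lake", "waterbody", weight=2.0)
g.add_edge("river", "waterbody", weight=1.0)
g.add_node("city")                          # term with no co-occurrence edges

centrality = nx.degree_centrality(g)
pagerank = nx.pagerank(g)
for term in ["lake", "city"]:
    features = [
        float(g.degree(term)),              # number of neighbours
        float(nx.clustering(g, term)),      # fraction of neighbour pairs that are themselves connected
        centrality.get(term, 0.0),
        pagerank.get(term, 0.0),
    ]
    print(term, [round(x, 3) for x in features])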
+ """ + if len(term_graph): + degree_centrality = nx.degree_centrality(term_graph) + pagerank_scores = nx.pagerank(term_graph) + else: + degree_centrality, pagerank_scores = {}, {} + + feature_rows: List[List[float]] = [] + for term in terms: + if term in term_graph: + feature_rows.append( + [ + float(term_graph.degree(term)), + float(nx.clustering(term_graph, term)), + float(degree_centrality.get(term, 0.0)), + float(pagerank_scores.get(term, 0.0)), + ] + ) + else: + feature_rows.append([0.0, 0.0, 0.0, 0.0]) + + return np.asarray(feature_rows, dtype=np.float32) + + def _as_term_types_dicts(self, data: Any) -> List[Dict[str, Any]]: + """Normalize diverse training data formats to a list of dicts: {term, types, RAG}.""" + normalized_rows: List[Dict[str, Any]] = [] + + # Case 1: object with attribute `.term_typings` + term_typings_attr = getattr(data, "term_typings", None) + if term_typings_attr is not None: + for item in term_typings_attr: + term_text = getattr(item, "term", None) + type_list = getattr(item, "types", None) + rag_neighbors = getattr(item, "RAG", None) + if term_text is None or type_list is None: + continue + if not isinstance(type_list, list): + type_list = [type_list] + normalized_rows.append( + {"term": str(term_text), "types": [str(x) for x in type_list], "RAG": rag_neighbors} + ) + return normalized_rows + + # Otherwise: must be a list/tuple-like container + if not isinstance(data, (list, tuple)): + raise ValueError("Training data must be a list/tuple or expose .term_typings") + + if not data: + return normalized_rows + + # Case 2: list of dicts + if isinstance(data[0], dict): + for row in data: + term_text = row.get("term") + type_list = row.get("types") + rag_neighbors = row.get("RAG") + if term_text is None or type_list is None: + continue + if not isinstance(type_list, list): + type_list = [type_list] + normalized_rows.append( + {"term": str(term_text), "types": [str(x) for x in type_list], "RAG": rag_neighbors} + ) + return normalized_rows + + # Case 3: list of tuples/lists: (term, types[, RAG]) + for item in data: + if not isinstance(item, (list, tuple)) or len(item) < 2: + continue + term_text, type_list = item[0], item[1] + rag_neighbors = item[2] if len(item) > 2 else None + if term_text is None or type_list is None: + continue + if not isinstance(type_list, list): + type_list = [type_list] + normalized_rows.append( + {"term": str(term_text), "types": [str(x) for x in type_list], "RAG": rag_neighbors} + ) + + return normalized_rows + + def _as_predict_terms_ids(self, data: Any) -> Tuple[List[str], List[Any]]: + """Normalize prediction input into parallel lists: (terms, ids).""" + terms: List[str] = [] + example_ids: List[Any] = [] + + # Case 1: object with attribute `.term_typings` + term_typings_attr = getattr(data, "term_typings", None) + if term_typings_attr is not None: + for idx, item in enumerate(term_typings_attr): + terms.append(str(getattr(item, "term", ""))) + example_ids.append(getattr(item, "id", getattr(item, "ID", idx))) + return terms, example_ids + + # Case 2: list/tuple container + if isinstance(data, (list, tuple)) and data: + first_element = data[0] + + # 2a) list of dicts + if isinstance(first_element, dict): + for i, row in enumerate(data): + terms.append(str(row.get("term", ""))) + example_ids.append(row.get("id", row.get("ID", i))) + return terms, example_ids + + # 2b) list of tuples/lists: (term, id[, ...]) + if isinstance(first_element, (list, tuple)): + for i, tuple_row in enumerate(data): + if not tuple_row: + continue + 
terms.append(str(tuple_row[0])) + example_ids.append(tuple_row[1] if len(tuple_row) > 1 else i) + return terms, example_ids + + # 2c) list of strings (terms only) + if isinstance(first_element, str): + terms = [str(x) for x in data] # type: ignore[arg-type] + example_ids = list(range(len(terms))) + return terms, example_ids + + raise ValueError("Unsupported predict() input format.") + + def _as_gold_id_term_types(self, data: Any) -> List[Dict[str, Any]]: + """Normalize gold labels into a list of dicts: {id, term, types}.""" + gold_rows: List[Dict[str, Any]] = [] + + # Case 1: object with attribute `.term_typings` + term_typings_attr = getattr(data, "term_typings", None) + if term_typings_attr is not None: + for idx, item in enumerate(term_typings_attr): + gold_id = getattr(item, "id", getattr(item, "ID", idx)) + term_text = str(getattr(item, "term", "")) + type_list = getattr(item, "types", []) + if not isinstance(type_list, list): + type_list = [type_list] + gold_rows.append({"id": gold_id, "term": term_text, "types": [str(t) for t in type_list]}) + return gold_rows + + # Case 2: list/tuple container + if isinstance(data, (list, tuple)) and data: + first_element = data[0] + + # 2a) list of dicts + if isinstance(first_element, dict): + for i, row in enumerate(data): + gold_id = row.get("id", row.get("ID", i)) + term_text = str(row.get("term", "")) + type_list = row.get("types", []) + if not isinstance(type_list, list): + type_list = [type_list] + gold_rows.append({"id": gold_id, "term": term_text, "types": [str(t) for t in type_list]}) + return gold_rows + + # 2b) list of tuples/lists: (term, types[, id]) + if isinstance(first_element, (list, tuple)): + for i, tuple_row in enumerate(data): + if not tuple_row or len(tuple_row) < 2: + continue + term_text = str(tuple_row[0]) + type_list = tuple_row[1] + gold_id = tuple_row[2] if len(tuple_row) > 2 else i + if not isinstance(type_list, list): + type_list = [type_list] + gold_rows.append({"id": gold_id, "term": term_text, "types": [str(t) for t in type_list]}) + return gold_rows + + raise ValueError("Unsupported ground-truth input format for tasks_ground_truth_former().") + +class AlexbekRAGLearner(AutoLearner): + """Retrieval-Augmented Term Typing learner (single task: term-typing). + + Flow: + 1) fit: collect (term -> [types]) examples, build an in-memory index + using a sentence-embedding model. + 2) predict: for each new term, retrieve top-k similar examples, compose a + structured prompt, query an instruction-tuned causal LLM, and parse types. + + Returns a list of dicts: {"term": str, "types": List[str], "id": Optional[str]}. 
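+
+    Usage sketch (minimal direct-call flow; the keyword value shown is the
+    constructor default and `train_data`/`test_data` stand in for your own
+    extracted ontology data):
+
+        learner = AlexbekRAGLearner(top_k=3)
+        learner.load()                                   # load LLM + retriever
+        learner.fit(train_data, task="term-typing")      # build the RAG index
+        results = learner.predict(test_data, task="term-typing")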
+ """ + + def __init__( + self, + llm_model_id: str = "Qwen/Qwen2.5-0.5B-Instruct", + retriever_model_id: str = "sentence-transformers/all-MiniLM-L6-v2", + device: str = "auto", # "auto" | "cuda" | "cpu" + token: str = "", # HF token if needed + top_k: int = 3, + max_new_tokens: int = 256, + gen_batch_size: int = 4, # generation batch size + enc_batch_size: int = 64, # embedding batch size + **kwargs: Any, # absorb extra pipeline-style args + ) -> None: + super().__init__() + + # Consolidated configuration for simple serialization + self.cfg: Dict[str, Any] = { + "llm_model_id": llm_model_id, + "retriever_model_id": retriever_model_id, + "device": device, + "token": token, + "top_k": int(top_k), + "max_new_tokens": int(max_new_tokens), + "gen_batch_size": int(gen_batch_size), + "enc_batch_size": int(enc_batch_size), + } + self.extra_cfg: Dict[str, Any] = dict(kwargs) + + # LLM components + self.tokenizer: Optional[AutoTokenizer] = None + self.generation_model: Optional[AutoModelForCausalLM] = None + + # Retriever components + self.embedder: Optional[SentenceTransformer] = None + self.indexed_corpus: List[str] = [] # items: " || [...]" + self.corpus_embeddings: Optional[torch.Tensor] = None + + # Training cache of (term, [types]) tuples + self.train_term_types: List[Tuple[str, List[str]]] = [] + + # Prompt templates + self._system_prompt: str = ( + "You are an expert in ontologies and semantic term classification.\n" + "Task: determine semantic types for the TERM using the EXAMPLES provided.\n" + "Rules:\n" + "1) Types must be generalizing categories from the domain ontology.\n" + "2) Be concise. Respond ONLY in JSON using double quotes.\n" + 'Format: {"term":"...", "reasoning":"<<=100 words>>", "types":["...", "..."]}\n' + ) + self._user_prompt_template: str = ( + """{examples} + + TERM: {term} + + TASK: Determine semantic types for the given term based on the domain ontology. + Remember: types are generalizing categories, not the term itself. Respond in JSON. + """ + ) + + def load( + self, + model_id: Optional[str] = None, + retriever_id: Optional[str] = None, + device: Optional[str] = None, + token: Optional[str] = None, + **kwargs: Any, + ) -> None: + """Load the LLM and the embedding retriever. 
Overrides constructor values if provided.""" + if model_id is not None: + self.cfg["llm_model_id"] = model_id + if retriever_id is not None: + self.cfg["retriever_model_id"] = retriever_id + if device is not None: + self.cfg["device"] = device + if token is not None: + self.cfg["token"] = token + self.extra_cfg.update(kwargs) + + # Choose device & dtype for the LLM + cuda_available: bool = torch.cuda.is_available() + use_cuda: bool = cuda_available and (self.cfg["device"] != "cpu") + device_map: str = "auto" if use_cuda else "cpu" + torch_dtype = torch.bfloat16 if use_cuda else torch.float32 + + # Tokenizer + self.tokenizer = AutoTokenizer.from_pretrained( + self.cfg["llm_model_id"], padding_side="left", token=self.cfg["token"] + ) + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + # LLM + self.generation_model = AutoModelForCausalLM.from_pretrained( + self.cfg["llm_model_id"], + device_map=device_map, + torch_dtype=torch_dtype, + token=self.cfg["token"], + ) + + # Deterministic decoding defaults + generation_cfg = self.generation_model.generation_config + generation_cfg.do_sample = False + generation_cfg.temperature = None + generation_cfg.top_p = None + generation_cfg.top_k = None + generation_cfg.num_beams = 1 + + # Retriever + self.embedder = SentenceTransformer(self.cfg["retriever_model_id"], trust_remote_code=True) + + def fit(self, train_data: Any, task: str, ontologizer: bool = True) -> None: + """Prepare the retrieval index from training examples.""" + if task != "term-typing": + return super().fit(train_data, task, ontologizer) + + # Normalize incoming training data -> list[(term, [types])] + self.train_term_types = self._unpack_train(train_data) + + # Build the textual corpus to index + self.indexed_corpus = [ + f"{term} || {json.dumps(types, ensure_ascii=False)}" for term, types in self.train_term_types + ] + + # Embed the corpus if available; else fall back to zero-shot prompting + if self.indexed_corpus and self.embedder is not None: + self.corpus_embeddings = self._encode_texts(self.indexed_corpus) + else: + self.corpus_embeddings = None + + def predict(self, eval_data: Any, task: str, ontologizer: bool = True) -> Any: + """Predict types for evaluation items; returns a list of {term, types, id?}.""" + if task != "term-typing": + return super().predict(eval_data, task, ontologizer) + + eval_terms, eval_ids = self._unpack_eval(eval_data) + if not eval_terms: + return [] + + # Use RAG if we have an indexed corpus & embeddings; otherwise zero-shot + rag_available = ( + self.corpus_embeddings is not None and self.embedder is not None and len(self.indexed_corpus) > 0 + ) + + if rag_available: + neighbor_docs_per_query = self._retrieve_batch(eval_terms, top_k=int(self.cfg["top_k"])) + else: + neighbor_docs_per_query = [[] for _ in eval_terms] + + # Compose prompts + prompts: List[str] = [] + for term, neighbor_docs in zip(eval_terms, neighbor_docs_per_query): + example_pairs = self._decode_examples(neighbor_docs) + examples_block = self._format_examples(example_pairs) + prompt_text = self._compose_prompt(examples_block, term) + prompts.append(prompt_text) + + predicted_types_lists = self._generate_and_parse(prompts) + + # Build standardized results + results: List[Dict[str, Any]] = [] + for term, example_id, predicted_types in zip(eval_terms, eval_ids, predicted_types_lists): + result_row: Dict[str, Any] = { + "term": term, + "types": sorted({t for t in predicted_types}), # unique + sorted + } + if example_id is not None: + 
result_row["id"] = example_id + results.append(result_row) + + assert all(("term" in row and "types" in row) for row in results), "predict() must return term + types" + return results + + def _unpack_train(self, data: Any) -> List[Tuple[str, List[str]]]: + """Extract (term, [types]) tuples from supported training payloads.""" + term_typings = getattr(data, "term_typings", None) + if term_typings is not None: + parsed_pairs: List[Tuple[str, List[str]]] = [] + for item in term_typings: + term = getattr(item, "term", None) + types = list(getattr(item, "types", []) or []) + if term and types: + parsed_pairs.append((term, [t for t in types if isinstance(t, str)])) + return parsed_pairs + + if isinstance(data, list) and data and isinstance(data[0], dict): + parsed_pairs = [] + for row in data: + term = row.get("term") + types = row.get("types") or [] + if term and isinstance(types, list) and types: + parsed_pairs.append((term, [t for t in types if isinstance(t, str)])) + return parsed_pairs + + # If only a list of strings is provided, there's nothing to index for RAG + if isinstance(data, (list, set, tuple)) and all(isinstance(x, str) for x in data): + return [] + + return [] + + def _unpack_eval(self, data: Any) -> Tuple[List[str], List[Optional[str]]]: + """Extract (terms, ids) from supported evaluation payloads.""" + term_typings = getattr(data, "term_typings", None) + if term_typings is not None: + terms: List[str] = [] + ids: List[Optional[str]] = [] + for item in term_typings: + terms.append(getattr(item, "term", "")) + ids.append(getattr(item, "id", None)) + return terms, ids + + if isinstance(data, list) and data and isinstance(data[0], str): + return list(data), [None] * len(data) + + if isinstance(data, list) and data and isinstance(data[0], dict): + terms: List[str] = [] + ids: List[Optional[str]] = [] + for row in data: + terms.append(row.get("term", "")) + ids.append(row.get("id")) + return terms, ids + + return [], [] + + def _encode_texts(self, texts: List[str]) -> torch.Tensor: + """Encode a batch of texts with the sentence-embedding model.""" + batch_size = int(self.cfg["enc_batch_size"]) + batch_embeddings: List[torch.Tensor] = [] + + for batch_start in range(0, len(texts), batch_size): + batch_texts = texts[batch_start : batch_start + batch_size] + embeddings = self.embedder.encode(batch_texts, convert_to_tensor=True, show_progress_bar=False) + batch_embeddings.append(embeddings) + + return torch.cat(batch_embeddings, dim=0) if batch_embeddings else torch.empty(0) + + def _retrieve_batch(self, queries: List[str], top_k: int) -> List[List[str]]: + """Return for each query the top-k most similar corpus entries (as raw text rows).""" + if self.corpus_embeddings is None or not self.indexed_corpus: + return [[] for _ in queries] + + query_embeddings = self._encode_texts(queries) # [Q, D] + doc_embeddings = self.corpus_embeddings # [N, D] + if query_embeddings.shape[-1] != doc_embeddings.shape[-1]: + raise ValueError( + f"Embedding dim mismatch: {query_embeddings.shape[-1]} vs {doc_embeddings.shape[-1]}" + ) + + # Cosine similarity via L2-normalized dot product + q_norm = F.normalize(query_embeddings, p=2, dim=1) + d_norm = F.normalize(doc_embeddings, p=2, dim=1) + cos_sim = torch.matmul(q_norm, d_norm.T) # [Q, N] + + k = min(max(1, top_k), len(self.indexed_corpus)) + _, top_indices = torch.topk(cos_sim, k=k, dim=1) + return [[self.indexed_corpus[j] for j in row.tolist()] for row in top_indices] + + def _decode_examples(self, docs: List[str]) -> List[Tuple[str, List[str]]]: + 
"""Parse raw corpus rows ('term || [types]') into (term, [types]) pairs.""" + example_pairs: List[Tuple[str, List[str]]] = [] + for raw_row in docs: + try: + term_raw, types_json = raw_row.split("||", 1) + term = term_raw.strip() + types_list = json.loads(types_json.strip()) + if isinstance(types_list, list): + example_pairs.append((term, [t for t in types_list if isinstance(t, str)])) + except Exception: + continue + return example_pairs + + def _format_examples(self, pairs: List[Tuple[str, List[str]]]) -> str: + """Format retrieved example pairs into a compact block for the prompt.""" + if not pairs: + return "EXAMPLES: (none provided)" + lines: List[str] = ["CLASSIFICATION EXAMPLES:"] + for idx, (term, types) in enumerate(pairs, 1): + preview_types = types[:3] # keep context small + lines.append(f"{idx}. Term: '{term}' → Types: {list(preview_types)}") + lines.append("END OF EXAMPLES.") + return "\n".join(lines) + + def _compose_prompt(self, examples_block: str, term: str) -> str: + """Compose the final prompt from system + user blocks.""" + user_block = self._user_prompt_template.format(examples=examples_block, term=term) + return f"{self._system_prompt}\n\n{user_block}\n" + + def _generate_and_parse(self, prompts: List[str]) -> List[List[str]]: + """Run generation for a batch of prompts and parse the JSON 'types' from outputs.""" + batch_size = int(self.cfg["gen_batch_size"]) + all_predicted_types: List[List[str]] = [] + + for batch_start in range(0, len(prompts), batch_size): + prompt_batch = prompts[batch_start : batch_start + batch_size] + + # Tokenize and move to the LLM's device + model_device = getattr(self.generation_model, "device", None) + encodings = self.tokenizer(prompt_batch, return_tensors="pt", padding=True).to(model_device) + input_token_length = encodings["input_ids"].shape[1] + + # Deterministic decoding (greedy) + with torch.no_grad(): + generated_tokens = self.generation_model.generate( + **encodings, + do_sample=False, + num_beams=1, + temperature=None, + top_p=None, + top_k=None, + max_new_tokens=int(self.cfg["max_new_tokens"]), + pad_token_id=self.tokenizer.eos_token_id, + ) + + # Slice off the prompt tokens and decode only newly generated tokens + new_token_span = generated_tokens[:, input_token_length:] + decoded_texts = [self.tokenizer.decode(seq, skip_special_tokens=True) for seq in new_token_span] + + parsed_types_per_prompt = [self._parse_types(text) for text in decoded_texts] + all_predicted_types.extend(parsed_types_per_prompt) + + return all_predicted_types + + def _parse_types(self, text: str) -> List[str]: + """Extract a list of type strings from LLM output. + + Attempts (in order): + 1) Strict JSON object with "types". + 2) Regex-extract JSON object containing "types". + 3) Regex-extract first bracketed list. + 4) Comma-split fallback. 
+ """ + try: + obj = json.loads(text) + if isinstance(obj, dict) and isinstance(obj.get("types"), list): + return [t for t in obj["types"] if isinstance(t, str)] + except Exception: + pass + + try: + obj_match = re.search(r'\{[^{}]*"types"\s*:\s*\[[^\]]*\][^{}]*\}', text, re.S) + if obj_match: + obj = json.loads(obj_match.group(0)) + types = obj.get("types", []) + return [t for t in types if isinstance(t, str)] + except Exception: + pass + + try: + list_match = re.search(r'\[([^\]]+)\]', text) + if list_match: + items = [x.strip().strip('"').strip("'") for x in list_match.group(1).split(",")] + return [t for t in items if t] + except Exception: + pass + + if "," in text: + items = [x.strip().strip('"').strip("'") for x in text.split(",")] + return [t for t in items if t] + + return [] diff --git a/ontolearner/learner/text2onto/__init__.py b/ontolearner/learner/text2onto/__init__.py index 30e8372..6408881 100644 --- a/ontolearner/learner/text2onto/__init__.py +++ b/ontolearner/learner/text2onto/__init__.py @@ -13,3 +13,4 @@ # limitations under the License. from .sbunlp import SBUNLPFewShotLearner +from .alexbek import AlexbekFewShotLearner diff --git a/ontolearner/learner/text2onto/alexbek.py b/ontolearner/learner/text2onto/alexbek.py new file mode 100644 index 0000000..5760dca --- /dev/null +++ b/ontolearner/learner/text2onto/alexbek.py @@ -0,0 +1,1084 @@ +# Copyright (c) 2025 SciKnowOrg +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, List, Optional, Tuple, Iterable +import json +from json.decoder import JSONDecodeError +import os +import random +import re + +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM + +from ...base import AutoLearner, AutoLLM + +try: + from outlines.models import Transformers as OutlinesTFModel + from outlines.generate import json as outlines_generate_json + from pydantic import BaseModel + + class _PredictedTypesSchema(BaseModel): + """Schema used when generating structured JSON { "types": [...] }.""" + types: List[str] + + OUTLINES_AVAILABLE: bool = True +except Exception: + # If outlines is unavailable, we will fall back to greedy decoding + regex parsing. + OUTLINES_AVAILABLE = False + _PredictedTypesSchema = None + OutlinesTFModel = None + outlines_generate_json = None + +class LocalAutoLLM(AutoLLM): + """ + Minimal local LLM helper. + + - Inherits AutoLLM but overrides load/generate to avoid label_mapper. + - Optional 4-bit loading with `load_in_4bit=True` in .load(). + - Greedy decoding by default (deterministic). + """ + + def __init__(self, device: str = "cpu", token: str = "") -> None: + """ + Initialize the local LLM holder. + + Parameters + ---------- + device : str + Execution device: "cpu" or "cuda". + token : str + Optional auth token for private model hubs. 
+ """ + super().__init__(label_mapper=None, device=device, token=token) + self.model: Optional[AutoModelForCausalLM] = None + self.tokenizer: Optional[AutoTokenizer] = None + + def load(self, model_id: str, *, load_in_4bit: bool = False) -> None: + """ + Load a Hugging Face causal model + tokenizer and set deterministic + generation defaults. + + Parameters + ---------- + model_id : str + Model identifier resolvable by HF `from_pretrained`. + load_in_4bit : bool + If True and bitsandbytes is available, load using 4-bit quantization. + """ + # Tokenizer + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, padding_side="left", token=self.token + ) + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + # Model (optionally quantized) + if load_in_4bit: + from transformers import BitsAndBytesConfig + + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.bfloat16, + ) + self.model = AutoModelForCausalLM.from_pretrained( + model_id, + device_map="auto", + quantization_config=quantization_config, + token=self.token, + ) + else: + device_map = "auto" if (self.device != "cpu" and torch.cuda.is_available()) else None + self.model = AutoModelForCausalLM.from_pretrained( + model_id, + device_map=device_map, + torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, + token=self.token, + ) + + # Deterministic generation defaults + generation_cfg = self.model.generation_config + generation_cfg.do_sample = False + generation_cfg.temperature = None + generation_cfg.top_k = None + generation_cfg.top_p = None + generation_cfg.num_beams = 1 + + def generate(self, prompts: List[str], max_new_tokens: int = 128) -> List[str]: + """ + Greedy-generate continuations for a list of prompts. + + Parameters + ---------- + prompts : List[str] + Prompts to generate for (batched). + max_new_tokens : int + Maximum number of new tokens per continuation. + + Returns + ------- + List[str] + Decoded new-token texts (no special tokens, stripped). + """ + if self.model is None or self.tokenizer is None: + raise RuntimeError("Call .load(model_id) on LocalAutoLLM before generate().") + + tokenized_batch = self.tokenizer(prompts, return_tensors="pt", padding=True, truncation=True) + input_seq_len = tokenized_batch["input_ids"].shape[1] + tokenized_batch = {k: v.to(self.model.device) for k, v in tokenized_batch.items()} + + with torch.no_grad(): + outputs = self.model.generate( + **tokenized_batch, + max_new_tokens=max_new_tokens, + pad_token_id=self.tokenizer.eos_token_id, + do_sample=False, + num_beams=1, + ) + + # Only return the newly generated part for each row in the batch + continuation_token_ids = outputs[:, input_seq_len:] + return [self.tokenizer.decode(row, skip_special_tokens=True).strip() for row in continuation_token_ids] + +class AlexbekFewShotLearner(AutoLearner): + """ + Text2Onto learner for LLMS4OL Task A (term & type extraction). + + Public API (A1 + convenience): + - fit(train_docs_jsonl, terms2doc_json, sample_size=24, seed=42) + - predict_terms(docs_test_jsonl, out_jsonl, max_new_tokens=128, few_shot_k=6) -> int + - predict_types(docs_test_jsonl, out_jsonl, max_new_tokens=128, few_shot_k=6) -> int + - evaluate_extraction_f1(gold_item2docs_json, preds_jsonl, key="term"|"type") -> float + + Option A (A2, term→types) bridge: + - predict_types_from_terms_option_a(...) 
+ Reads your A1 results (docs→terms), predicts types for each term, and + writes two files: terms2types_pred.json + types2docs_pred.json + """ + def __init__(self, model: LocalAutoLLM, device: str = "cpu", **_: Any) -> None: + """ + Initialize learner state and canned prompts. + + Parameters + ---------- + model : LocalAutoLLM + Loaded local LLM helper instance. + device : str + Device name ("cpu" or "cuda"). + """ + super().__init__(**_) + self.model = model + self.device = device + + # Few-shot exemplars for A1 (Docs→Terms) and for Docs→Types: + # Each exemplar is a tuple: (title, text, gold_list) + self._fewshot_terms_docs: List[Tuple[str, str, List[str]]] = [] + self._fewshot_types_docs: List[Tuple[str, str, List[str]]] = [] + + # System prompts + self._system_prompt_terms = ( + "You are an expert in ontology term extraction.\n" + "Extract only terms that explicitly appear in the document.\n" + 'Answer strictly as JSON: {"terms": ["..."]}\n' + ) + self._system_prompt_types = ( + "You are an expert in ontology type classification.\n" + "List ontology *types* that characterize the document’s terminology.\n" + 'Answer strictly as JSON: {"types": ["..."]}\n' + ) + + # Compiled regex for robust JSON extraction from LLM outputs + self._json_object_regex = re.compile(r"\{[^{}]*\}", re.S) + self._json_array_regex = re.compile(r"\[[^\]]*\]", re.S) + + # Term→Types (Option A) specific prompt + self._system_prompt_term_to_types = ( + "You are an expert in ontology and semantic type classification.\n" + "Given a term, predict its semantic types from the domain-specific ontology.\n" + 'Answer strictly as JSON:\n{"types": ["type1", "type2", "..."]}' + ) + + def fit( + self, + *, + train_docs_jsonl: str, + terms2doc_json: str, + sample_size: int = 24, + seed: int = 42, + ) -> None: + """ + Build internal few-shot exemplars from a labeled training split. + + Parameters + ---------- + train_docs_jsonl : str + Path to JSONL (or tolerant JSON/JSONL) with train documents. + terms2doc_json : str + JSON mapping item -> [doc_id,...]; "item" can be a term or type. + sample_size : int + Number of exemplar documents to keep for few-shot prompting. + seed : int + RNG seed for reproducible sampling. 
+ """ + rng = random.Random(seed) + + # Load documents and map doc_id -> row + document_map = self._load_documents_jsonl(train_docs_jsonl) + if not document_map: + raise FileNotFoundError(f"No documents found in: {train_docs_jsonl}") + + # Load item -> [doc_ids] + item_to_docs_map = self._load_json(terms2doc_json) + if not isinstance(item_to_docs_map, dict): + raise ValueError(f"{terms2doc_json} must be a JSON dict mapping item -> [doc_ids]") + + # Reverse mapping: doc_id -> [items] + doc_id_to_items_map: Dict[str, List[str]] = {} + for item_label, doc_id_list in item_to_docs_map.items(): + for doc_id in doc_id_list: + doc_id_to_items_map.setdefault(doc_id, []).append(item_label) + + # Build candidate exemplars (title, text, gold_list) + exemplar_candidates: List[Tuple[str, str, List[str]]] = [] + for doc_id, labeled_items in doc_id_to_items_map.items(): + doc_row = document_map.get(doc_id) + if not doc_row: + continue + doc_title = str(doc_row.get("title", "")) # be defensive (may be None) + doc_text = self._to_text(doc_row.get("text", "")) # string-ify list if needed + if not doc_text: + continue + gold_items = self._unique_preserve([s for s in labeled_items if isinstance(s, str)]) + if gold_items: + exemplar_candidates.append((doc_title, doc_text, gold_items)) + + if not exemplar_candidates: + raise RuntimeError("No candidate docs with items found to build few-shot exemplars.") + + chosen_exemplars = rng.sample(exemplar_candidates, k=min(sample_size, len(exemplar_candidates))) + # Reuse exemplars for both docs→terms and docs→types prompting + self._fewshot_terms_docs = chosen_exemplars + self._fewshot_types_docs = chosen_exemplars + + def predict_terms( + self, + *, + docs_test_jsonl: str, + out_jsonl: str, + max_new_tokens: int = 128, + few_shot_k: int = 6, + ) -> int: + """ + Extract terms that explicitly appear in each document. + + Writes one JSON object per line: + {"id": "", "terms": ["...", "...", ...]} + + Parameters + ---------- + docs_test_jsonl : str + Path to test/dev documents in JSONL or tolerant JSON/JSONL. + out_jsonl : str + Output JSONL path where predictions are written (one line per doc). + max_new_tokens : int + Max generation length. + few_shot_k : int + Number of few-shot exemplars to prepend per prompt. + + Returns + ------- + int + Number of lines written (i.e., number of processed documents). 
+ """ + if self.model is None or self.model.model is None: + raise RuntimeError("Load a model first: learner.model.load(MODEL_ID, ...)") + + test_documents = self._load_documents_jsonl(docs_test_jsonl) + prompts: List[str] = [] + document_order: List[str] = [] + + for document_id, document_row in test_documents.items(): + title = str(document_row.get("title", "")) + text = self._to_text(document_row.get("text", "")) + + fewshot_block = self._format_fewshot_block( + self._system_prompt_terms, self._fewshot_terms_docs, key="terms", k=few_shot_k + ) + user_block = self._format_user_block(title, text) + + prompts.append(f"{fewshot_block}\n{user_block}\nAssistant:") + document_order.append(document_id) + + generations = self.model.generate(prompts, max_new_tokens=max_new_tokens) + parsed_term_lists = [self._parse_json_list(generated, key="terms") for generated in generations] + + os.makedirs(os.path.dirname(out_jsonl) or ".", exist_ok=True) + lines_written = 0 + with open(out_jsonl, "w", encoding="utf-8") as fp_out: + for document_id, term_list in zip(document_order, parsed_term_lists): + payload = {"id": document_id, "terms": self._unique_preserve(term_list)} + fp_out.write(json.dumps(payload, ensure_ascii=False) + "\n") + lines_written += 1 + return lines_written + + + def predict_types( + self, + *, + docs_test_jsonl: str, + out_jsonl: str, + max_new_tokens: int = 128, + few_shot_k: int = 6, + ) -> int: + """ + Predict ontology types that characterize each document’s terminology. + + Writes one JSON object per line: + {"id": "", "types": ["...", "...", ...]} + + Parameters + ---------- + docs_test_jsonl : str + Path to test/dev documents in JSONL or tolerant JSON/JSONL. + out_jsonl : str + Output JSONL path where predictions are written (one line per doc). + max_new_tokens : int + Max generation length. + few_shot_k : int + Number of few-shot exemplars to prepend per prompt. + + Returns + ------- + int + Number of lines written (i.e., number of processed documents). + """ + if self.model is None or self.model.model is None: + raise RuntimeError("Load a model first: learner.model.load(MODEL_ID, ...)") + + test_documents = self._load_documents_jsonl(docs_test_jsonl) + prompts: List[str] = [] + document_order: List[str] = [] + + for document_id, document_row in test_documents.items(): + title = str(document_row.get("title", "")) + text = self._to_text(document_row.get("text", "")) + + fewshot_block = self._format_fewshot_block( + self._system_prompt_types, self._fewshot_types_docs, key="types", k=few_shot_k + ) + user_block = self._format_user_block(title, text) + + prompts.append(f"{fewshot_block}\n{user_block}\nAssistant:") + document_order.append(document_id) + + generations = self.model.generate(prompts, max_new_tokens=max_new_tokens) + parsed_type_lists = [self._parse_json_list(generated, key="types") for generated in generations] + + os.makedirs(os.path.dirname(out_jsonl) or ".", exist_ok=True) + lines_written = 0 + with open(out_jsonl, "w", encoding="utf-8") as fp_out: + for document_id, type_list in zip(document_order, parsed_type_lists): + payload = {"id": document_id, "types": self._unique_preserve(type_list)} + fp_out.write(json.dumps(payload, ensure_ascii=False) + "\n") + lines_written += 1 + return lines_written + + def evaluate_extraction_f1( + self, + gold_item2docs_json: str, + preds_jsonl: str, + *, + key: str = "term", + ) -> float: + """ + Compute micro-F1 over (doc_id, item) pairs. + + Parameters + ---------- + gold_item2docs_json : str + JSON mapping item -> [doc_ids]. 
+ preds_jsonl : str + JSONL lines like {"id": "...", "terms":[...]} or {"id":"...","types":[...]}. + key : str + "term" or "type" depending on what you are evaluating. + + Returns + ------- + float + Micro-averaged F1 score. + """ + item_to_doc_ids: Dict[str, List[str]] = self._load_json(gold_item2docs_json) + + # Build gold: doc_id -> set(items) + gold_doc_to_items: Dict[str, set] = {} + for item_label, doc_id_list in item_to_doc_ids.items(): + for document_id in doc_id_list: + gold_doc_to_items.setdefault(document_id, set()).add(self._norm(item_label)) + + # Build predictions: doc_id -> set(items) + pred_doc_to_items: Dict[str, set] = {} + with open(preds_jsonl, "r", encoding="utf-8") as fp_in: + for line in fp_in: + row = json.loads(line.strip()) + document_id = str(row.get("id", "")) + items_list = row.get("terms" if key == "term" else "types", []) + pred_doc_to_items[document_id] = {self._norm(x) for x in items_list if isinstance(x, str)} + + # Micro counts + true_positive = false_positive = false_negative = 0 + all_document_ids = set(gold_doc_to_items.keys()) | set(pred_doc_to_items.keys()) + for document_id in all_document_ids: + gold_set = gold_doc_to_items.get(document_id, set()) + pred_set = pred_doc_to_items.get(document_id, set()) + true_positive += len(gold_set & pred_set) + false_positive += len(pred_set - gold_set) + false_negative += len(gold_set - pred_set) + + precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) else 0.0 + recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) else 0.0 + f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0 + return f1 + + def predict_types_from_terms( + self, + *, + doc_terms_jsonl: Optional[str] = None, # formerly a1_results_jsonl + doc_terms_list: Optional[List[Dict]] = None, # formerly a1_results_list + few_shot_jsonl: Optional[str] = None, # JSONL lines: {"term":"...", "types":[...]} + rag_terms_json: Optional[str] = None, # JSON list; items may contain "term" and "RAG":[...] + random_few_shot: Optional[int] = 3, + model_id: str = "Qwen/Qwen2.5-1.5B-Instruct", + use_structured_output: bool = True, + seed: int = 42, + out_terms2types: str = "terms2types_pred.json", + out_types2docs: str = "types2docs_pred.json", + ) -> Dict[str, Any]: + """ + Predict types for each unique term extracted per document and derive a types→docs map. + + Parameters + ---------- + doc_terms_jsonl : Optional[str] + Path to JSONL with lines like {"id": "...", "terms": [...]} or a JSON with {"results":[...]}. + doc_terms_list : Optional[List[Dict]] + In-memory results like [{"id":"...","extracted_terms":[...]}] or {"id":"...","terms":[...]}. + few_shot_jsonl : Optional[str] + Global few-shot exemplars: one JSON object per line with {"term": "...", "types":[...]}. + rag_terms_json : Optional[str] + Optional per-term RAG exemplars: a JSON list of {"term": "...", "RAG":[{"term": "...", "types":[...]}]}. + random_few_shot : Optional[int] + If provided, randomly select up to this many few-shot examples for each prediction. + model_id : str + HF model id used specifically for term→types predictions. + use_structured_output : bool + If True and outlines is available, enforce structured {"types":[...]} output. + seed : int + Random seed for reproducibility. + out_terms2types : str + Output JSON path for list of {"term": "...", "predicted_types":[...]}. + out_types2docs : str + Output JSON path for dict {"TYPE":[doc_ids,...], ...}. 
+ + Returns + ------- + Dict[str, Any] + Summary with predictions and counts. + """ + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + + # Load normalized document→terms results + doc_term_extractions = self._load_doc_term_extractions( + results_json_path=doc_terms_jsonl, + in_memory_results=doc_terms_list, + ) + if not doc_term_extractions: + raise ValueError("No document→terms results provided (doc_terms_jsonl/doc_terms_list).") + + # Prepare unique term list and term→doc occurrences + unique_terms = self._collect_unique_terms_from_extractions(doc_term_extractions) + term_to_doc_ids_map = self._build_term_to_doc_ids(doc_term_extractions) + + # Load optional global few-shot examples + global_few_shot_examples: List[Dict] = [] + if few_shot_jsonl and os.path.exists(few_shot_jsonl): + with open(few_shot_jsonl, "r", encoding="utf-8") as few_shot_file: + for raw_line in few_shot_file: + raw_line = raw_line.strip() + if not raw_line: + continue + try: + json_obj = json.loads(raw_line) + except Exception: + continue + if isinstance(json_obj, dict) and "term" in json_obj and "types" in json_obj: + global_few_shot_examples.append(json_obj) + + # Optional per-term RAG examples: {normalized_term -> [examples]} + rag_examples_lookup: Dict[str, List[Dict]] = {} + if rag_terms_json and os.path.exists(rag_terms_json): + try: + rag_payload = self._load_json(rag_terms_json) + if isinstance(rag_payload, list): + for rag_item in rag_payload: + if isinstance(rag_item, dict): + normalized_term = self._normalize_term(rag_item.get("term", "")) + rag_examples_lookup[normalized_term] = rag_item.get("RAG", []) + except Exception: + pass + + # Load a small chat LLM dedicated to Term→Types + typing_model, typing_tokenizer = self._load_llm_for_types(model_id) + + # Predict types per term + term_to_predicted_types_list: List[Dict] = [] + for term_text in unique_terms: + normalized_term = self._normalize_term(term_text) + + # Prefer per-term RAG for this term, else use global few-shot + few_shot_examples_for_term = rag_examples_lookup.get(normalized_term, None) or global_few_shot_examples + + # Build conversation and prompt + conversation_messages = self._build_conv_for_type_infer( + term=term_text, + few_shot_examples=few_shot_examples_for_term, + random_k=random_few_shot, + ) + typing_prompt_string = self._apply_chat_template_safe_types(typing_tokenizer, conversation_messages) + + predicted_types: List[str] = [] + raw_generation_text: str = "" + + # Structured JSON path (if requested and available) + if use_structured_output and OUTLINES_AVAILABLE and _PredictedTypesSchema is not None: + try: + outlines_model = OutlinesTFModel(typing_model, typing_tokenizer) # type: ignore + generator = outlines_generate_json(outlines_model, _PredictedTypesSchema) # type: ignore + structured = generator(typing_prompt_string, max_tokens=512) + predicted_types = [label for label in structured.types if isinstance(label, str)] + raw_generation_text = json.dumps({"types": predicted_types}, ensure_ascii=False) + except Exception: + # Fall back to greedy decoding + use_structured_output = False + + # Greedy decode fallback + if not use_structured_output or not OUTLINES_AVAILABLE or _PredictedTypesSchema is None: + tokenized_prompt = typing_tokenizer(typing_prompt_string, return_tensors="pt", truncation=True, max_length=2048) + if torch.cuda.is_available(): + tokenized_prompt = {name: tensor.cuda() for name, tensor in tokenized_prompt.items()} + with torch.no_grad(): + output_ids = 
typing_model.generate( + **tokenized_prompt, + max_new_tokens=256, + do_sample=False, + num_beams=1, + pad_token_id=typing_tokenizer.eos_token_id, + ) + new_token_span = output_ids[0][tokenized_prompt["input_ids"].shape[1]:] + raw_generation_text = typing_tokenizer.decode(new_token_span, skip_special_tokens=True) + predicted_types = self._extract_types_from_text(raw_generation_text) + + term_to_predicted_types_list.append({ + "term": term_text, + "predicted_types": sorted(set(predicted_types)), + }) + + # 7) Build types→docs from (term→types) and (term→docs) + types_to_doc_id_set: Dict[str, set] = {} + for term_prediction in term_to_predicted_types_list: + normalized_term = self._normalize_term(term_prediction["term"]) + doc_ids_for_term = term_to_doc_ids_map.get(normalized_term, []) + for type_label in term_prediction.get("predicted_types", []): + types_to_doc_id_set.setdefault(type_label, set()).update(doc_ids_for_term) + + types_to_doc_ids: Dict[str, List[str]] = { + type_label: sorted(doc_id_set) for type_label, doc_id_set in types_to_doc_id_set.items() + } + + # 8) Save outputs + os.makedirs(os.path.dirname(out_terms2types) or ".", exist_ok=True) + with open(out_terms2types, "w", encoding="utf-8") as fp_terms2types: + json.dump(term_to_predicted_types_list, fp_terms2types, ensure_ascii=False, indent=2) + + os.makedirs(os.path.dirname(out_types2docs) or ".", exist_ok=True) + with open(out_types2docs, "w", encoding="utf-8") as fp_types2docs: + json.dump(types_to_doc_ids, fp_types2docs, ensure_ascii=False, indent=2) + + # Cleanup VRAM if any + del typing_model, typing_tokenizer + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + return { + "terms2types_pred": term_to_predicted_types_list, + "types2docs_pred": types_to_doc_ids, + "unique_terms": len(unique_terms), + "types_count": len(types_to_doc_ids), + } + + def _load_json(self, path: str) -> Dict[str, Any]: + """Load a JSON file from disk and return its parsed object.""" + with open(path, "r", encoding="utf-8") as file_obj: + return json.load(file_obj) + + + def _iter_json_objects(self, blob: str) -> Iterable[Dict[str, Any]]: + """ + Iterate over *all* JSON objects found inside a string. + + Supports cases where multiple JSON objects are concatenated back-to-back + in a single line. It skips stray commas/whitespace between objects. + + Parameters + ---------- + blob : str + A string that may contain one or more JSON objects. + + Yields + ------ + Dict[str, Any] + Each parsed JSON object. + """ + json_decoder = json.JSONDecoder() + cursor_index, text_length = 0, len(blob) + while cursor_index < text_length: + # Skip whitespace/commas between objects + while cursor_index < text_length and blob[cursor_index] in " \t\r\n,": + cursor_index += 1 + if cursor_index >= text_length: + break + try: + json_obj, end_index = json_decoder.raw_decode(blob, idx=cursor_index) + except JSONDecodeError: + # Can't decode from this position; stop scanning this chunk + break + yield json_obj + cursor_index = end_index + + + def _load_documents_jsonl(self, path: str) -> Dict[str, Dict[str, Any]]: + """ + Robust reader that supports: + • True JSONL (one object per line) + • Lines with multiple concatenated JSON objects + • Whole file as a JSON array + + Returns + ------- + Dict[str, Dict[str, Any]] + Mapping doc_id -> full document row. 
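+
+        Notes
+        -----
+        Document ids are taken from "id", "doc_id", or a nested
+        {"doc": {"id": ...}} field; records without a resolvable id are skipped.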
+ """ + documents_by_id: Dict[str, Dict[str, Any]] = {} + + with open(path, "r", encoding="utf-8") as file_obj: + content = file_obj.read().strip() + + # Case A: whole-file JSON array + if content.startswith("["): + try: + json_array = json.loads(content) + if isinstance(json_array, list): + for record in json_array: + if not isinstance(record, dict): + continue + document_id = str( + record.get("id") + or record.get("doc_id") + or (record.get("doc") or {}).get("id") + or "" + ) + if document_id: + documents_by_id[document_id] = record + return documents_by_id + except Exception: + # Fall back to line-wise handling if array parsing fails + pass + + # Case B: treat as JSONL-ish; parse *all* objects per line + for raw_line in content.splitlines(): + line = raw_line.strip() + if not line: + continue + for record in self._iter_json_objects(line): + if not isinstance(record, dict): + continue + document_id = str( + record.get("id") + or record.get("doc_id") + or (record.get("doc") or {}).get("id") + or "" + ) + if document_id: + documents_by_id[document_id] = record + + return documents_by_id + + + def _to_text(self, text_field: Any) -> str: + """ + Convert a 'text' field into a single string (handles list-of-strings). + + Parameters + ---------- + text_field : Any + The value found under "text" in the dataset row. + + Returns + ------- + str + A single-string representation of the text. + """ + if isinstance(text_field, str): + return text_field + if isinstance(text_field, list): + return " ".join(str(part) for part in text_field) + return str(text_field) if text_field is not None else "" + + + def _unique_preserve(self, values: List[str]) -> List[str]: + """ + Deduplicate values while preserving the original order. + + Parameters + ---------- + values : List[str] + Sequence possibly containing duplicates. + + Returns + ------- + List[str] + Sequence without duplicates, order preserved. + """ + seen_values: set = set() + ordered_values: List[str] = [] + for candidate in values: + if candidate not in seen_values: + seen_values.add(candidate) + ordered_values.append(candidate) + return ordered_values + + + def _norm(self, text: str) -> str: + """ + Lowercased, single-spaced normalization (for comparisons). + + Parameters + ---------- + text : str + Input string. + + Returns + ------- + str + Normalized string. + """ + return " ".join(text.lower().split()) + + + def _normalize_term(self, term: str) -> str: + """ + Normalization tailored for term keys / lookups. + + Parameters + ---------- + term : str + Term to normalize. + + Returns + ------- + str + Lowercased, trimmed and single-spaced term. + """ + return " ".join(str(term).strip().split()).lower() + + + def _format_fewshot_block( + self, + system_prompt: str, + fewshot_examples: List[Tuple[str, str, List[str]]], + *, + key: str, + k: int = 6, + ) -> str: + """ + Render a few-shot block like: + + + + ### Example + User: + Title: ... + + Assistant: + {"terms": [...]} or {"types": [...]} + + Parameters + ---------- + system_prompt : str + Instructional system text to prepend. + fewshot_examples : List[Tuple[str, str, List[str]]] + Examples as (title, text, labels_list). + key : str + Either "terms" or "types" depending on the task. + k : int + Number of examples to include. + + Returns + ------- + str + Formatted few-shot block text. 
+ """ + lines: List[str] = [system_prompt.strip(), ""] + for example_title, example_text, gold_list in fewshot_examples[:k]: + lines.append("### Example") + lines.append(f"User:\nTitle: {example_title}\n{example_text}") + lines.append(f'Assistant:\n{{"{key}": ' + json.dumps(gold_list, ensure_ascii=False) + "}") + return "\n".join(lines) + + + def _format_user_block(self, title: str, text: str) -> str: + """ + Format the 'Task' block for the current document. + + Parameters + ---------- + title : str + Document title. + text : str + Document text (single string). + + Returns + ------- + str + Formatted user block. + """ + return f"### Task\nUser:\nTitle: {title}\n{text}" + + + def _parse_json_list(self, generated_text: str, *, key: str) -> List[str]: + """ + Extract a list from model output, trying: + 1) JSON object with the key ({"terms":[...]} or {"types":[...]}). + 2) Any top-level JSON array. + 3) Fallback: comma-split. + + Parameters + ---------- + generated_text : str + Raw generation text to parse. + key : str + "terms" or "types". + + Returns + ------- + List[str] + Parsed strings (best-effort). + """ + # 1) Try a JSON object and read key + try: + object_match = self._json_object_regex.search(generated_text) + if object_match: + json_obj = json.loads(object_match.group(0)) + json_array = json_obj.get(key) + if isinstance(json_array, list): + return [value for value in json_array if isinstance(value, str)] + except Exception: + pass + + # 2) Any JSON array + try: + array_match = self._json_array_regex.search(generated_text) + if array_match: + json_array = json.loads(array_match.group(0)) + if isinstance(json_array, list): + return [value for value in json_array if isinstance(value, str)] + except Exception: + pass + + # 3) Fallback: comma-split (last resort) + if "," in generated_text: + return [part.strip().strip('"').strip("'") for part in generated_text.split(",") if part.strip()] + return [] + + + def _apply_chat_template_safe_types(self, tokenizer: AutoTokenizer, messages: List[Dict[str, str]]) -> str: + """ + Safely build a prompt string for chat models. Uses the model's chat template + when available; otherwise falls back to a simple concatenation. + """ + try: + return tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False) + except Exception: + system_text = next((m["content"] for m in messages if m.get("role") == "system"), "") + last_user_text = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "") + return f"{system_text}\n\nUser:\n{last_user_text}\n\nAssistant:" + + + def _build_conv_for_type_infer( + self, + term: str, + few_shot_examples: Optional[List[Dict]] = None, + random_k: Optional[int] = None, + ) -> List[Dict[str, str]]: + """ + Create a chat-style conversation for a single term→types query, + optionally prepending few-shot examples. 
+ """ + messages: List[Dict[str, str]] = [{"role": "system", "content": self._system_prompt_term_to_types}] + examples = list(few_shot_examples or []) + if random_k and len(examples) > random_k: + import random as _rnd + examples = _rnd.sample(examples, random_k) + for exemplar in examples: + example_term = exemplar.get("term", "") + example_types = exemplar.get("types", []) + messages.append({"role": "user", "content": f"Term: {example_term}"}) + messages.append({"role": "assistant", "content": json.dumps({"types": example_types}, ensure_ascii=False)}) + messages.append({"role": "user", "content": f"Term: {term}"}) + return messages + + + def _extract_types_from_text(self, generated_text: str) -> List[str]: + """ + Parse {"types":[...]} from a free-form generation. + """ + try: + object_match = re.search(r'\{[^}]*"types"[^}]*\}', generated_text) + if object_match: + json_obj = json.loads(object_match.group(0)) + types_array = json_obj.get("types", []) + return [type_label for type_label in types_array if isinstance(type_label, str)] + except Exception: + pass + return [] + + + def _load_llm_for_types(self, model_id: str) -> Tuple[AutoModelForCausalLM, AutoTokenizer]: + """ + Load a *separate* small chat model for Term→Types (keeps LocalAutoLLM untouched). + """ + tokenizer = AutoTokenizer.from_pretrained(model_id) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, + device_map="auto" if torch.cuda.is_available() else None, + ) + return model, tokenizer + + + def _load_doc_term_extractions( + self, + *, + results_json_path: Optional[str] = None, + in_memory_results: Optional[List[Dict]] = None, + ) -> List[Dict]: + """ + Normalize document→terms outputs to a list of: + {"id": "", "extracted_terms": ["...", ...]} + + Accepts either: + - in_memory_results (list of dicts) + - results_json_path pointing to: + • a JSONL file with lines: {"id": "...", "terms": [...]} + • OR a JSON file with {"results":[{"id":..., "extracted_terms": [...]}, ...]} + • OR a JSON list of dicts + """ + normalized_records: List[Dict] = [] + + def _coerce_to_record(source_row: Dict) -> Optional[Dict]: + document_id = str(source_row.get("id", "")) or str(source_row.get("doc_id", "")) + if not document_id: + return None + terms = source_row.get("extracted_terms") + if terms is None: + terms = source_row.get("terms") + if terms is None and "payload" in source_row and isinstance(source_row["payload"], dict): + terms = source_row["payload"].get("terms") + if not isinstance(terms, list): + terms = [] + return {"id": document_id, "extracted_terms": [t for t in terms if isinstance(t, str)]} + + if in_memory_results is not None: + for source_row in in_memory_results: + coerced_record = _coerce_to_record(source_row) + if coerced_record: + normalized_records.append(coerced_record) + return normalized_records + + if not results_json_path: + raise ValueError("Provide results_json_path or in_memory_results") + + # Detect JSON vs JSONL by extension (best-effort) + if results_json_path.endswith(".jsonl"): + with open(results_json_path, "r", encoding="utf-8") as file_in: + for raw_line in file_in: + raw_line = raw_line.strip() + if not raw_line: + continue + # Multiple concatenated objects per line? Iterate them all. 
+ for json_obj in self._iter_json_objects(raw_line): + if isinstance(json_obj, dict): + coerced_record = _coerce_to_record(json_obj) + if coerced_record: + normalized_records.append(coerced_record) + else: + payload_obj = self._load_json(results_json_path) + if isinstance(payload_obj, dict) and "results" in payload_obj: + for source_row in payload_obj["results"]: + coerced_record = _coerce_to_record(source_row) + if coerced_record: + normalized_records.append(coerced_record) + elif isinstance(payload_obj, list): + for source_row in payload_obj: + if isinstance(source_row, dict): + coerced_record = _coerce_to_record(source_row) + if coerced_record: + normalized_records.append(coerced_record) + + return normalized_records + + + def _collect_unique_terms_from_extractions(self, doc_term_extractions: List[Dict]) -> List[str]: + """ + Collect unique terms (original casing) from normalized document→terms results. + """ + seen_normalized_terms: set = set() + ordered_unique_terms: List[str] = [] + for record in doc_term_extractions: + for term_text in record.get("extracted_terms", []): + normalized = self._normalize_term(term_text) + if normalized and normalized not in seen_normalized_terms: + seen_normalized_terms.add(normalized) + ordered_unique_terms.append(term_text.strip()) + return ordered_unique_terms + + + def _build_term_to_doc_ids(self, doc_term_extractions: List[Dict]) -> Dict[str, List[str]]: + """ + Build lookup: normalized_term -> sorted unique list of doc_ids. + """ + term_to_doc_set: Dict[str, set] = {} + for record in doc_term_extractions: + document_id = str(record.get("id", "")) + for term_text in record.get("extracted_terms", []): + normalized = self._normalize_term(term_text) + if not normalized or not document_id: + continue + term_to_doc_set.setdefault(normalized, set()).add(document_id) + return {normalized_term: sorted(doc_ids) for normalized_term, doc_ids in term_to_doc_set.items()} From 1abbbc91e7c65321da0f25f1f41b190c3776986d Mon Sep 17 00:00:00 2001 From: KrishnaRani Date: Mon, 10 Nov 2025 23:52:46 +0100 Subject: [PATCH 5/7] added changes for taxonomy discovery and term typing --- .../llm_learner_alexbek_rag_term_typing.py | 10 +- .../llm_learner_alexbek_rf_term_typing.py | 24 +- ...er_alexbek_self_attn_taxonomy_discovery.py | 9 +- examples/llm_learner_alexbek_text2onto.py | 32 +- ...llm_learner_rwthdbis_taxonomy_discovery.py | 17 +- examples/llm_learner_rwthdbis_term_typing.py | 13 +- ...lm_learner_sbunlp_fs_taxonomy_discovery.py | 33 +- examples/llm_learner_sbunlp_text2onto.py | 29 +- examples/llm_learner_sbunlp_zs_term_typing.py | 20 +- ..._learner_skhnlp_sft_taxonomoy_discovery.py | 10 +- ...m_learner_skhnlp_zs_taxonomoy_discovery.py | 13 +- .../learner/taxonomy_discovery/__init__.py | 18 - .../learner/taxonomy_discovery/alexbek.py | 291 +++++- .../learner/taxonomy_discovery/rwthdbis.py | 922 ++++++++++++------ .../learner/taxonomy_discovery/sbunlp.py | 393 +++++--- .../learner/taxonomy_discovery/skhnlp.py | 561 +++++++++-- ontolearner/learner/term_typing/__init__.py | 17 - ontolearner/learner/term_typing/alexbek.py | 665 +++++++++++-- ontolearner/learner/term_typing/rwthdbis.py | 214 +++- ontolearner/learner/term_typing/sbunlp.py | 404 ++++---- ontolearner/learner/text2onto/__init__.py | 16 - ontolearner/learner/text2onto/alexbek.py | 293 ++++-- ontolearner/learner/text2onto/sbunlp.py | 127 ++- 23 files changed, 2956 insertions(+), 1175 deletions(-) delete mode 100644 ontolearner/learner/taxonomy_discovery/__init__.py delete mode 100644 
ontolearner/learner/term_typing/__init__.py delete mode 100644 ontolearner/learner/text2onto/__init__.py diff --git a/examples/llm_learner_alexbek_rag_term_typing.py b/examples/llm_learner_alexbek_rag_term_typing.py index 5723e36..3a3233f 100644 --- a/examples/llm_learner_alexbek_rag_term_typing.py +++ b/examples/llm_learner_alexbek_rag_term_typing.py @@ -1,13 +1,15 @@ # Import core modules from the OntoLearner library from ontolearner import GeoNames, train_test_split, LearnerPipeline -from ontolearner import AlexbekRAGLearner +from ontolearner.learner.term_typing.alexbek import AlexbekRAGLearner # Load the GeoNames ontology. ontology = GeoNames() ontology.load() # Extract labeled items and split into train/test sets for evaluation -train_data, test_data = train_test_split(ontology.extract(), test_size=0.2, random_state=42) +train_data, test_data = train_test_split( + ontology.extract(), test_size=0.2, random_state=42 +) # Configure a Retrieval-Augmented Generation (RAG) term-typing classifier. # - llm_model_id: generator used to predict types from the prompt + retrieved examples @@ -46,5 +48,7 @@ ) # Display the evaluation results and runtime -print("Metrics:", outputs.get("metrics")) # e.g., {'precision': ..., 'recall': ..., 'f1_micro': ..., ...} +print( + "Metrics:", outputs.get("metrics") +) # e.g., {'precision': ..., 'recall': ..., 'f1_micro': ..., ...} print("Elapsed time (s):", outputs.get("elapsed_time")) diff --git a/examples/llm_learner_alexbek_rf_term_typing.py b/examples/llm_learner_alexbek_rf_term_typing.py index c5c7454..28ca94c 100644 --- a/examples/llm_learner_alexbek_rf_term_typing.py +++ b/examples/llm_learner_alexbek_rf_term_typing.py @@ -1,6 +1,8 @@ # Import core modules from the OntoLearner library from ontolearner import GeoNames, train_test_split, LearnerPipeline -from ontolearner import AlexbekRFLearner # A random-forest term-typing learner over text+graph features +from ontolearner.learner.term_typing.alexbek import ( + AlexbekRFLearner, +) # A random-forest term-typing learner over text+graph features # Load the GeoNames ontology and extract labeled term-typing data @@ -10,31 +12,27 @@ data = ontology.extract() # Split the labeled term-typing data into train and test sets -train_data, test_data = train_test_split( - data, - test_size=0.2, - random_state=42 -) +train_data, test_data = train_test_split(data, test_size=0.2, random_state=42) # Configure the RF-based learner (embeddings + optional graph features) # - device: "cpu" or "cuda" # - threshold: decision threshold for multi-label assignment # - use_graph_features: include ontology-graph-derived features if available rf_learner = AlexbekRFLearner( - device="cpu", # switch to "cuda" if you have a GPU + device="cpu", # switch to "cuda" if you have a GPU batch_size=16, - max_length=512, # max tokenizer length for embedding model inputs - threshold=0.30, # probability cutoff for assigning each type - use_graph_features=True # set False for pure RF on text embeddings only + max_length=512, # max tokenizer length for embedding model inputs + threshold=0.30, # probability cutoff for assigning each type + use_graph_features=True, # set False for pure RF on text embeddings only ) # Build the pipeline and pass raw structured objects end-to-end. 
pipe = LearnerPipeline( retriever=rf_learner, - retriever_id="intfloat/e5-base-v2", # or "Qwen/Qwen3-Embedding-4B" if you have sufficient GPU memory - ontologizer_data=True, # True if data is already {"term": ..., "types": [...], ...} + retriever_id="intfloat/e5-base-v2", # or "Qwen/Qwen3-Embedding-4B" if you have sufficient GPU memory + ontologizer_data=True, # True if data is already {"term": ..., "types": [...], ...} device="cpu", - batch_size=16 + batch_size=16, ) # Run the full learning pipeline on the term-typing task diff --git a/examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py b/examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py index b78976f..6a42160 100644 --- a/examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py +++ b/examples/llm_learner_alexbek_self_attn_taxonomy_discovery.py @@ -1,5 +1,6 @@ from ontolearner import GeoNames, train_test_split, LearnerPipeline -from ontolearner import AlexbekCrossAttnLearner +from ontolearner.learner.taxonomy_discovery.alexbek import AlexbekCrossAttnLearner + # 1) Load & split ontology = GeoNames() ontology.load() @@ -22,9 +23,9 @@ # 3) Build pipeline pipeline = LearnerPipeline( - llm=cross_learner, # <- our learner - llm_id="cross-attn", # label for bookkeeping - ontologizer_data=False # pass raw ontology objects as in your example + llm=cross_learner, # <- our learner + llm_id="cross-attn", # label for bookkeeping + ontologizer_data=False, # pass raw ontology objects as in your example ) # 4) Train + predict + evaluate diff --git a/examples/llm_learner_alexbek_text2onto.py b/examples/llm_learner_alexbek_text2onto.py index caf4c5b..69282a9 100644 --- a/examples/llm_learner_alexbek_text2onto.py +++ b/examples/llm_learner_alexbek_text2onto.py @@ -9,14 +9,22 @@ DATA_DIR = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology" # Input paths (already saved) -TRAIN_DOCS_PATH = os.path.join(DATA_DIR, "train", "documents.jsonl") -TRAIN_TERMS2DOCS_PATH = os.path.join(DATA_DIR, "train", "terms2docs.json") -TEST_DOCS_FULL_PATH = os.path.join(DATA_DIR, "test", "text2onto_ecology_test_documents.jsonl") +TRAIN_DOCS_PATH = os.path.join(DATA_DIR, "train", "documents.jsonl") +TRAIN_TERMS2DOCS_PATH = os.path.join(DATA_DIR, "train", "terms2docs.json") +TEST_DOCS_FULL_PATH = os.path.join( + DATA_DIR, "test", "text2onto_ecology_test_documents.jsonl" +) # Output paths -DOC_TERMS_OUT_PATH = os.path.join(DATA_DIR, "test", "extracted_terms_ecology.fast.jsonl") -TERMS2TYPES_OUT_PATH = os.path.join(DATA_DIR, "test", "terms2types_pred_ecology.fast.json") -TYPES2DOCS_OUT_PATH = os.path.join(DATA_DIR, "test", "types2docs_pred_ecology.fast.json") +DOC_TERMS_OUT_PATH = os.path.join( + DATA_DIR, "test", "extracted_terms_ecology.fast.jsonl" +) +TERMS2TYPES_OUT_PATH = os.path.join( + DATA_DIR, "test", "terms2types_pred_ecology.fast.json" +) +TYPES2DOCS_OUT_PATH = os.path.join( + DATA_DIR, "test", "types2docs_pred_ecology.fast.json" +) # Device selection DEVICE = ( @@ -27,7 +35,7 @@ # Model config MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct" -LOAD_IN_4BIT = (DEVICE == "cuda") # 4-bit helps on GPU +LOAD_IN_4BIT = DEVICE == "cuda" # 4-bit helps on GPU # 1) Load LLM llm = LocalAutoLLM(device=DEVICE) @@ -52,15 +60,17 @@ # 4) Predict types for extracted terms, using the JSONL we just wrote typing_summary = learner.predict_types_from_terms( - doc_terms_jsonl=DOC_TERMS_OUT_PATH, # read the predictions directly - doc_terms_list=None, # (not needed when doc_terms_jsonl is provided) - model_id=MODEL_ID, # reuse the same small model + doc_terms_jsonl=DOC_TERMS_OUT_PATH, 
# read the predictions directly + doc_terms_list=None, # (not needed when doc_terms_jsonl is provided) + model_id=MODEL_ID, # reuse the same small model out_terms2types=TERMS2TYPES_OUT_PATH, out_types2docs=TYPES2DOCS_OUT_PATH, # use defaults for everything else ) -print(f"[types] {typing_summary['unique_terms']} unique terms | {typing_summary['types_count']} types") +print( + f"[types] {typing_summary['unique_terms']} unique terms | {typing_summary['types_count']} types" +) print(f"[saved] {TERMS2TYPES_OUT_PATH}") print(f"[saved] {TYPES2DOCS_OUT_PATH}") diff --git a/examples/llm_learner_rwthdbis_taxonomy_discovery.py b/examples/llm_learner_rwthdbis_taxonomy_discovery.py index fea5539..4412c5f 100644 --- a/examples/llm_learner_rwthdbis_taxonomy_discovery.py +++ b/examples/llm_learner_rwthdbis_taxonomy_discovery.py @@ -1,6 +1,6 @@ # Import core modules from the OntoLearner library -from ontolearner import LearnerPipeline, train_test_split -from ontolearner import ChordOntology, RWTHDBISTaxonomyLearner +from ontolearner import LearnerPipeline, train_test_split, ChordOntology +from ontolearner.learner.taxonomy_discovery.rwthdbis import RWTHDBISSFTLearner # Load the Chord ontology, which exposes hierarchical (parent, child) relations for taxonomy discovery ontology = ChordOntology() @@ -8,17 +8,16 @@ # Extract typed taxonomic edges and split into train/test while preserving the structured shape train_data, test_data = train_test_split( - ontology.extract(), - test_size=0.2, - random_state=42 + ontology.extract(), test_size=0.2, random_state=42 ) # Initialize a supervised taxonomy classifier (encoder-based fine-tuning) # Negative sampling controls the number of non-edge examples; bidirectional templates create both (p→c) and (c→p) views # Context features are optional and can be enabled with with_context=True and a JSON path of type descriptions -learner = RWTHDBISTaxonomyLearner( +learner = RWTHDBISSFTLearner( model_name="microsoft/deberta-v3-small", output_dir="./results/", + device="cpu", num_train_epochs=1, per_device_train_batch_size=8, gradient_accumulation_steps=4, @@ -48,10 +47,12 @@ ) # Display the evaluation results -print("Metrics:", outputs['metrics']) # Shows {'precision': ..., 'recall': ..., 'f1_score': ...} +print( + "Metrics:", outputs["metrics"] +) # Shows {'precision': ..., 'recall': ..., 'f1_score': ...} # Display total elapsed time for training + prediction + evaluation -print("Elapsed time:", outputs['elapsed_time']) +print("Elapsed time:", outputs["elapsed_time"]) # Print all returned outputs (include predictions) print(outputs) diff --git a/examples/llm_learner_rwthdbis_term_typing.py b/examples/llm_learner_rwthdbis_term_typing.py index 67d207f..d9bdc4b 100644 --- a/examples/llm_learner_rwthdbis_term_typing.py +++ b/examples/llm_learner_rwthdbis_term_typing.py @@ -1,8 +1,8 @@ # Import core modules from the OntoLearner library from ontolearner import LearnerPipeline, train_test_split, AgrO -from ontolearner import RWTHDBISTermTypingLearner +from ontolearner.learner.term_typing.rwthdbis import RWTHDBISSFTLearner -#load the AgrO ontology. +# load the AgrO ontology. # AgrO provides term-typing supervision where each term can be annotated with one or more types. ontology = AgrO() ontology.load() @@ -13,9 +13,10 @@ # Configure a supervised encoder-based classifier for term typing. # This fine-tunes DeBERTa v3 on (term → type) signals; increase epochs for stronger results. 
-learner = RWTHDBISTermTypingLearner( +learner = RWTHDBISSFTLearner( model_name="microsoft/deberta-v3-small", output_dir="./results/deberta-v3", + device="cpu", num_train_epochs=30, per_device_train_batch_size=16, gradient_accumulation_steps=2, @@ -41,10 +42,12 @@ ) # Display the evaluation results -print("Metrics:", outputs['metrics']) # Shows {'precision': ..., 'recall': ..., 'f1_score': ...} +print( + "Metrics:", outputs["metrics"] +) # Shows {'precision': ..., 'recall': ..., 'f1_score': ...} # Display total elapsed time for training + prediction + evaluation -print("Elapsed time:", outputs['elapsed_time']) +print("Elapsed time:", outputs["elapsed_time"]) # Print all returned outputs (include predictions) print(outputs) diff --git a/examples/llm_learner_sbunlp_fs_taxonomy_discovery.py b/examples/llm_learner_sbunlp_fs_taxonomy_discovery.py index 19797a9..2200892 100644 --- a/examples/llm_learner_sbunlp_fs_taxonomy_discovery.py +++ b/examples/llm_learner_sbunlp_fs_taxonomy_discovery.py @@ -1,19 +1,22 @@ # Import core modules from the OntoLearner library from ontolearner import GeoNames, train_test_split, LearnerPipeline + # Import the specific Few-Shot Learner implementation -from ontolearner import SBUNLPFewShotLearner +from ontolearner.learner.taxonomy_discovery.sbunlp import SBUNLPFewShotLearner # Load ontology and split # Load the GeoNames ontology for taxonomy discovery. # GeoNames provides geographic parent-child relationships (is-a hierarchy). ontology = GeoNames() ontology.load() -data = ontology.extract() # Extract the list of taxonomic relationships from the ontology object +data = ( + ontology.extract() +) # Extract the list of taxonomic relationships from the ontology object # Split the taxonomic relationships into train and test sets train_data, test_data = train_test_split( data, - test_size=0.6, # 60% of data used for testing (terms to find relations for) + test_size=0.6, # 60% of data used for testing (terms to find relations for) random_state=42, ) @@ -22,19 +25,17 @@ # This performs in-context learning via N x M batch prompting. 
llm_learner = SBUNLPFewShotLearner( # Model / decoding - model_name="Qwen/Qwen2.5-0.5B-Instruct", # The Qwen model to load - try_4bit=True, # uses 4-bit if bitsandbytes + CUDA available for memory efficiency - max_new_tokens=140, # limit the length of the model's response (for JSON output) - max_input_tokens=1500, # limit the total prompt length (context window) - temperature=0.0, # set to 0.0 for deterministic output (best for structured JSON) - top_p=1.0, # top-p sampling disabled with temperature=0.0 - + model_name="Qwen/Qwen2.5-0.5B-Instruct", # The Qwen model to load + try_4bit=True, # uses 4-bit if bitsandbytes + CUDA available for memory efficiency + max_new_tokens=140, # limit the length of the model's response (for JSON output) + max_input_tokens=1500, # limit the total prompt length (context window) + temperature=0.0, # set to 0.0 for deterministic output (best for structured JSON) + top_p=1.0, # top-p sampling disabled with temperature=0.0 # Grid settings (N x M prompts) - n_train_chunks=7, # N: split training examples (few-shot context) into 7 chunks - m_test_chunks=7, # M: split test terms (vocabulary) into 7 chunks (total 49 prompts) - + n_train_chunks=7, # N: split training examples (few-shot context) into 7 chunks + m_test_chunks=7, # M: split test terms (vocabulary) into 7 chunks (total 49 prompts) # Run controls - limit_prompts=None, # None runs all N x M prompts; set to an integer for a dry-run + limit_prompts=None, # None runs all N x M prompts; set to an integer for a dry-run output_dir="./outputs/taskC_batches", # Optional: dump per-prompt JSON results for debugging ) @@ -43,8 +44,8 @@ pipe = LearnerPipeline( llm=llm_learner, llm_id=llm_learner.model_name, - ontologizer_data=True, # Let the learner flatten structured ontology objects via its tasks_* helpers - device="auto", # automatically select CUDA or CPU + ontologizer_data=True, # Let the learner flatten structured ontology objects via its tasks_* helpers + device="auto", # automatically select CUDA or CPU ) # Run the full learning pipeline on the taxonomy-discovery task diff --git a/examples/llm_learner_sbunlp_text2onto.py b/examples/llm_learner_sbunlp_text2onto.py index 564f641..cff543c 100644 --- a/examples/llm_learner_sbunlp_text2onto.py +++ b/examples/llm_learner_sbunlp_text2onto.py @@ -1,6 +1,7 @@ import os import torch -#Import all the required classes + +# Import all the required classes from ontolearner import SBUNLPText2OntoLearner from ontolearner.learner.text2onto.sbunlp import LocalAutoLLM @@ -11,8 +12,8 @@ # Ensure the base directories exist # Creates the train and test subdirectories if they don't already exist. -os.makedirs(os.path.join(LOCAL_DATA_DIR, 'train'), exist_ok=True) -os.makedirs(os.path.join(LOCAL_DATA_DIR, 'test'), exist_ok=True) +os.makedirs(os.path.join(LOCAL_DATA_DIR, "train"), exist_ok=True) +os.makedirs(os.path.join(LOCAL_DATA_DIR, "test"), exist_ok=True) # Define local file paths: POINTING TO ALREADY SAVED FILES # These files are used as input for the Fit and Predict phases. @@ -22,10 +23,14 @@ # Output files for predictions (saved directly under LOCAL_DATA_DIR/test) # These files will be created by the predict_terms/types methods. 
-TERMS_PRED_OUT = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/test/extracted_terms_ecology.jsonl" -TYPES_PRED_OUT = "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/test/extracted_types_ecology.jsonl" +TERMS_PRED_OUT = ( + "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/test/extracted_terms_ecology.jsonl" +) +TYPES_PRED_OUT = ( + "./dataset_llms4ol_2025/TaskA-Text2Onto/ecology/test/extracted_types_ecology.jsonl" +) -#Initialize and Load Learner --- +# Initialize and Load Learner --- MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # Determine the device for inference (GPU or CPU) DEVICE = "cuda" if torch.cuda.is_available() else "cpu" @@ -47,7 +52,7 @@ train_docs_jsonl=DOCS_ALL_PATH, terms2doc_json=TERMS2DOC_PATH, sample_size=28, - seed=123 # Seed for stratified random sampling stability + seed=123, # Seed for stratified random sampling stability ) MAX_NEW_TOKENS = 100 @@ -55,7 +60,7 @@ terms_written = learner.predict_terms( docs_test_jsonl=DOCS_TEST_PATH, out_jsonl=TERMS_PRED_OUT, - max_new_tokens=MAX_NEW_TOKENS + max_new_tokens=MAX_NEW_TOKENS, ) print(f"✅ Term Extraction Complete. Wrote {terms_written} prediction lines.") @@ -63,7 +68,7 @@ types_written = learner.predict_types( docs_test_jsonl=DOCS_TEST_PATH, out_jsonl=TYPES_PRED_OUT, - max_new_tokens=MAX_NEW_TOKENS + max_new_tokens=MAX_NEW_TOKENS, ) print(f"✅ Type Extraction Complete. Wrote {types_written} prediction lines.") @@ -77,5 +82,7 @@ print(f"Final Type Extraction F1: {f1_type:.4f}") except Exception as e: - # Catches errors like missing sklearn (ImportError) or missing prediction files (FileNotFoundError) - print(f"❌ Evaluation Error: {e}. Ensure sklearn is installed and prediction files were created.") + # Catches errors like missing sklearn (ImportError) or missing prediction files (FileNotFoundError) + print( + f"❌ Evaluation Error: {e}. Ensure sklearn is installed and prediction files were created." + ) diff --git a/examples/llm_learner_sbunlp_zs_term_typing.py b/examples/llm_learner_sbunlp_zs_term_typing.py index 75d01da..54c070c 100644 --- a/examples/llm_learner_sbunlp_zs_term_typing.py +++ b/examples/llm_learner_sbunlp_zs_term_typing.py @@ -1,30 +1,30 @@ # Import core modules from the OntoLearner library from ontolearner import AgrO, train_test_split, LearnerPipeline + # Import the specific Zero-Shot Learner implementation for Term Typing -from ontolearner import SBUNLPZSLearner +from ontolearner.learner.term_typing.sbunlp import SBUNLPZSLearner # Load ontology and split # Load the AgrO ontology for type inventory and test data. ontology = AgrO() ontology.load() -data = ontology.extract() # Extract the full set of relationships/terms +data = ontology.extract() # Extract the full set of relationships/terms # Split the data into train (to learn type inventory) and test (terms to predict) train_data, test_data = train_test_split( data, - test_size=0.6, # 60% of data used for testing + test_size=0.6, # 60% of data used for testing random_state=42, ) # Configure the Qwen Zero-Shot learner (inference-only) # This learner's 'fit' phase learns the vocabulary of allowed type labels. 
llm_learner = SBUNLPZSLearner( - # Model / decoding - model_id="Qwen/Qwen2.5-0.5B-Instruct", # The Qwen model to load - # device= is auto-detected - max_new_tokens=64, # Sufficient length for JSON list of types - temperature=0.0, # Ensures deterministic (greedy) output - # token= None, # Assuming public model access + device="cpu", + max_new_tokens=64, + temperature=0.0, + model_id="Qwen/Qwen2.5-0.5B-Instruct", + token=None, ) # Build pipeline and run @@ -33,7 +33,7 @@ llm=llm_learner, llm_id=llm_learner.model_id, ontologizer_data=False, - device="cpu", # select CUDA or CPU + device="cpu", # select CUDA or CPU ) # Run the full learning pipeline on the Term-Typing task diff --git a/examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py b/examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py index 3661a5b..5c87925 100644 --- a/examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py +++ b/examples/llm_learner_skhnlp_sft_taxonomoy_discovery.py @@ -1,6 +1,6 @@ # Import core modules from the OntoLearner library from ontolearner import GeoNames, train_test_split, LearnerPipeline -from ontolearner import SKHNLPSequentialFTLearner +from ontolearner.learner.taxonomy_discovery.skhnlp import SKHNLPSequentialFTLearner # Load ontology and split # Load the GeoNames ontology for taxonomy discovery. @@ -10,11 +10,7 @@ data = ontology.extract() # Split the taxonomic relationships into train and test sets -train_data, test_data = train_test_split( - data, - test_size=0.2, - random_state=42 -) +train_data, test_data = train_test_split(data, test_size=0.2, random_state=42) # Configure the learner with user-defined training args + device # Configure the supervised BERT SFT Learner for taxonomy discovery. @@ -23,7 +19,7 @@ model_name="bert-large-uncased", n_prompts=2, random_state=1403, - device="cpu", # Note: CPU training for BERT-Large is very slow. + device="cpu", # Note: CPU training for BERT-Large is very slow. output_dir="./results/", num_train_epochs=1, per_device_train_batch_size=8, diff --git a/examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py b/examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py index 90391f5..fec0ddd 100644 --- a/examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py +++ b/examples/llm_learner_skhnlp_zs_taxonomoy_discovery.py @@ -1,7 +1,8 @@ # Import core modules from the OntoLearner library -from ontolearner import GeoNames, train_test_split, LearnerPipeline, SKHNLPZSLearner +from ontolearner import GeoNames, train_test_split, LearnerPipeline +from ontolearner.learner.taxonomy_discovery.skhnlp import SKHNLPZSLearner -#Load ontology and split data +# Load ontology and split data # The GeoNames ontology provides geographic term types and relationships. ontology = GeoNames() ontology.load() @@ -16,11 +17,11 @@ # This model uses a fixed prompt and string normalization (Levenshtein) to classify terms. 
llm_learner = SKHNLPZSLearner( model_name="Qwen/Qwen2.5-0.5B-Instruct", - device="cpu", # use "cuda" if you have a GPU + device="cpu", # use "cuda" if you have a GPU max_new_tokens=16, - save_path="./outputs/", # directory or full file path for CSV + save_path="./outputs/", # directory or full file path for CSV verbose=True, - normalize_mode="levenshtein", # "none" | "substring" | "levenshtein" | "auto" + normalize_mode="levenshtein", # "none" | "substring" | "levenshtein" | "auto" ) # Build pipeline and run @@ -33,7 +34,7 @@ # Run the full learning pipeline on the taxonomy-discovery task outputs = pipe( - train_data=train_data, # zero-shot; ignored by the LLM learner + train_data=train_data, # zero-shot; ignored by the LLM learner test_data=test_data, task="taxonomy-discovery", evaluate=True, diff --git a/ontolearner/learner/taxonomy_discovery/__init__.py b/ontolearner/learner/taxonomy_discovery/__init__.py deleted file mode 100644 index 57a845b..0000000 --- a/ontolearner/learner/taxonomy_discovery/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2025 SciKnowOrg -# -# Licensed under the MIT License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/MIT -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .rwthdbis import RWTHDBISSFTLearner -from .skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner -from .sbunlp import SBUNLPFewShotLearner -from .alexbek import AlexbekCrossAttnLearner diff --git a/ontolearner/learner/taxonomy_discovery/alexbek.py b/ontolearner/learner/taxonomy_discovery/alexbek.py index 616d50f..3623f16 100644 --- a/ontolearner/learner/taxonomy_discovery/alexbek.py +++ b/ontolearner/learner/taxonomy_discovery/alexbek.py @@ -24,33 +24,70 @@ from ...base import AutoLearner + class RMSNorm(nn.Module): """Root Mean Square normalization with learnable scale. - Computes: y = weight * x / sqrt(mean(x^2) + eps) + Computes per-position normalization: + y = weight * x / sqrt(mean(x^2) + eps) + + This variant normalizes over the last dimension and keeps scale as a + learnable parameter, similar to RMSNorm used in modern transformer stacks. """ def __init__(self, dim: int, eps: float = 1e-6): + """Initialize the RMSNorm layer. + + Args: + dim: Size of the last (feature) dimension to normalize over. + eps: Small constant added inside the square root for numerical + stability. + """ super().__init__() self.eps = eps self.weight = nn.Parameter(torch.ones(dim)) def forward(self, x: torch.Tensor) -> torch.Tensor: + """Apply RMS normalization. + + Args: + x: Input tensor of shape (..., dim). + + Returns: + Tensor of the same shape as `x`, RMS-normalized over the last axis. + """ rms_inv = torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) return self.weight * (x * rms_inv) + class CrossAttentionHead(nn.Module): """Minimal multi-head *pair* scorer using cross-attention-style projections. - Given child vector c and parent vector p: - q = Wq * c, k = Wk * p - per-head score = (q_h · k_h) / sqrt(d_head) - aggregate by mean across heads, then sigmoid to get probability. 
+ Given child vector `c` and parent vector `p`: + q = W_q * c, k = W_k * p + score_head = (q_h · k_h) / sqrt(d_head) + + We average the per-head scores and apply a sigmoid to produce a probability. + This is not a full attention block—just a learnable similarity function. """ - def __init__(self, hidden_size: int, num_heads: int = 8, rms_norm_eps: float = 1e-6): + def __init__( + self, hidden_size: int, num_heads: int = 8, rms_norm_eps: float = 1e-6 + ): + """Initialize projections and per-stream normalizers. + + Args: + hidden_size: Dimensionality of input embeddings (child/parent). + num_heads: Number of subspaces to split the projection into. + rms_norm_eps: Epsilon for RMSNorm stability. + + Raises: + AssertionError: If `hidden_size` is not divisible by `num_heads`. + """ super().__init__() - assert hidden_size % num_heads == 0, "hidden_size must be divisible by num_heads" + assert hidden_size % num_heads == 0, ( + "hidden_size must be divisible by num_heads" + ) self.hidden_size = hidden_size self.num_heads = num_heads self.dim_per_head = hidden_size // num_heads @@ -67,14 +104,17 @@ def __init__(self, hidden_size: int, num_heads: int = 8, rms_norm_eps: float = 1 nn.init.xavier_uniform_(self.query_projection.weight) nn.init.xavier_uniform_(self.key_projection.weight) - def forward(self, child_embeddings: torch.Tensor, parent_embeddings: torch.Tensor) -> torch.Tensor: + def forward( + self, child_embeddings: torch.Tensor, parent_embeddings: torch.Tensor + ) -> torch.Tensor: """Score (child, parent) pairs. Args: - child_embeddings: Tensor of shape (batch, hidden_size) - parent_embeddings: Tensor of shape (batch, hidden_size) + child_embeddings: Tensor of shape (batch, hidden_size). + parent_embeddings: Tensor of shape (batch, hidden_size). + Returns: - Tensor of probabilities with shape (batch,) + Tensor of probabilities with shape (batch,), each in [0, 1]. """ batch_size, _ = child_embeddings.shape @@ -95,14 +135,17 @@ def forward(self, child_embeddings: torch.Tensor, parent_embeddings: torch.Tenso # Map to probability return torch.sigmoid(mean_score) + class AlexbekCrossAttnLearner(AutoLearner): """Cross-Attention Taxonomy Learner (inherits AutoLearner). - - Encodes type strings with a SentenceTransformer. - - Trains a small cross-attention head to score (parent, child) edges. - - Predicts probabilities for provided pairs. + Workflow + - Encode terms with a SentenceTransformer. + - Train a compact cross-attention head on (parent, child) pairs + (positives + sampled negatives) using BCE loss. + - Inference returns probabilities per pair; edges with prob >= 0.5 are + labeled as positive. - Helper functions live in this same module (below), *not* as class methods. """ def __init__( @@ -122,11 +165,26 @@ def __init__( ): """Configure the learner. - All configuration is kept directly on the learner (no separate Config class). + Args: + embedding_model: SentenceTransformer model id/path for term encoding. + device: 'cuda' or 'cpu'. If 'cuda' is requested but unavailable, CPU + is used. + num_heads: Number of heads in the cross-attention scorer. + lr: Learning rate for AdamW. + weight_decay: Weight decay for AdamW. + num_epochs: Number of epochs to train the head. + batch_size: Minibatch size for training and scoring loops. + neg_ratio: Number of sampled negatives per positive during training. + output_dir: Directory to store artifacts (reserved for future use). + seed: Random seed for reproducibility. + **kwargs: Passed through to `AutoLearner` base init. 
+ + Side Effects: + Creates `output_dir` if missing and seeds Python/Torch RNGs. """ super().__init__(**kwargs) - # ----- hyperparameters / settings ----- + # hyperparameters / settings self.embedding_model_id = embedding_model self.requested_device = device self.num_heads = num_heads @@ -157,25 +215,62 @@ def __init__( torch.manual_seed(self.seed) def load(self, **kwargs: Any): - """Load the sentence embedding model and initialize the cross-attention head.""" + """Load the sentence embedding model and initialize the cross-attention head. + + Args: + **kwargs: Optional override, supports `embedding_model`. + + Side Effects: + - Initializes `self.embedder` on the configured device. + - Probes and stores `self.embedding_dim`. + - Constructs `self.cross_attn_head` with the probed dimensionality. + """ model_id = kwargs.get("embedding_model", self.embedding_model_id) - self.embedder = SentenceTransformer(model_id, trust_remote_code=True, device=str(self.device)) + self.embedder = SentenceTransformer( + model_id, trust_remote_code=True, device=str(self.device) + ) # Probe output dimensionality using a dummy encode - probe_embedding = self.embedder.encode(["_dim_probe_"], convert_to_tensor=True, normalize_embeddings=False) + probe_embedding = self.embedder.encode( + ["_dim_probe_"], convert_to_tensor=True, normalize_embeddings=False + ) self.embedding_dim = int(probe_embedding.shape[-1]) # Initialize the cross-attention head - self.cross_attn_head = CrossAttentionHead(hidden_size=self.embedding_dim, num_heads=self.num_heads).to( - self.device - ) + self.cross_attn_head = CrossAttentionHead( + hidden_size=self.embedding_dim, num_heads=self.num_heads + ).to(self.device) def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]: + """Train or infer taxonomy edges according to the AutoLearner contract. + + Training (`test=False`) + - Extract positives (parent, child) and the unique term set from `data`. + - Build/extend the term embedding cache. + - Sample negatives at ratio `self.negative_ratio`. + - Train the cross-attention head with BCE loss. + + Inference (`test=True`) + - Ensure embeddings exist for all terms. + - Score candidate pairs and return per-pair probabilities and labels. + + Args: + data: Ontology-like object exposing `type_taxonomies.taxonomies`, + where each item has `.parent` and `.child` string-like fields. + test: If True, perform inference instead of training. + + Returns: + - `None` on training. + - On inference: List of dicts + `{"parent": str, "child": str, "score": float, "label": int}`. 
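+
+        Example (illustrative only; the terms and score are hypothetical and the
+        method is normally invoked through the AutoLearner pipeline):
+            >>> predictions = learner._taxonomy_discovery(test_ontology, test=True)
+            >>> predictions[0]
+            {'parent': 'waterbody', 'child': 'lake', 'score': 0.87, 'label': 1}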
+ """ if self.embedder is None or self.cross_attn_head is None: self.load() if not test: - positive_pairs, unique_terms = self._extract_parent_child_pairs_and_terms(data) + positive_pairs, unique_terms = self._extract_parent_child_pairs_and_terms( + data + ) self._ensure_term_embeddings(unique_terms) negative_pairs = self._sample_negative_pairs( positive_pairs, unique_terms, ratio=self.negative_ratio, seed=self.seed @@ -183,27 +278,42 @@ def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]: self._train_cross_attn_head(positive_pairs, negative_pairs) return None else: - candidate_pairs, unique_terms = self._extract_parent_child_pairs_and_terms(data) + candidate_pairs, unique_terms = self._extract_parent_child_pairs_and_terms( + data + ) self._ensure_term_embeddings(unique_terms, append_only=True) probabilities = self._score_parent_child_pairs(candidate_pairs) predictions = [ - {"parent": parent, "child": child, "score": float(prob), "label": int(prob >= 0.5)} + { + "parent": parent, + "child": child, + "score": float(prob), + "label": int(prob >= 0.5), + } for (parent, child), prob in zip(candidate_pairs, probabilities) ] return predictions - def _ensure_term_embeddings(self, terms: List[str], append_only: bool = False) -> None: + def _ensure_term_embeddings( + self, terms: List[str], append_only: bool = False + ) -> None: """Encode terms with the sentence embedder and store in cache. Args: - terms: list of unique strings to embed - append_only: if True, only embed terms missing from cache + terms: List of unique term strings to embed. + append_only: If True, only embed terms missing from the cache; + otherwise (re)encode all provided terms. + + Raises: + RuntimeError: If called before `load()`. """ if self.embedder is None: raise RuntimeError("Call load() before building term embeddings") - terms_to_encode = [t for t in terms if t not in self.term_to_vector] if append_only else terms + terms_to_encode = ( + [t for t in terms if t not in self.term_to_vector] if append_only else terms + ) if not terms_to_encode: return @@ -217,38 +327,78 @@ def _ensure_term_embeddings(self, terms: List[str], append_only: bool = False) - for term, embedding in zip(terms_to_encode, embeddings): self.term_to_vector[term] = embedding.detach().to(self.device) - def _pairs_as_tensors(self, pairs: List[Tuple[str, str]]) -> Tuple[torch.Tensor, torch.Tensor]: - """Turn list of (parent, child) strings into two aligned tensors on device.""" + def _pairs_as_tensors( + self, pairs: List[Tuple[str, str]] + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Convert string pairs into aligned embedding tensors on the correct device. + + Args: + pairs: List of (parent, child) term strings. + + Returns: + Tuple `(child_tensor, parent_tensor)` where each tensor has shape + `(batch, embedding_dim)` and is located on `self.device`. + + Notes: + This function assumes that all terms in `pairs` are present in + `self.term_to_vector`. Use `_ensure_term_embeddings` beforehand. 
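+
+        Example (shapes only; the terms are hypothetical and assumed to already
+        be present in the embedding cache):
+            >>> child_t, parent_t = self._pairs_as_tensors([("waterbody", "lake")])
+            >>> child_t.shape == parent_t.shape == (1, self.embedding_dim)
+            True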
+ """ # child embeddings tensor of shape (batch, dim) - child_tensor = torch.stack([self.term_to_vector[child] for (_, child) in pairs], dim=0).to(self.device) + child_tensor = torch.stack( + [self.term_to_vector[child] for (_, child) in pairs], dim=0 + ).to(self.device) # parent embeddings tensor of shape (batch, dim) - parent_tensor = torch.stack([self.term_to_vector[parent] for (parent, _) in pairs], dim=0).to(self.device) + parent_tensor = torch.stack( + [self.term_to_vector[parent] for (parent, _) in pairs], dim=0 + ).to(self.device) return child_tensor, parent_tensor - def _train_cross_attn_head(self, positive_pairs: List[Tuple[str, str]], negative_pairs: List[Tuple[str, str]]) -> None: - """Train the cross-attention head with BCE loss on labeled pairs.""" + def _train_cross_attn_head( + self, + positive_pairs: List[Tuple[str, str]], + negative_pairs: List[Tuple[str, str]], + ) -> None: + """Train the cross-attention head with BCE loss on labeled pairs. + + The dataset is a concatenation of positives (label 1) and sampled + negatives (label 0). The head is optimized with AdamW. + + Args: + positive_pairs: List of ground-truth (parent, child) edges. + negative_pairs: List of sampled non-edges. + + Raises: + RuntimeError: If the head has not been initialized (call `load()`). + """ if self.cross_attn_head is None: raise RuntimeError("Head not initialized. Call load().") self.cross_attn_head.train() optimizer = torch.optim.AdamW( - self.cross_attn_head.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay + self.cross_attn_head.parameters(), + lr=self.learning_rate, + weight_decay=self.weight_decay, ) # Build a simple supervised dataset: 1 for positive, 0 for negative - labeled_pairs: List[Tuple[int, Tuple[str, str]]] = [(1, pc) for pc in positive_pairs] + [ - (0, nc) for nc in negative_pairs - ] + labeled_pairs: List[Tuple[int, Tuple[str, str]]] = [ + (1, pc) for pc in positive_pairs + ] + [(0, nc) for nc in negative_pairs] random.shuffle(labeled_pairs) - def iterate_minibatches(items: List[Tuple[int, Tuple[str, str]]], batch_size: int): + def iterate_minibatches( + items: List[Tuple[int, Tuple[str, str]]], batch_size: int + ): + """Yield contiguous minibatches of size `batch_size` from `items`.""" for start in range(0, len(items), batch_size): yield items[start : start + batch_size] for epoch in range(self.num_epochs): epoch_loss_sum = 0.0 for minibatch in iterate_minibatches(labeled_pairs, self.batch_size): - labels = torch.tensor([y for y, _ in minibatch], dtype=torch.float32, device=self.device) + labels = torch.tensor( + [y for y, _ in minibatch], dtype=torch.float32, device=self.device + ) string_pairs = [pc for _, pc in minibatch] child_tensor, parent_tensor = self._pairs_as_tensors(string_pairs) @@ -261,9 +411,18 @@ def iterate_minibatches(items: List[Tuple[int, Tuple[str, str]]], batch_size: in epoch_loss_sum += float(loss.item()) * len(minibatch) - def _score_parent_child_pairs(self, pairs: List[Tuple[str, str]]) -> List[float]: - """Compute probability scores for (parent, child) pairs.""" + """Compute probability scores for (parent, child) pairs. + + Args: + pairs: List of candidate (parent, child) edges to score. + + Returns: + List of floats in [0, 1] corresponding to the input order. + + Raises: + RuntimeError: If the head has not been initialized (call `load()`). + """ if self.cross_attn_head is None: raise RuntimeError("Head not initialized. 
Call load().") @@ -277,8 +436,23 @@ def _score_parent_child_pairs(self, pairs: List[Tuple[str, str]]) -> List[float] scores.extend(prob.detach().cpu().tolist()) return scores - def _extract_parent_child_pairs_and_terms(self, data): - parent_child_pairs = [] + def _extract_parent_child_pairs_and_terms( + self, data: Any + ) -> Tuple[List[Tuple[str, str]], List[str]]: + """Extract (parent, child) edges and the set of unique terms from an ontology-like object. + + The function expects `data.type_taxonomies.taxonomies` to be an iterable + of objects with `.parent` and `.child` string-like attributes. + + Args: + data: Ontology-like container. + + Returns: + A tuple `(pairs, terms)` where: + - `pairs` is a list of (parent, child) strings, + - `terms` is a sorted list of unique term strings (parents ∪ children). + """ + parent_child_pairs: List[Tuple[str, str]] = [] unique_terms = set() for edge in getattr(data, "type_taxonomies").taxonomies: parent, child = str(edge.parent), str(edge.child) @@ -287,11 +461,32 @@ def _extract_parent_child_pairs_and_terms(self, data): unique_terms.add(child) return parent_child_pairs, sorted(unique_terms) - def _sample_negative_pairs(self, positive_pairs, terms, ratio: float = 1.0, seed: int = 42): + def _sample_negative_pairs( + self, + positive_pairs: List[Tuple[str, str]], + terms: List[str], + ratio: float = 1.0, + seed: int = 42, + ) -> List[Tuple[str, str]]: + """Sample random negative (parent, child) pairs not present in positives. + + Sampling is uniform over the Cartesian product of `terms` excluding + (x, x) self-pairs and any pair found in `positive_pairs`. + + Args: + positive_pairs: Known positive edges to exclude. + terms: Candidate vocabulary (parents ∪ children). + ratio: Number of negatives per positive to draw. + seed: RNG seed used for reproducible sampling. + + Returns: + A list of sampled negative pairs of approximate length + `int(len(positive_pairs) * ratio)`. + """ random.seed(seed) term_list = list(terms) positive_set = set(positive_pairs) - negatives = [] + negatives: List[Tuple[str, str]] = [] target_negative_count = int(len(positive_pairs) * ratio) while len(negatives) < target_negative_count: parent = random.choice(term_list) diff --git a/ontolearner/learner/taxonomy_discovery/rwthdbis.py b/ontolearner/learner/taxonomy_discovery/rwthdbis.py index 47989c5..c535016 100644 --- a/ontolearner/learner/taxonomy_discovery/rwthdbis.py +++ b/ontolearner/learner/taxonomy_discovery/rwthdbis.py @@ -16,9 +16,7 @@ import os import random import re -import time import platform -import multiprocessing from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Callable @@ -39,16 +37,45 @@ from ...base import AutoLearner + class RWTHDBISSFTLearner(AutoLearner): """ Supervised classifier for (parent, child) taxonomy edges. Model input format: - " ## " - - If no `context_json_path` is provided, the class precomputes a - context file ({ontology_name}_processed.json) directly from the ontology - object. + " ## " + + Context building: + If no `context_json_path` is provided, the learner precomputes a fixed-name + context file `rwthdbis_onto_processed.json` under `output_dir/context/` + from the ontology terms and stores the path in `self.context_json_path`. + + Attributes: + model_name: Hugging Face model identifier. + output_dir: Directory where checkpoints and tokenizer are saved/loaded. 
+ min_predictions: If no candidate is predicted positive, return the top-k + by positive probability (k = min_predictions). + max_length: Maximum tokenized length for inputs. + per_device_train_batch_size: Micro-batch size per device. + gradient_accumulation_steps: Gradient accumulation steps. + num_train_epochs: Number of training epochs. + learning_rate: Optimizer LR. + weight_decay: Weight decay for AdamW. + logging_steps: Logging interval for Trainer. + save_strategy: HF saving strategy (e.g., 'epoch'). + save_total_limit: Max checkpoints to keep. + fp16: Enable FP16 mixed precision. + bf16: Enable BF16 mixed precision (on supported hardware). + seed: Random seed for reproducibility. + negative_ratio: Number of negatives per positive during training. + bidirectional_templates: If True, also add reversed template examples. + context_json_path: Path to the preprocessed term-context JSON. If None, + the file is generated with the fixed prefix `rwthdbis_onto_*`. + ontology_name: Logical dataset/domain label used in prompts and filtering + (filenames still use the fixed `rwthdbis_onto_*` prefix). + device: user-defined argument as 'cuda' or 'cpu'. + model: Loaded/initialized `AutoModelForSequenceClassification`. + tokenizer: Loaded/initialized `AutoTokenizer`. """ # Sentences containing any of these phrases are pruned from term_info. @@ -78,7 +105,8 @@ def __init__( self, min_predictions: int = 1, model_name: str = "distilroberta-base", - output_dir: str = "./results/{model_name}", + output_dir: str = "./results/taxonomy-discovery", + device: str = "cpu", max_length: int = 256, per_device_train_batch_size: int = 8, gradient_accumulation_steps: int = 4, @@ -94,56 +122,176 @@ def __init__( negative_ratio: int = 5, bidirectional_templates: bool = True, context_json_path: Optional[str] = None, - ontology_name: str = "Geonames" + ontology_name: str = "Geonames", ) -> None: + """ + Initialize the taxonomy-edge learner and set training/inference knobs. + + Notes: + - Output artifacts are written under `output_dir`, including + the model weights and tokenizer (for later `from_pretrained` loads). + - If `context_json_path` is not provided, a new context file named + `rwthdbis_onto_processed.json` is generated under `output_dir/context/`. + """ super().__init__() self.model_name = model_name - self.safe_model_name = model_name.replace("/", "__") + safe_model_name = model_name.replace("/", "__") - resolved_output = output_dir.format(model_name=self.safe_model_name) + resolved_output = output_dir.format(model_name=safe_model_name) self.output_dir = str(Path(resolved_output)) Path(self.output_dir).mkdir(parents=True, exist_ok=True) - self.min_predictions = int(min_predictions) - self.max_length = int(max_length) - self.per_device_train_batch_size = int(per_device_train_batch_size) - self.gradient_accumulation_steps = int(gradient_accumulation_steps) - self.num_train_epochs = float(num_train_epochs) - self.learning_rate = float(learning_rate) - self.weight_decay = float(weight_decay) - self.logging_steps = int(logging_steps) - self.save_strategy = str(save_strategy) - self.save_total_limit = int(save_total_limit) - self.fp16 = bool(fp16) - self.bf16 = bool(bf16) - self.seed = int(seed) - - self.negative_ratio = int(negative_ratio) - self.bidirectional_templates = bool(bidirectional_templates) + # Store provided argument values as-is (types are enforced by callers). 
+ self.min_predictions = min_predictions + self.max_length = max_length + self.per_device_train_batch_size = per_device_train_batch_size + self.gradient_accumulation_steps = gradient_accumulation_steps + self.num_train_epochs = num_train_epochs + self.learning_rate = learning_rate + self.weight_decay = weight_decay + self.logging_steps = logging_steps + self.save_strategy = save_strategy + self.save_total_limit = save_total_limit + self.fp16 = fp16 + self.bf16 = bf16 + self.seed = seed + + self.negative_ratio = negative_ratio + self.bidirectional_templates = bidirectional_templates self.context_json_path = context_json_path self.ontology_name = ontology_name - self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.device = device self.model: Optional[AutoModelForSequenceClassification] = None self.tokenizer: Optional[AutoTokenizer] = None - os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") - os.environ.setdefault("WANDB_DISABLED", "true") - os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1") + # Context caches built from the context JSON. + self._context_exact: Dict[str, str] = {} # lower(term) -> info + self._context_rows: List[ + Dict[str, str] + ] = [] # [{'term': str, 'term_info': str}, ...] + + def _is_windows(self) -> bool: + """Return True if the current OS is Windows (NT).""" + return (os.name == "nt") or (platform.system().lower() == "windows") + + def _normalize_text(self, raw_text: str, *, drop_questions: bool = False) -> str: + """ + Normalize plain text consistently across the pipeline. + + Operations: + - Remove markdown-like link patterns (e.g., '[[1]](http://...)'). + - Replace newlines with spaces; collapse repeated spaces. + - Optionally drop sentences containing '?' (useful for model generations). + + Args: + raw_text: Input text to normalize. + drop_questions: If True, filter out sentences with '?'. + + Returns: + str: Cleaned single-line string. + """ + if raw_text is None: + return "" + text = str(raw_text) + + # Remove simple markdown link artifacts like [[1]](http://...) + text = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", text) + + # Replace newlines with spaces and collapse multiple spaces + text = text.replace("\n", " ") + text = re.sub(r"\s{2,}", " ", text) + + if drop_questions: + sentences = [s.strip() for s in text.split(".")] + sentences = [s for s in sentences if s and "?" not in s] + text = ". ".join(sentences) + + return text.strip() + + def _default_gpt_inference_with_dataset(self, term: str, dataset_name: str) -> str: + """ + Generate a plain-text description for `term`, conditioned on `dataset_name`, + via g4f (best-effort). Falls back to an empty string on failure. + + The raw output is then normalized with `_normalize_text(drop_questions=True)`. + + Args: + term: Term to describe. + dataset_name: Ontology/domain name used in the prompt. + + Returns: + str: Cleaned paragraph describing the term, or "" on failure. + """ + prompt = ( + f"Here is a: {term}, which is of domain name :{dataset_name}, translate it into english, " + "Provide as detailed a definition of this term as possible in plain text.without any markdown format." + "No reference link in result. " + "- Focus on intrinsic properties; do not name other entities or explicit relationships.\n" + "- Include classification/type, defining features, scope/scale, roles/functions, and measurable attributes when applicable.\n" + "Output: Plain text paragraphs only, neutral and factual." 
+ f"Make sure all provided information can be used for discovering implicit relation of other {dataset_name} term, but don't mention the relation in result." + ) + + try: + client = _G4FClient() + response = client.chat.completions.create( + model=g4f.models.default, + messages=[{"role": "user", "content": prompt}], + ) + raw_text = ( + response.choices[0].message.content + if response and response.choices + else "" + ) + except Exception: + raw_text = "" # best-effort fallback - self._context_exact: Dict[str, str] = {} # lower(term) -> info - self._context_rows: List[Dict[str, str]] = [] # [{'term': str, 'term_info': str}, ...] + return self._normalize_text(raw_text, drop_questions=True) def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]: + """ + AutoLearner hook: route to training or prediction. + + Args: + data: Ontology-like object (has `.taxonomies` or `.type_taxonomies.taxonomies`). + test: If True, run inference; otherwise, train a model. + + Returns: + If test=True, a list of accepted edges as dicts with keys `parent` and `child`; + otherwise None. + """ return self._predict_pairs(data) if test else self._train_from_pairs(data) def _train_from_pairs(self, train_data: Any) -> None: + """ + Train a binary classifier from ontology pairs. + + Steps: + 1) (Re)build the term-context JSON unless `context_json_path` is set. + 2) Extract positive (parent, child) edges from `train_data`. + 3) Sample negatives at `negative_ratio`. + 4) Tokenize, instantiate HF Trainer, train, and save. + + Args: + train_data: Ontology-like object with `.type_taxonomies.taxonomies` + (preferred) or `.taxonomies`, each item providing `parent` and `child`. + + Raises: + ValueError: If no positive pairs are found. + + Side Effects: + - Writes a trained model to `self.output_dir` (via `trainer.save_model`). + - Writes the tokenizer to `self.output_dir` (via `save_pretrained`). + - Sets `self.context_json_path` if it was previously unset. + The generated context file is named `rwthdbis_onto_processed.json`. 
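+
+        Example (illustrative sketch; `train_ontology` stands in for any object
+        exposing `type_taxonomies.taxonomies`, and the output path is an assumption):
+            >>> learner = RWTHDBISSFTLearner(output_dir="./results/taxonomy-discovery")
+            >>> learner._train_from_pairs(train_ontology)
+            # Afterwards the trained model and tokenizer live in output_dir, and
+            # self.context_json_path points at .../context/rwthdbis_onto_processed.json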
+ """ # Always (re)build context from ontology unless an explicit file is provided if not self.context_json_path: context_dir = Path(self.output_dir) / "context" context_dir.mkdir(parents=True, exist_ok=True) - processed_context_file = context_dir / f"{self.ontology_name}_processed.json" + processed_context_file = context_dir / "rwthdbis_onto_processed.json" # Remove stale file then regenerate if processed_context_file.exists(): @@ -157,10 +305,12 @@ def _train_from_pairs(self, train_data: Any) -> None: processed_dir=context_dir, dataset_name=self.ontology_name, num_workers=max(1, min(os.cpu_count() or 2, 4)), - provider=partial(self._default_gpt_inference_with_dataset, dataset_name=self.ontology_name), + provider=partial( + self._default_gpt_inference_with_dataset, + dataset_name=self.ontology_name, + ), max_retries=5, ) - self.context_json_path = str(processed_context_file) # Reproducibility @@ -175,19 +325,23 @@ def _train_from_pairs(self, train_data: Any) -> None: if not positive_pairs: raise ValueError("No positive (parent, child) pairs found in train_data.") - entity_names = sorted({parent for parent, _ in positive_pairs} | {child for _, child in positive_pairs}) + entity_names = sorted( + {parent for parent, _ in positive_pairs} + | {child for _, child in positive_pairs} + ) negative_pairs = self._generate_negatives( positives=positive_pairs, entities=entity_names, ratio=self.negative_ratio, ) - labels, texts = self._build_text_dataset(positive_pairs, negative_pairs) - - - datasets = DatasetDict({"train": Dataset.from_dict({"label": labels, "text": texts})}) + labels, input_texts = self._build_text_dataset(positive_pairs, negative_pairs) + dataset_dict = DatasetDict( + {"train": Dataset.from_dict({"label": labels, "text": input_texts})} + ) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + # Ensure a pad token exists for robust padding across models. if self.tokenizer.pad_token is None: self.tokenizer.pad_token = ( getattr(self.tokenizer, "eos_token", None) @@ -196,10 +350,15 @@ def _train_from_pairs(self, train_data: Any) -> None: ) def tokenize_batch(batch: Dict[str, List[str]]): - return self.tokenizer(batch["text"], truncation=True, max_length=self.max_length) + """Tokenize a batch of input texts for HF Datasets mapping.""" + return self.tokenizer( + batch["text"], truncation=True, max_length=self.max_length + ) - tokenized = datasets.map(tokenize_batch, batched=True, remove_columns=["text"]) - collator = DataCollatorWithPadding(self.tokenizer) + tokenized_dataset = dataset_dict.map( + tokenize_batch, batched=True, remove_columns=["text"] + ) + data_collator = DataCollatorWithPadding(self.tokenizer) self.model = AutoModelForSequenceClassification.from_pretrained( self.model_name, @@ -207,10 +366,14 @@ def tokenize_batch(batch: Dict[str, List[str]]): id2label={0: "incorrect", 1: "correct"}, label2id={"incorrect": 0, "correct": 1}, ) - if getattr(self.model.config, "pad_token_id", None) is None and self.tokenizer.pad_token_id is not None: + # Ensure model has a pad_token_id if tokenizer provides one. 
+ if ( + getattr(self.model.config, "pad_token_id", None) is None + and self.tokenizer.pad_token_id is not None + ): self.model.config.pad_token_id = self.tokenizer.pad_token_id - train_args = TrainingArguments( + training_args = TrainingArguments( output_dir=self.output_dir, learning_rate=self.learning_rate, per_device_train_batch_size=self.per_device_train_batch_size, @@ -220,7 +383,7 @@ def tokenize_batch(batch: Dict[str, List[str]]): save_strategy=self.save_strategy, save_total_limit=self.save_total_limit, logging_steps=self.logging_steps, - dataloader_pin_memory = bool(torch.cuda.is_available()), + dataloader_pin_memory=bool(torch.cuda.is_available()), fp16=self.fp16, bf16=self.bf16, report_to="none", @@ -229,16 +392,30 @@ def tokenize_batch(batch: Dict[str, List[str]]): trainer = Trainer( model=self.model, - args=train_args, - train_dataset=tokenized["train"], + args=training_args, + train_dataset=tokenized_dataset["train"], tokenizer=self.tokenizer, - data_collator=collator, + data_collator=data_collator, ) trainer.train() - trainer.save_model(self.output_dir) + trainer.save_model() + # Persist tokenizer alongside the model for from_pretrained() loads. self.tokenizer.save_pretrained(self.output_dir) def _predict_pairs(self, eval_data: Any) -> List[Dict[str, str]]: + """ + Score candidate pairs and return those predicted as positive. + + If no pair is predicted positive but `min_predictions` > 0, the top-k + pairs by positive probability are returned. + + Args: + eval_data: Ontology-like object with either `.pairs` (preferred) or + `.type_taxonomies.taxonomies` / `.taxonomies`. + + Returns: + list[dict]: Each dict has keys `parent` and `child`. + """ import torch.nn.functional as F self._ensure_loaded_for_inference() @@ -247,55 +424,90 @@ def _predict_pairs(self, eval_data: Any) -> List[Dict[str, str]]: if not candidate_pairs: return [] - accepted: List[Dict[str, str]] = [] + accepted_pairs: List[Dict[str, str]] = [] scored_candidates: List[Tuple[float, str, str, int]] = [] self.model.eval() with torch.no_grad(): for parent_term, child_term in candidate_pairs: input_text = self._format_input(parent_term, child_term) - inputs = self.tokenizer(input_text, return_tensors="pt", truncation=True, max_length=self.max_length) - inputs = {k: v.to(self.device) for k, v in inputs.items()} + inputs = self.tokenizer( + input_text, + return_tensors="pt", + truncation=True, + max_length=self.max_length, + ) + inputs = {key: tensor.to(self.device) for key, tensor in inputs.items()} logits = self.model(**inputs).logits - probs = F.softmax(logits, dim=-1).squeeze(0) - p_positive = float(probs[1].item()) + probabilities = F.softmax(logits, dim=-1).squeeze(0) + p_positive = float(probabilities[1].item()) predicted_label = int(torch.argmax(logits, dim=-1).item()) - scored_candidates.append((p_positive, parent_term, child_term, predicted_label)) + scored_candidates.append( + (p_positive, parent_term, child_term, predicted_label) + ) if predicted_label == 1: - accepted.append({"parent": parent_term, "child": child_term}) + accepted_pairs.append({"parent": parent_term, "child": child_term}) - if accepted: - return accepted + if accepted_pairs: + return accepted_pairs top_k = max(0, int(self.min_predictions)) if top_k == 0: return [] scored_candidates.sort(key=lambda item: item[0], reverse=True) - return [{"parent": parent_term, "child": child_term} - for (_prob, parent_term, child_term, _pred) in scored_candidates[:top_k]] + return [ + {"parent": parent_term, "child": child_term} + for (_prob, parent_term, 
child_term, _pred) in scored_candidates[:top_k] + ] def _ensure_loaded_for_inference(self) -> None: + """ + Load model and tokenizer from `self.output_dir` if not already loaded. + + Side Effects: + - Sets `self.model` and `self.tokenizer`. + - Moves the model to `self.device`. + - Ensures `tokenizer.pad_token_id` is set if model config provides one. + """ if self.model is not None and self.tokenizer is not None: return - self.model = AutoModelForSequenceClassification.from_pretrained(self.output_dir).to(self.device) + self.model = AutoModelForSequenceClassification.from_pretrained( + self.output_dir + ).to(self.device) self.tokenizer = AutoTokenizer.from_pretrained(self.output_dir) - if self.tokenizer.pad_token_id is None and getattr(self.model.config, "pad_token_id", None) is not None: + if ( + self.tokenizer.pad_token_id is None + and getattr(self.model.config, "pad_token_id", None) is not None + ): self.tokenizer.pad_token_id = self.model.config.pad_token_id def _load_context_map(self) -> None: - """Build exact and fuzzy maps from {ontology_name}_processed.json.""" - if not (self.context_json_path): + """ + Populate in-memory maps from the context JSON (`self.context_json_path`). + + Builds: + - `_context_exact`: dict mapping lowercased term → term_info. + - `_context_rows`: list of dict rows with 'term' and 'term_info'. + + If `context_json_path` is falsy or loading fails, both structures become empty. + """ + if not self.context_json_path: self._context_exact = {} self._context_rows = [] return try: rows = json.load(open(self.context_json_path, "r", encoding="utf-8")) self._context_exact = { - str(row.get("term", "")).strip().lower(): str(row.get("term_info", "")).strip() + str(row.get("term", "")).strip().lower(): str( + row.get("term_info", "") + ).strip() for row in rows } self._context_rows = [ - {"term": str(row.get("term", "")), "term_info": str(row.get("term_info", ""))} + { + "term": str(row.get("term", "")), + "term_info": str(row.get("term_info", "")), + } for row in rows ] except Exception: @@ -304,8 +516,17 @@ def _load_context_map(self) -> None: def _lookup_context_info(self, raw_term: str) -> str: """ - Loose context lookup: split by commas, strip whitespace, case-insensitive - substring match against any row['term']. Join hits with '.'. + Retrieve textual context for a term using exact and simple fuzzy matching. + + - Exact: lowercased term lookup in `_context_exact`. + - Fuzzy: split `raw_term` by commas, strip whitespace; treat each piece + as a case-insensitive substring against row['term']. + + Args: + raw_term: Original term string (possibly comma-separated). + + Returns: + str: Concatenated matches' term_info ('.' joined). Empty string if none. """ if not raw_term: return "" @@ -329,27 +550,62 @@ def _lookup_context_info(self, raw_term: str) -> str: def _extract_positive_pairs(self, ontology_obj: Any) -> List[Tuple[str, str]]: """ - Read pairs from ontology_obj.type_taxonomies.taxonomies (or fallback to .taxonomies). - Each item must provide 'parent' and 'child' attributes/keys. + Extract positive (parent, child) edges from an ontology-like object. + + Reads from `ontology_obj.type_taxonomies.taxonomies` (preferred) or + falls back to `ontology_obj.taxonomies`. Each item must expose `parent` + and `child` as attributes or dict keys. + + Returns: + list[tuple[str, str]]: (parent, child) pairs (may be empty). 
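+
+        Example (illustrative; the terms are hypothetical):
+            An item carrying `parent="facility"` and `child="airport"` contributes
+            the tuple `("facility", "airport")`, whether it is an object with those
+            attributes or a dict with those keys.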
""" type_taxonomies = getattr(ontology_obj, "type_taxonomies", None) - items = getattr(type_taxonomies, "taxonomies", None) if type_taxonomies is not None else getattr(ontology_obj, "taxonomies", None) + items = ( + getattr(type_taxonomies, "taxonomies", None) + if type_taxonomies is not None + else getattr(ontology_obj, "taxonomies", None) + ) pairs: List[Tuple[str, str]] = [] if items: for item in items: - parent_term = getattr(item, "parent", None) if not isinstance(item, dict) else item.get("parent") - child_term = getattr(item, "child", None) if not isinstance(item, dict) else item.get("child") + parent_term = ( + getattr(item, "parent", None) + if not isinstance(item, dict) + else item.get("parent") + ) + child_term = ( + getattr(item, "child", None) + if not isinstance(item, dict) + else item.get("child") + ) if parent_term and child_term: pairs.append((str(parent_term), str(child_term))) return pairs def _extract_pairs_for_eval(self, ontology_obj: Any) -> List[Tuple[str, str]]: + """ + Extract candidate pairs for evaluation. + + Prefers `ontology_obj.pairs` if present; otherwise falls back to the + positive pairs from the ontology (see `_extract_positive_pairs`). + + Returns: + list[tuple[str, str]]: Candidate (parent, child) pairs. + """ candidate_pairs = getattr(ontology_obj, "pairs", None) if candidate_pairs: pairs: List[Tuple[str, str]] = [] for item in candidate_pairs: - parent_term = getattr(item, "parent", None) if not isinstance(item, dict) else item.get("parent") - child_term = getattr(item, "child", None) if not isinstance(item, dict) else item.get("child") + parent_term = ( + getattr(item, "parent", None) + if not isinstance(item, dict) + else item.get("parent") + ) + child_term = ( + getattr(item, "child", None) + if not isinstance(item, dict) + else item.get("child") + ) if parent_term and child_term: pairs.append((str(parent_term), str(child_term))) return pairs @@ -361,29 +617,66 @@ def _generate_negatives( entities: List[str], ratio: int, ) -> List[Tuple[str, str]]: + """ + Sample negative edges by excluding known positives and self-pairs. + + Constructs the cartesian product of entities (excluding (x, x)), + removes all known positives, and samples up to `ratio * len(positives)` + negatives uniformly at random. + + Args: + positives: Known positive edges. + entities: Unique set/list of entity terms. + ratio: Target negatives per positive (lower-bounded by 1×). + + Returns: + list[tuple[str, str]]: Sampled negative pairs (may be smaller). + """ positive_set = set(positives) - all_possible = {(parent_term, child_term) for parent_term in entities for child_term in entities if parent_term != child_term} + all_possible = { + (parent, child) + for parent in entities + for child in entities + if parent != child + } negative_candidates = list(all_possible - positive_set) target_count = max(len(positive_set) * max(1, ratio), len(positive_set)) sample_count = min(target_count, len(negative_candidates)) - return random.sample(negative_candidates, k=sample_count) if sample_count > 0 else [] + return ( + random.sample(negative_candidates, k=sample_count) + if sample_count > 0 + else [] + ) def _build_text_dataset( self, positives: List[Tuple[str, str]], negatives: List[Tuple[str, str]], ) -> Tuple[List[int], List[str]]: + """ + Create parallel lists of labels and input texts for HF Datasets. + + Builds formatted inputs using `_format_input`, and duplicates examples in + the reverse direction if `bidirectional_templates` is True. 
+
+        Returns:
+            tuple[list[int], list[str]]: (labels, input_texts) where labels are
+                1 for positive and 0 for negative.
+        """
         self._load_context_map()
         labels: List[int] = []
         input_texts: List[str] = []
 
         def add_example(parent_term: str, child_term: str, label_value: int) -> None:
+            """Append one (and optionally reversed) example to the dataset."""
             input_texts.append(self._format_input(parent_term, child_term))
             labels.append(label_value)
             if self.bidirectional_templates:
-                input_texts.append(self._format_input(child_term, parent_term, reverse=True))
+                input_texts.append(
+                    self._format_input(child_term, parent_term, reverse=True)
+                )
                 labels.append(label_value)
 
         for parent_term, child_term in positives:
@@ -393,7 +686,15 @@ def add_example(parent_term: str, child_term: str, label_value: int) -> None:
 
         return labels, input_texts
 
-    def _format_input(self, parent_term: str, child_term: str, reverse: bool = False) -> str:
+    def _format_input(
+        self, parent_term: str, child_term: str, reverse: bool = False
+    ) -> str:
+        """
+        Format a (parent, child) pair into relation text + optional context.
+
+        Returns:
+            str: The relation sentence, optionally followed by "## Context. 'parent': ... 'child': ...".
+        """
         relation_text = (
             f"{child_term} is a subclass / child / subtype / descendant class of {parent_term}"
             if reverse
@@ -405,63 +706,70 @@ def _format_input(self, parent_term: str, child_term: str, reverse: bool = False
         if not parent_info and not child_info:
             return relation_text
 
-        context_text = f"## Context. '{parent_term}': {parent_info} '{child_term}': {child_info}"
+        context_text = (
+            f"## Context. '{parent_term}': {parent_info} '{child_term}': {child_info}"
+        )
         return f"{relation_text} {context_text}"
 
-    @staticmethod
-    def _is_windows() -> bool:
-        return (os.name == "nt") or (platform.system().lower() == "windows")
-
-    @staticmethod
-    def _default_gpt_inference_with_dataset(term: str, dataset_name: str) -> str:
-        """
-        Generate a plain-text description for `term`, tailored by `dataset_name`.
-        Uses g4f if available; otherwise returns an empty string.
+    def _fill_bucket_threaded(
+        self, bucket_rows: List[dict], output_path: Path, provider: Callable[[str], str]
+    ) -> None:
         """
-        prompt = (
-            f"Here is a: {term}, which is of domain name :{dataset_name}, translate it into english, "
-            "Provide as detailed a definition of this term as possible in plain text.without any markdown format."
-            "No reference link in result. "
-            "- Focus on intrinsic properties; do not name other entities or explicit relationships.\n"
-            "- Include classification/type, defining features, scope/scale, roles/functions, and measurable attributes when applicable.\n"
-            "Output: Plain text paragraphs only, neutral and factual."
-            f"Make sure all provided information can be used for discovering implicit relation of other {dataset_name} term, but don't mention the relation in result."
-        )
+        Populate a shard with provider-generated `term_info` using threads.
+        Resumes from `output_path` if it already exists, periodically writes
+        progress (every ~10 items), and finally dumps the full bucket to disk.
+ """ + start_index = 0 try: - client = _G4FClient() - response = client.chat.completions.create( - model=g4f.models.default, - messages=[{"role": "user", "content": prompt}], - ) - raw_text = response.choices[0].message.content if response and response.choices else "" + if output_path.is_file(): + existing_rows = json.load(open(output_path, "r", encoding="utf-8")) + if isinstance(existing_rows, list) and existing_rows: + bucket_rows[: len(existing_rows)] = existing_rows + start_index = len(existing_rows) except Exception: - raw_text = "" # or some deterministic fallback - - # Clean up - cleaned = re.sub(r"[\*\-\#]", "", raw_text) - cleaned = re.sub(r"\n\s*\n", " ", cleaned) - cleaned = cleaned.replace("\n", " ") - cleaned = re.sub(r"\s{2,}", " ", cleaned) - cleaned = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", cleaned) - sentences = [sentence for sentence in cleaned.split(".") if "?" not in sentence] - return ".".join(sentences).strip() - - @staticmethod - def _clean_term_info(raw_text: str) -> str: - """Normalize whitespace and remove link artifacts.""" - cleaned = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", str(raw_text)) - cleaned = re.sub(r"\s+", " ", cleaned).strip() - return cleaned - - @classmethod - def _merge_part_files(cls, dataset_name: str, merged_path: Path, part_paths: List[Path]) -> None: + pass + + for row_index in range(start_index, len(bucket_rows)): + try: + bucket_rows[row_index]["term_info"] = provider( + bucket_rows[row_index]["term"] + ) + except Exception: + bucket_rows[row_index]["term_info"] = "" + if row_index % 10 == 1: + json.dump( + bucket_rows[: row_index + 1], + open(output_path, "w", encoding="utf-8"), + ensure_ascii=False, + indent=2, + ) + + json.dump( + bucket_rows, + open(output_path, "w", encoding="utf-8"), + ensure_ascii=False, + indent=2, + ) + + def _merge_part_files( + self, dataset_name: str, merged_path: Path, shard_paths: List[Path] + ) -> None: + """ + Merge shard files into one JSON and filter boilerplate sentences. + + - Reads shard lists/dicts from `shard_paths`. + - Drops sentences that contain markers in `_CONTEXT_REMOVALS` or the + `dataset_name` string. + - Normalizes the remaining text via `_normalize_text`. + - Writes merged JSON to `merged_path`, then best-effort deletes shards. 
+ """ merged_rows: List[dict] = [] - for part_path in part_paths: + for shard_path in shard_paths: try: - if not part_path.is_file(): + if not shard_path.is_file(): continue - part_content = json.load(open(part_path, "r", encoding="utf-8")) + part_content = json.load(open(shard_path, "r", encoding="utf-8")) if isinstance(part_content, list): merged_rows.extend(part_content) elif isinstance(part_content, dict): @@ -469,165 +777,111 @@ def _merge_part_files(cls, dataset_name: str, merged_path: Path, part_paths: Lis except Exception: continue - removal_markers = list(cls._CONTEXT_REMOVALS) + [dataset_name] + removal_markers = list(self._CONTEXT_REMOVALS) + [dataset_name] for row in merged_rows: term_info_raw = str(row.get("term_info", "")) kept_sentences: List[str] = [] for sentence in term_info_raw.split("."): - sentence_no_links = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", sentence) + sentence_no_links = re.sub( + r"\[\[\d+\]\]\(https?://[^\)]+\)", "", sentence + ) if any(marker in sentence_no_links for marker in removal_markers): continue kept_sentences.append(sentence_no_links) - row["term_info"] = cls._clean_term_info(".".join(kept_sentences)) + row["term_info"] = self._normalize_text( + ".".join(kept_sentences), drop_questions=False + ) merged_path.parent.mkdir(parents=True, exist_ok=True) - json.dump(merged_rows, open(merged_path, "w", encoding="utf-8"), ensure_ascii=False, indent=4) + json.dump( + merged_rows, + open(merged_path, "w", encoding="utf-8"), + ensure_ascii=False, + indent=4, + ) # best-effort cleanup - for part_path in part_paths: + for shard_path in shard_paths: try: - os.remove(part_path) + os.remove(shard_path) except Exception: pass - @staticmethod - def _fill_bucket_threaded(bucket_rows: List[dict], output_path: Path, provider: Callable[[str], str]) -> None: - start_index = 0 - try: - if output_path.is_file(): - existing_rows = json.load(open(output_path, "r", encoding="utf-8")) - if isinstance(existing_rows, list) and existing_rows: - bucket_rows[: len(existing_rows)] = existing_rows - start_index = len(existing_rows) - except Exception: - pass - - for row_index in range(start_index, len(bucket_rows)): - try: - bucket_rows[row_index]["term_info"] = provider(bucket_rows[row_index]["term"]) - except Exception: - bucket_rows[row_index]["term_info"] = "" - if row_index % 10 == 1: - json.dump(bucket_rows[: row_index + 1], open(output_path, "w", encoding="utf-8"), ensure_ascii=False, indent=2) - - json.dump(bucket_rows, open(output_path, "w", encoding="utf-8"), ensure_ascii=False, indent=2) - - @staticmethod - def _fill_bucket_process( - worker_id: int, - bucket_rows: List[dict], - output_path: Path, - provider: Callable[[str], str], - progress_map: "multiprocessing.managers.DictProxy", - ) -> None: - current_index = 0 - try: - if output_path.is_file(): - existing_rows = json.load(open(output_path, "r", encoding="utf-8")) - if isinstance(existing_rows, list) and existing_rows: - bucket_rows[: len(existing_rows)] = existing_rows - current_index = len(existing_rows) - except Exception: - pass - - progress_map[worker_id] = current_index - - for row_index in range(current_index, len(bucket_rows)): - try: - bucket_rows[row_index]["term_info"] = provider(bucket_rows[row_index]["term"]) - except Exception: - bucket_rows[row_index]["term_info"] = "" - progress_map[worker_id] = row_index + 1 - if row_index % 10 == 1: - json.dump(bucket_rows[: row_index + 1], open(output_path, "w", encoding="utf-8"), ensure_ascii=False, indent=2) - - json.dump(bucket_rows, open(output_path, "w", 
encoding="utf-8"), ensure_ascii=False, indent=2) - progress_map[worker_id] = len(bucket_rows) - - @classmethod def _execute_for_terms( - cls, + self, terms: List[str], merged_path: Path, - part_paths: List[Path], + shard_paths: List[Path], provider: Callable[[str], str], dataset_name: str, num_workers: int = 2, ) -> None: """ - Generate context for `terms`, writing shards to `part_paths`, then merge. - Threads on Windows; processes on POSIX. + Generate context for `terms`, writing shards to `shard_paths`, then merge. + + Always uses threads (pickling-safe for instance methods). + Shows a tqdm progress bar and merges shards at the end. """ worker_count = max(1, min(num_workers, os.cpu_count() or 2, 4)) - all_rows = [{"id": row_index, "term": term, "term_info": ""} for row_index, term in enumerate(terms)] + all_rows = [ + {"id": index, "term": term, "term_info": ""} + for index, term in enumerate(terms) + ] buckets: List[List[dict]] = [[] for _ in range(worker_count)] for reversed_index, row in enumerate(reversed(all_rows)): buckets[reversed_index % worker_count].append(row) - if cls._is_windows(): - total_rows = len(terms) - progress_bar = tqdm(total=total_rows, desc=f"{dataset_name} generation (threads)") - - def run_bucket(bucket_rows: List[dict], out_path: Path) -> int: - cls._fill_bucket_threaded(bucket_rows, out_path, provider) - return len(bucket_rows) - - with ThreadPoolExecutor(max_workers=worker_count) as pool: - futures = [pool.submit(run_bucket, buckets[bucket_index], part_paths[bucket_index]) - for bucket_index in range(worker_count)] - for future in as_completed(futures): - completed_count = future.result() - if progress_bar: - progress_bar.update(completed_count) - if progress_bar: - progress_bar.close() - else: - manager = multiprocessing.Manager() - progress_map = manager.dict({worker_index: 0 for worker_index in range(worker_count)}) - - processes: List[multiprocessing.Process] = [] - for worker_index, bucket_rows in enumerate(buckets): - process = multiprocessing.Process( - target=cls._fill_bucket_process, - args=(worker_index, bucket_rows, part_paths[worker_index], provider, progress_map), + total_rows = len(terms) + progress_bar = tqdm( + total=total_rows, desc=f"{dataset_name} generation (threads)" + ) + + def run_bucket(bucket_rows: List[dict], out_path: Path) -> int: + self._fill_bucket_threaded(bucket_rows, out_path, provider) + return len(bucket_rows) + + with ThreadPoolExecutor(max_workers=worker_count) as pool: + futures = [ + pool.submit( + run_bucket, buckets[bucket_index], shard_paths[bucket_index] ) - processes.append(process) - process.start() - - total_rows = len(terms) - with tqdm(total=total_rows, desc=f"{dataset_name} generation") as progress_bar: - previous_total = 0 - while any(process.is_alive() for process in processes): - current_total = int(sum(progress_map.values())) - progress_bar.update(current_total - previous_total) - previous_total = current_total - time.sleep(0.5) - current_total = int(sum(progress_map.values())) - if current_total > previous_total: - progress_bar.update(current_total - previous_total) - - for process in processes: - process.join() - - cls._merge_part_files(dataset_name, merged_path, part_paths) - - @classmethod + for bucket_index in range(worker_count) + ] + for future in as_completed(futures): + completed_count = future.result() + if progress_bar: + progress_bar.update(completed_count) + if progress_bar: + progress_bar.close() + + self._merge_part_files(dataset_name, merged_path, shard_paths) + def _re_infer_short_entries( 
- cls, + self, merged_path: Path, - re_part_paths: List[Path], + re_shard_paths: List[Path], re_merged_path: Path, provider: Callable[[str], str], dataset_name: str, num_workers: int, ) -> int: """ - Re-query terms with too-short term_info (< 50 chars). Returns remaining count. + Re-query terms whose `term_info` is too short (< 50 chars). + + Process: + - Read `merged_path`. + - Filter boilerplate using `_CONTEXT_REMOVALS` and `dataset_name`. + - Split into short/long groups by length 50. + - Regenerate short group with `provider` in parallel (threads). + - Merge regenerated + long back into `merged_path`. + + Returns: + int: Count of rows still < 50 chars after re-inference. """ merged_rows = json.load(open(merged_path, "r", encoding="utf-8")) - removal_markers = list(cls._CONTEXT_REMOVALS) + [dataset_name] + removal_markers = list(self._CONTEXT_REMOVALS) + [dataset_name] short_rows: List[dict] = [] long_rows: List[dict] = [] @@ -635,9 +889,14 @@ def _re_infer_short_entries( term_info_raw = str(row.get("term_info", "")) sentences = term_info_raw.split(".") for marker in removal_markers: - sentences = [sentence if marker not in sentence else "" for sentence in sentences] - filtered_info = re.sub(r"\[\[\d+\]\]\(https?://[^\)]+\)", "", ".".join(sentences)) + sentences = [ + sentence if marker not in sentence else "" for sentence in sentences + ] + filtered_info = self._normalize_text( + ".".join(sentences), drop_questions=False + ) row["term_info"] = filtered_info + (short_rows if len(filtered_info) < 50 else long_rows).append(row) worker_count = max(1, min(num_workers, os.cpu_count() or 2, 4)) @@ -645,77 +904,83 @@ def _re_infer_short_entries( for row_index, row in enumerate(short_rows): buckets[row_index % worker_count].append(row) - # clean old re-inference shards - for path in re_part_paths: + # Clean old re-inference shards + for path in re_shard_paths: try: os.remove(path) except Exception: pass total_candidates = len(short_rows) - if cls._is_windows(): - progress_bar = tqdm(total=total_candidates, desc=f"{dataset_name} re-inference (threads)") - - def run_bucket(bucket_rows: List[dict], out_path: Path) -> int: - cls._fill_bucket_threaded(bucket_rows, out_path, provider) - return len(bucket_rows) - - with ThreadPoolExecutor(max_workers=worker_count) as pool: - futures = [pool.submit(run_bucket, buckets[bucket_index], re_part_paths[bucket_index]) - for bucket_index in range(worker_count)] - for future in as_completed(futures): - completed_count = future.result() - if progress_bar: - progress_bar.update(completed_count) - if progress_bar: - progress_bar.close() - else: - manager = multiprocessing.Manager() - progress_map = manager.dict({worker_index: 0 for worker_index in range(worker_count)}) - - processes: List[multiprocessing.Process] = [] - for worker_index, bucket_rows in enumerate(buckets): - process = multiprocessing.Process( - target=cls._fill_bucket_process, - args=(worker_index, bucket_rows, re_part_paths[worker_index], provider, progress_map), + progress_bar = tqdm( + total=total_candidates, desc=f"{dataset_name} re-inference (threads)" + ) + + def run_bucket(bucket_rows: List[dict], out_path: Path) -> int: + self._fill_bucket_threaded(bucket_rows, out_path, provider) + return len(bucket_rows) + + with ThreadPoolExecutor(max_workers=worker_count) as pool: + futures = [ + pool.submit( + run_bucket, buckets[bucket_index], re_shard_paths[bucket_index] ) - processes.append(process) - process.start() - - with tqdm(total=total_candidates, desc=f"{dataset_name} re-inference") as 
progress_bar: - previous_total = 0 - while any(process.is_alive() for process in processes): - current_total = int(sum(progress_map.values())) - progress_bar.update(current_total - previous_total) - previous_total = current_total - time.sleep(1) - if progress_bar.n < total_candidates: - progress_bar.update(total_candidates - progress_bar.n) - - for process in processes: - process.join() - - # merge and write back - cls._merge_part_files(dataset_name, re_merged_path, re_part_paths) - new_rows = json.load(open(re_merged_path, "r", encoding="utf-8")) if re_merged_path.is_file() else [] + for bucket_index in range(worker_count) + ] + for future in as_completed(futures): + completed_count = future.result() + if progress_bar: + progress_bar.update(completed_count) + if progress_bar: + progress_bar.close() + + # Merge and write back + self._merge_part_files(dataset_name, re_merged_path, re_shard_paths) + new_rows = ( + json.load(open(re_merged_path, "r", encoding="utf-8")) + if re_merged_path.is_file() + else [] + ) final_rows = long_rows + new_rows - json.dump(final_rows, open(merged_path, "w", encoding="utf-8"), ensure_ascii=False, indent=4) + json.dump( + final_rows, + open(merged_path, "w", encoding="utf-8"), + ensure_ascii=False, + indent=4, + ) - remaining_short = sum(1 for row in final_rows if len(str(row.get("term_info", ""))) < 50) + remaining_short = sum( + 1 for row in final_rows if len(str(row.get("term_info", ""))) < 50 + ) return remaining_short - @staticmethod - def _extract_terms_from_ontology(ontology: Any) -> List[str]: + def _extract_terms_from_ontology(self, ontology: Any) -> List[str]: """ - Collect unique term names from ontology.type_taxonomies.taxonomies. + Collect unique term names from `ontology.type_taxonomies.taxonomies`, + falling back to `ontology.taxonomies` if needed. + + Returns: + list[str]: Sorted unique term list. """ type_taxonomies = getattr(ontology, "type_taxonomies", None) - taxonomies = getattr(type_taxonomies, "taxonomies", None) if type_taxonomies is not None else getattr(ontology, "taxonomies", None) + taxonomies = ( + getattr(type_taxonomies, "taxonomies", None) + if type_taxonomies is not None + else getattr(ontology, "taxonomies", None) + ) unique_terms: set[str] = set() if taxonomies: for row in taxonomies: - parent_term = getattr(row, "parent", None) if not isinstance(row, dict) else row.get("parent") - child_term = getattr(row, "child", None) if not isinstance(row, dict) else row.get("child") + parent_term = ( + getattr(row, "parent", None) + if not isinstance(row, dict) + else row.get("parent") + ) + child_term = ( + getattr(row, "child", None) + if not isinstance(row, dict) + else row.get("child") + ) if parent_term: unique_terms.add(str(parent_term)) if child_term: @@ -732,15 +997,32 @@ def preprocess_context_from_ontology( max_retries: int = 5, ) -> Path: """ - Build {id, term, term_info} from an ontology object. - Always regenerates {dataset_name}_processed.json. + Build `{id, term, term_info}` rows from an ontology object. + + Always regenerates the fixed-name file `rwthdbis_onto_processed.json`, + performing: + - Parallel generation of term_info in shards (`_execute_for_terms`), + - Re-inference rounds for short entries (`_re_infer_short_entries`), + - Final merge and cleanup, + - Updates `self.context_json_path`. 
+ + Filenames under `processed_dir`: + - merged: `rwthdbis_onto_processed.json` + - shards: `rwthdbis_onto_type_part{idx}.json` + - re-infer shards: `rwthdbis_onto_re_inference{idx}.json` + - re-infer merged: `rwthdbis_onto_Types_re_inference.json` + + Returns: + Path: The merged context JSON path (`rwthdbis_onto_processed.json`). """ - provider = provider or provider or partial(self._default_gpt_inference_with_dataset, dataset_name=dataset_name) + provider = provider or partial( + self._default_gpt_inference_with_dataset, dataset_name=dataset_name + ) processed_dir = Path(processed_dir) processed_dir.mkdir(parents=True, exist_ok=True) - merged_path = processed_dir / f"{dataset_name}_processed.json" + merged_path = processed_dir / "rwthdbis_onto_processed.json" if merged_path.exists(): try: merged_path.unlink() @@ -748,12 +1030,18 @@ def preprocess_context_from_ontology( pass worker_count = max(1, min(num_workers, os.cpu_count() or 2, 4)) - shard_paths = [processed_dir / f"{dataset_name}_type_part{shard_index}.json" for shard_index in range(worker_count)] - reinf_paths = [processed_dir / f"{dataset_name}_re_inference{shard_index}.json" for shard_index in range(worker_count)] - reinf_merged_path = processed_dir / f"{dataset_name}_Types_re_inference.json" - - # remove any leftover shards - for path in shard_paths + reinf_paths + [reinf_merged_path]: + shard_paths = [ + processed_dir / f"rwthdbis_onto_type_part{index}.json" + for index in range(worker_count) + ] + re_shard_paths = [ + processed_dir / f"rwthdbis_onto_re_inference{index}.json" + for index in range(worker_count) + ] + re_merged_path = processed_dir / "rwthdbis_onto_Types_re_inference.json" + + # Remove any leftover shards + for path in shard_paths + re_shard_paths + [re_merged_path]: try: if path.exists(): path.unlink() @@ -766,7 +1054,7 @@ def preprocess_context_from_ontology( self._execute_for_terms( terms=unique_terms, merged_path=merged_path, - part_paths=shard_paths, + shard_paths=shard_paths, provider=provider, dataset_name=dataset_name, num_workers=worker_count, @@ -776,13 +1064,15 @@ def preprocess_context_from_ontology( while retry_round < max_retries: remaining_count = self._re_infer_short_entries( merged_path=merged_path, - re_part_paths=reinf_paths, - re_merged_path=reinf_merged_path, + re_shard_paths=re_shard_paths, + re_merged_path=re_merged_path, provider=provider, dataset_name=dataset_name, num_workers=worker_count, ) - print(f"[Preprocess] Re-infer round {retry_round + 1} done. Remaining short entries: {remaining_count}") + print( + f"[Preprocess] Re-infer round {retry_round + 1} done. Remaining short entries: {remaining_count}" + ) retry_round += 1 if remaining_count == 0: break diff --git a/ontolearner/learner/taxonomy_discovery/sbunlp.py b/ontolearner/learner/taxonomy_discovery/sbunlp.py index 9fc520d..660ec6e 100644 --- a/ontolearner/learner/taxonomy_discovery/sbunlp.py +++ b/ontolearner/learner/taxonomy_discovery/sbunlp.py @@ -1,45 +1,33 @@ # Copyright (c) 2025 SciKnowOrg -# -# Licensed under the MIT License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/MIT -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+# License: MIT import os import re import json -import importlib.util -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional import torch -from transformers import AutoTokenizer, AutoModelForCausalLM - +from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig from ...base import AutoLearner + class SBUNLPFewShotLearner(AutoLearner): """ - Taxonomy-discovery via N×M batch prompting with a small Qwen model. - - Lifecycle - --------- - fit(): - Cache + clean training parent–child pairs. - predict(): - Chunk (train pairs × test terms), prompt per chunk pair, parse, merge, - and deduplicate predicted relations. + Few-shot taxonomy discovery via N×M batch prompting. + + This learner: + - Caches & cleans gold parent–child pairs during `fit`. + - Splits (train pairs × test terms) into a grid of chunks. + - Builds an instruction prompt per grid cell with few-shot JSON examples. + - Generates and parses model outputs as JSON relations. + - Merges & deduplicates all predicted edges. """ def __init__( self, model_name: str = "Qwen/Qwen2.5-0.5B-Instruct", try_4bit: bool = True, + device: str = "cpu", num_train_chunks: int = 7, num_test_chunks: int = 7, max_new_tokens: int = 140, @@ -50,88 +38,117 @@ def __init__( output_dir: Optional[str] = None, **kwargs: Any, ) -> None: + """ + Initialize the learner and core generation / batching settings. + + Args: + model_name: HF id/path of the causal LLM (e.g., Qwen Instruct). + try_4bit: If True and on CUDA, load with 4-bit NF4 quantization. + device: "cpu" or "cuda" for model execution. + num_train_chunks: Number of chunks for the gold (parent, child) bank. + num_test_chunks: Number of chunks for the test term list. + max_new_tokens: Max new tokens to generate per prompt call. + max_input_tokens: Clip the *input* prompt to this many tokens (tail kept). + temperature: Sampling temperature; 0.0 uses greedy decoding. + top_p: Nucleus sampling parameter (used when temperature > 0). + limit_num_prompts: Optional hard cap on prompts issued (debug/cost). + output_dir: Optional directory to save per-batch JSON predictions. + **kwargs: Forwarded to the base class. + """ super().__init__(**kwargs) self.model_name = model_name self.try_4bit = try_4bit + self.device = device self.num_train_chunks = num_train_chunks self.num_test_chunks = num_test_chunks - self.max_new_tokens = max_new_tokens self.max_input_tokens = max_input_tokens self.temperature = temperature self.top_p = top_p self.limit_num_prompts = limit_num_prompts - self.output_dir = output_dir self.tokenizer: Optional[AutoTokenizer] = None self.model: Optional[AutoModelForCausalLM] = None - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.train_pairs_clean: List[Dict[str, str]] = [] - # ----------------------- small helpers ---------------------- - def _clean_pairs(pair_rows: List[Dict[str, str]]) -> List[Dict[str, str]]: - """ - Normalize, drop empty or self-relations, and deduplicate by (parent, child). + def _clean_pairs(self, pair_rows: List[Dict[str, str]]) -> List[Dict[str, str]]: """ - cleaned_pairs: List[Dict[str, str]] = [] - seen_parent_child: set[Tuple[str, str]] = set() + Normalize, filter, and deduplicate relation pairs. - for pair_record in pair_rows or []: - if not isinstance(pair_record, dict): - continue + Operations: + - Cast 'parent'/'child' to strings and strip whitespace. + - Drop rows with empty values. + - Drop self-relations (case-insensitive parent == child). 
+ - Deduplicate by lowercase (parent, child). - parent_label = str(pair_record.get("parent", "")).strip() - child_label = str(pair_record.get("child", "")).strip() - if not parent_label or not child_label: - continue + Args: + pair_rows: Raw list of dicts with at least 'parent' and 'child'. - normalized_key = (parent_label.lower(), child_label.lower()) - if normalized_key[0] == normalized_key[1]: # parent==child + Returns: + Cleaned list of {'parent','child'} dicts. + """ + cleaned, seen = [], set() + for rec in pair_rows or []: + if not isinstance(rec, dict): + continue + p = str(rec.get("parent", "")).strip() + c = str(rec.get("child", "")).strip() + if not p or not c: continue - if normalized_key in seen_parent_child: + key = (p.lower(), c.lower()) + if key[0] == key[1] or key in seen: continue + seen.add(key) + cleaned.append({"parent": p, "child": c}) + return cleaned - seen_parent_child.add(normalized_key) - cleaned_pairs.append({"parent": parent_label, "child": child_label}) + def _chunk_list(self, items: List[Any], num_chunks: int) -> List[List[Any]]: + """ + Split a list into `num_chunks` near-equal contiguous parts. - return cleaned_pairs + Args: + items: Sequence to split. + num_chunks: Number of chunks to produce; if <= 0, returns [items]. - def _chunk_list(items: List[Any], num_chunks: int) -> List[List[Any]]: - """ - Split `items` into `num_chunks` near-equal parts. Some chunks may be empty. + Returns: + List of chunks (some may be empty if len(items) < num_chunks). """ if num_chunks <= 0: return [items] - total_items = len(items) - base_size, remainder = divmod(total_items, num_chunks) - - chunks: List[List[Any]] = [] - start_index = 0 - for chunk_index in range(num_chunks): - current_size = base_size + (1 if chunk_index < remainder else 0) - end_index = start_index + current_size - chunks.append(items[start_index:end_index]) - start_index = end_index - return chunks - - def _ensure_dir(self, maybe_path: Optional[str]) -> None: - if maybe_path: - os.makedirs(maybe_path, exist_ok=True) - - # ---------------------- model load/gen ---------------------- - def load(self, **_: Any) -> None: + n = len(items) + base, rem = divmod(n, num_chunks) + out, start = [], 0 + for i in range(num_chunks): + size = base + (1 if i < rem else 0) + out.append(items[start : start + size]) + start += size + return out + + def _ensure_dir(self, path: Optional[str]) -> None: """ - Load tokenizer/model; use 4-bit nf4 on CUDA if available + requested. + Create a directory if `path` is a non-empty string. + + Args: + path: Directory to create (recursively). Ignored if falsy. """ - bnb_available = importlib.util.find_spec("bitsandbytes") is not None - use_4bit_quant = bool(self.try_4bit and bnb_available and self.device == "cuda") + if path: + os.makedirs(path, exist_ok=True) + def load(self, **_: Any) -> None: + """ + Load tokenizer and model; optionally enable 4-bit quantization. + + Assumes bitsandbytes is available if `try_4bit=True` on CUDA. + Sets tokenizer pad token if missing. Places model on GPU (device_map='auto') + when `device='cuda'`, otherwise on CPU. + + Args: + **_: Unused kwargs for interface compatibility. 
+ """ quant_config = None - if use_4bit_quant: - from transformers import BitsAndBytesConfig + if self.try_4bit and self.device == "cuda": quant_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16, @@ -140,8 +157,11 @@ def load(self, **_: Any) -> None: ) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) - if self.tokenizer.pad_token is None: - self.tokenizer.pad_token = self.tokenizer.eos_token + if getattr(self.tokenizer, "pad_token_id", None) is None: + if getattr(self.tokenizer, "eos_token", None) is not None: + self.tokenizer.pad_token = self.tokenizer.eos_token + elif getattr(self.tokenizer, "unk_token", None) is not None: + self.tokenizer.pad_token = self.tokenizer.unk_token self.model = AutoModelForCausalLM.from_pretrained( self.model_name, @@ -149,12 +169,26 @@ def load(self, **_: Any) -> None: torch_dtype=(torch.float16 if self.device == "cuda" else torch.float32), quantization_config=quant_config, ) + if self.device == "cpu": + self.model.to("cpu") def _format_chat(self, user_text: str) -> str: """ - Wrap user text with the model's chat template (if present). + Wrap plain text with the model's chat template, if provided. + + Many instruction-tuned models expose `tokenizer.chat_template`. + If available, use it to construct a proper chat prompt; otherwise, + return the text unchanged. + + Args: + user_text: Content of the user message. + + Returns: + A generation-ready prompt string. """ - if hasattr(self.tokenizer, "apply_chat_template") and getattr(self.tokenizer, "chat_template", None): + if hasattr(self.tokenizer, "apply_chat_template") and getattr( + self.tokenizer, "chat_template", None + ): return self.tokenizer.apply_chat_template( [{"role": "user", "content": user_text}], tokenize=False, @@ -165,17 +199,31 @@ def _format_chat(self, user_text: str) -> str: @torch.no_grad() def _generate(self, prompt_text: str) -> str: """ - Single prompt → model text. Clips *input* tokens to avoid overflow. - """ - formatted_prompt = self._format_chat(prompt_text) - prompt_token_ids = self.tokenizer(formatted_prompt, add_special_tokens=False, return_tensors=None)["input_ids"] - if len(prompt_token_ids) > self.max_input_tokens: - prompt_token_ids = prompt_token_ids[-self.max_input_tokens:] + Generate text for a single prompt, guarding input length. + + Steps: + 1) Format prompt via chat template (if present). + 2) Tokenize and clip the *input* to `max_input_tokens` (tail kept). + 3) Call `model.generate` with configured decoding params. + 4) Strip the echoed prompt from the decoded output (if present). - prompt_tensor = torch.tensor([prompt_token_ids]).to(self.model.device) + Args: + prompt_text: Textual prompt to feed the model. - generation = self.model.generate( - input_ids=prompt_tensor, + Returns: + Model continuation string (prompt-echo stripped when applicable). 
+ """ + formatted = self._format_chat(prompt_text) + ids = self.tokenizer(formatted, add_special_tokens=False, return_tensors=None)[ + "input_ids" + ] + if len(ids) > self.max_input_tokens: + ids = ids[-self.max_input_tokens :] + device = next(self.model.parameters()).device + input_ids = torch.tensor([ids], device=device) + + out = self.model.generate( + input_ids=input_ids, max_new_tokens=self.max_new_tokens, do_sample=(self.temperature > 0.0), temperature=self.temperature, @@ -185,20 +233,37 @@ def _generate(self, prompt_text: str) -> str: use_cache=True, ) - decoded_full = self.tokenizer.decode(generation[0], skip_special_tokens=True) - decoded_prompt = self.tokenizer.decode(prompt_tensor[0], skip_special_tokens=True) - return decoded_full[len(decoded_prompt):].strip() if decoded_full.startswith(decoded_prompt) else decoded_full.strip() + decoded_full = self.tokenizer.decode(out[0], skip_special_tokens=True) + decoded_prompt = self.tokenizer.decode(input_ids[0], skip_special_tokens=True) + return ( + decoded_full[len(decoded_prompt) :].strip() + if decoded_full.startswith(decoded_prompt) + else decoded_full.strip() + ) - # ------------------ prompt build & parsing ------------------ - def _build_prompt(train_pairs_chunk: List[Dict[str, str]], - test_terms_chunk: List[str]) -> str: + def _build_prompt( + self, + train_pairs_chunk: List[Dict[str, str]], + test_terms_chunk: List[str], + ) -> str: """ - Few-shot with JSON examples + a block of test terms. - The model must return ONLY a JSON array of {parent, child}. + Construct a few-shot prompt with JSON examples and test terms. + + The prompt: + - Shows several gold (parent, child) examples in JSON. + - Lists the test terms (one per line) between [PAIR] tags. + - Instructs to return ONLY a JSON array of {'parent','child'}. + + Args: + train_pairs_chunk: Cleaned training relations for examples. + test_terms_chunk: The current chunk of test terms. + + Returns: + The fully formatted prompt string. """ examples_json = json.dumps(train_pairs_chunk, ensure_ascii=False, indent=2) - test_types_block = "\n".join(test_terms_chunk) - return ( + test_block = "\n".join(test_terms_chunk) + prompt = ( "From this file, extract all parent–child relations like in the examples.\n" "Return ONLY a JSON array of objects with keys 'parent' and 'child'.\n" "Output format:\n" @@ -210,108 +275,128 @@ def _build_prompt(train_pairs_chunk: List[Dict[str, str]], f"{examples_json}\n\n" "TEST TYPES (between [PAIR] tags):\n" "[PAIR]\n" - f"{test_types_block}\n" + f"{test_block}\n" "[PAIR]\n" "Return only JSON." ) + return prompt - def _parse_pairs(model_text: str) -> List[Dict[str, str]]: - """ - Parse a model response into a list of {'parent','child'} dicts. + def _parse_pairs(self, text: str) -> List[Dict[str, str]]: """ - def deduplicate_and_normalize(dict_list: List[Dict[str, str]]) -> List[Dict[str, str]]: - return SBUNLPFewShotLearner._clean_pairs(dict_list) + Parse a generation string into a list of relation dicts. - response_text = model_text.strip() + Parsing strategy: + 1) Try to parse the entire string as JSON; expect a list. + 2) Else, regex-extract the outermost JSON-like array and parse that. + 3) On failure, return an empty list. - # 1) Direct JSON list + Args: + text: Raw model output. + + Returns: + Cleaned list of {'parent','child'} dicts (possibly empty). 
+ """ + text = text.strip() try: - maybe_json = json.loads(response_text) - if isinstance(maybe_json, list): - return deduplicate_and_normalize(maybe_json) + obj = json.loads(text) + if isinstance(obj, list): + return self._clean_pairs(obj) except Exception: pass - - # 2) Find outermost [ ... ] and parse that - outer_list_match = re.search(r"\[\s*(?:\{[\s\S]*?\}\s*,?\s*)*\]", response_text) - if outer_list_match: + m = re.search(r"\[\s*(?:\{[\s\S]*?\}\s*,?\s*)*\]", text) + if m: try: - array_json = json.loads(outer_list_match.group(0)) - if isinstance(array_json, list): - return deduplicate_and_normalize(array_json) + obj = json.loads(m.group(0)) + if isinstance(obj, list): + return self._clean_pairs(obj) except Exception: pass - - # 3) Nothing parsable return [] - # --------------------- AutoLearner hooks -------------------- def fit(self, train_data: Any, task: str, ontologizer: bool = True): """ - Build the training example bank (parent–child pairs). + Cache and clean gold relations for few-shot prompting. + + For `task == "taxonomy-discovery"`: + - If `ontologizer=True`, convert ontology-like input into + a list of {'parent','child'} via the base helper. + - Otherwise, accept a user-provided list directly. + - Store a cleaned, deduplicated bank in `self.train_pairs_clean`. + + Args: + train_data: Ontology-like object or list of relation dicts. + task: Task selector (expects "taxonomy-discovery"). + ontologizer: Whether to transform ontology inputs. + + Returns: + None. (State is stored on the instance.) """ if task != "taxonomy-discovery": return super().fit(train_data, task, ontologizer) - if ontologizer: - # Convert ontology object → list of {"parent","child"} gold pairs - gold_pairs_from_ontology = self.tasks_ground_truth_former( - train_data, task="taxonomy-discovery" - ) - self.train_pairs_clean = self._clean_pairs(gold_pairs_from_ontology) + gold = self.tasks_ground_truth_former(train_data, task="taxonomy-discovery") + self.train_pairs_clean = self._clean_pairs(gold) else: - # Already a Python list of dicts self.train_pairs_clean = self._clean_pairs(train_data) - def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]: + def _taxonomy_discovery( + self, data: Any, test: bool = False + ) -> Optional[List[Dict[str, str]]]: """ - Main prediction path. Returns a deduplicated list of relations. + Run few-shot inference (test=True) or no-op during training. + + Inference steps: + - Ensure tokenizer/model are loaded. + - Normalize `data` to a list of test terms (via base helper if needed). + - Create the N×M grid across (train_pairs_chunk × test_terms_chunk). + - For each cell: build prompt → generate → parse → (optionally) save. + - Merge and deduplicate all predicted pairs before returning. + + Args: + data: Test input (ontology-like, list of strings, or mixed). + test: If True, perform prediction; otherwise return None. + + Returns: + On `test=True`: deduplicated list of {'parent','child'}. + On `test=False`: None. 
""" if not test: return None - if self.model is None or self.tokenizer is None: self.load() - # Build test vocabulary of types/terms if isinstance(data, list) and (len(data) == 0 or isinstance(data[0], str)): - test_type_list: List[str] = data + test_terms: List[str] = data else: - test_type_list = super().tasks_data_former( + test_terms = super().tasks_data_former( data=data, task="taxonomy-discovery", test=True ) - # Create N×M grid train_chunks = self._chunk_list(self.train_pairs_clean, self.num_train_chunks) - test_chunks = self._chunk_list(test_type_list, self.num_test_chunks) + test_chunks = self._chunk_list(test_terms, self.num_test_chunks) self._ensure_dir(self.output_dir) - merged_predicted_pairs: List[Dict[str, str]] = [] - issued_prompt_count = 0 + merged: List[Dict[str, str]] = [] + issued = 0 - for train_chunk_index, train_pairs_chunk in enumerate(train_chunks, start=1): - for test_chunk_index, test_terms_chunk in enumerate(test_chunks, start=1): - issued_prompt_count += 1 - if self.limit_num_prompts and issued_prompt_count > self.limit_num_prompts: + for ti, tr in enumerate(train_chunks, 1): + for si, ts in enumerate(test_chunks, 1): + issued += 1 + if self.limit_num_prompts and issued > self.limit_num_prompts: break + prompt = self._build_prompt(tr, ts) + resp = self._generate(prompt) + pairs = self._parse_pairs(resp) - prompt_text = self._build_prompt(train_pairs_chunk, test_terms_chunk) - model_response = self._generate(prompt_text) - parsed_relation_pairs = self._parse_pairs(model_response) - - # Optional per-batch dump for debugging if self.output_dir: - batch_json_path = os.path.join( - self.output_dir, f"pairs_T{train_chunk_index}_S{test_chunk_index}.json" - ) - with open(batch_json_path, "w", encoding="utf-8") as fp: - json.dump(parsed_relation_pairs, fp, ensure_ascii=False, indent=2) + path = os.path.join(self.output_dir, f"pairs_T{ti}_S{si}.json") + with open(path, "w", encoding="utf-8") as f: + json.dump(pairs, f, ensure_ascii=False, indent=2) - merged_predicted_pairs.extend(parsed_relation_pairs) + merged.extend(pairs) - if self.limit_num_prompts and issued_prompt_count >= (self.limit_num_prompts or 0): + if self.limit_num_prompts and issued >= (self.limit_num_prompts or 0): break - # Deduplicate final list - return self._clean_pairs(merged_predicted_pairs) + return self._clean_pairs(merged) diff --git a/ontolearner/learner/taxonomy_discovery/skhnlp.py b/ontolearner/learner/taxonomy_discovery/skhnlp.py index fbe53b4..c242aab 100644 --- a/ontolearner/learner/taxonomy_discovery/skhnlp.py +++ b/ontolearner/learner/taxonomy_discovery/skhnlp.py @@ -23,6 +23,7 @@ from typing import Any, Optional, List, Tuple, Dict from transformers import ( AutoTokenizer, + AutoModelForSequenceClassification, AutoModelForCausalLM, BertTokenizer, BertForSequenceClassification, @@ -35,10 +36,20 @@ from ...utils import taxonomy_split, train_test_split as ontology_split from ...data_structure import OntologyData, TaxonomicRelation + class SKHNLPTaxonomyPrompts(AutoPrompt): - """Builds the 7 taxonomy prompts used during fine-tuning / inference.""" + """Builds the 7 taxonomy prompts used during fine-tuning / inference. + + The class stores a small inventory of prompt templates that verbalize the + (parent, child) relationship using different phrasings. Each template ends + with a masked token slot intended for True/False classification. + """ + def __init__(self) -> None: - super().__init__(prompt_template="{parent} is the superclass of {child}. 
This statement is [MASK].")
+        """Initialize prompt templates and the default prompt in the base class."""
+        super().__init__(
+            prompt_template="{parent} is the superclass of {child}. This statement is [MASK]."
+        )
         self.templates: List[str] = [
             "{parent} is the superclass of {child}. This statement is [MASK].",
             "{child} is a subclass of {parent}. This statement is [MASK].",
@@ -49,7 +60,17 @@ def __init__(self) -> None:
             "{parent} is an ancestor class of {child}. This statement is [MASK].",
         ]
 
-    def make(self, parent: str, child: str, template_idx: int) -> str:
+    def format(self, parent: str, child: str, template_idx: int) -> str:
+        """Render a prompt for a (parent, child) pair using a specific template.
+
+        Args:
+            parent: The parent/superclass label.
+            child: The child/subclass label.
+            template_idx: Index into the internal `templates` list.
+
+        Returns:
+            The fully formatted prompt string.
+        """
         return self.templates[template_idx].format(parent=parent, child=child)
 
 
@@ -66,20 +87,18 @@ class SKHNLPSequentialFTLearner(AutoLearner):
     * PREDICT/TEST: pairwise binary classification (returns label + score).
     """
 
-    # Fixed constants defining data split size and the proportional mix of
-    # negative sample types (reversed vs. manipulated) for balancing.
-    _EVAL_FRACTION: float = 0.16
-    _NEG_RATIO_REVERSED: float = 1/3
-    _NEG_RATIO_MANIPULATED: float = 2/3
-
     def __init__(
         self,
         # core
        model_name: str = "bert-large-uncased",
         n_prompts: int = 7,
         random_state: int = 1403,
-        device: Optional[str] = None,  # "cuda" | "cpu" | None (auto)
-
+        num_labels: int = 2,
+        device: str = "cpu",  # "cuda" | "cpu"
+        # data split & negative sampling (now configurable)
+        eval_fraction: float = 0.16,
+        neg_ratio_reversed: float = 1 / 3,
+        neg_ratio_manipulated: float = 2 / 3,
         # ---- expose TrainingArguments as individual user-defined args ----
         output_dir: str = "./results/",
         num_train_epochs: int = 1,
@@ -92,12 +111,52 @@ def __init__(
         eval_strategy: str = "epoch",
         save_strategy: str = "epoch",
         load_best_model_at_end: bool = True,
+        use_fast_tokenizer: Optional[bool] = None,
+        trust_remote_code: bool = False,
     ) -> None:
+        """Configure the sequential fine-tuning learner.
+
+        Args:
+            model_name: HF model id or local path for the BERT backbone.
+            n_prompts: Number of prompt variants to iterate over sequentially.
+            random_state: RNG seed for shuffling/sampling steps.
+            num_labels: Number of classes for the classifier head.
+            device: Device to run the model on ('cuda' or 'cpu'); defaults to 'cpu'.
+            eval_fraction: Fraction of positives to hold out for evaluation.
+            neg_ratio_reversed: Proportion of reversed-parent negatives vs positives.
+            neg_ratio_manipulated: Proportion of random-parent negatives vs positives.
+            output_dir: Directory where HF Trainer writes checkpoints/outputs.
+            num_train_epochs: Number of epochs per prompt.
+            per_device_train_batch_size: Training batch size per device.
+            per_device_eval_batch_size: Evaluation batch size per device.
+            warmup_steps: Linear warmup steps for LR scheduler.
+            weight_decay: Weight decay coefficient.
+            logging_dir: Directory for Trainer logs.
+            logging_steps: Interval for log events (in steps).
+            eval_strategy: Evaluation schedule ('no', 'steps', 'epoch').
+            save_strategy: Checkpoint save schedule ('no', 'steps', 'epoch').
+            load_best_model_at_end: Whether to restore the best checkpoint.
+            use_fast_tokenizer: Force fast/slow tokenizer. If None, try fast then fallback to slow.
+        Notes:
+            The model is fine-tuned *sequentially* across prompt columns.
+ You can control the eval split and negative sampling mix via + `eval_fraction`, `neg_ratio_reversed`, and `neg_ratio_manipulated`. + """ super().__init__() self.model_name = model_name self.n_prompts = n_prompts self.random_state = random_state - self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") + self.num_labels = num_labels + self.device = device + + # user-tunable ratios / split + self._eval_fraction = float(eval_fraction) + self._neg_ratio_reversed = float(neg_ratio_reversed) + self._neg_ratio_manipulated = float(neg_ratio_manipulated) + if not (0.0 < self._eval_fraction < 1.0): + raise ValueError("eval_fraction must be in (0, 1).") + if self._neg_ratio_reversed < 0 or self._neg_ratio_manipulated < 0: + raise ValueError("neg_ratio_* must be >= 0.") self.tokenizer: Optional[BertTokenizer] = None self.model: Optional[BertForSequenceClassification] = None @@ -109,6 +168,8 @@ def __init__( # Keep last train/eval tables for inspection self._last_train: Optional[pd.DataFrame] = None self._last_eval: Optional[pd.DataFrame] = None + self.trust_remote_code = bool(trust_remote_code) + self.use_fast_tokenizer = use_fast_tokenizer random.seed(self.random_state) @@ -128,19 +189,77 @@ def __init__( ) def load(self, model_id: Optional[str] = None, **_: Any) -> None: - """Load tokenizer and model; move model to the requested device.""" + """Load tokenizer & model in a backbone-agnostic way; move model to self.device.""" model_id = model_id or self.model_name - self.tokenizer = BertTokenizer.from_pretrained(model_id) - self.model = BertForSequenceClassification.from_pretrained(model_id, num_labels=2) + + # ---- Tokenizer (robust fast→slow fallback unless explicitly set) ---- + if self.use_fast_tokenizer is None: + try: + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, use_fast=True, trust_remote_code=self.trust_remote_code + ) + except Exception as fast_err: + print( + f"[tokenizer] Fast tokenizer failed: {fast_err}. Falling back to slow tokenizer..." 
+ ) + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, use_fast=False, trust_remote_code=self.trust_remote_code + ) + else: + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, + use_fast=self.use_fast_tokenizer, + trust_remote_code=self.trust_remote_code, + ) + + # Ensure pad token exists (some models lack it) + if getattr(self.tokenizer, "pad_token", None) is None: + # Try sensible fallbacks + fallback = ( + getattr(self.tokenizer, "eos_token", None) + or getattr(self.tokenizer, "sep_token", None) + or getattr(self.tokenizer, "cls_token", None) + ) + if fallback is not None: + self.tokenizer.pad_token = fallback + + # ---- Model (classifier head sized to self.num_labels) ---- + self.model = AutoModelForSequenceClassification.from_pretrained( + model_id, + num_labels=self.num_labels, + trust_remote_code=self.trust_remote_code, + # Allows swapping in a new head size even if the checkpoint differs + ignore_mismatched_sizes=True, + ) + + # Make sure padding ids line up + if ( + getattr(self.model.config, "pad_token_id", None) is None + and getattr(self.tokenizer, "pad_token_id", None) is not None + ): + self.model.config.pad_token_id = self.tokenizer.pad_token_id + + # Set problem type (single-label classification by default) + # If you plan multi-label, you'd switch to "multi_label_classification" self.model.config.problem_type = "single_label_classification" - # place on device chosen by user (or auto) - target_device = self.device - if target_device not in {"cuda", "cpu"}: - target_device = "cuda" if torch.cuda.is_available() else "cpu" - self.model.to(target_device) + # Move to target device + self.model.to(self.device) def tasks_ground_truth_former(self, data: Any, task: str) -> Any: + """Normalize ground-truth inputs for 'taxonomy-discovery'. + + Supports DataFrame with columns ['parent','child',('label')], + list of dicts, or falls back to the base class behavior. + + Args: + data: Input object to normalize. + task: Task name, passed from the outer pipeline. + + Returns: + A list of dictionaries with keys 'parent', 'child', and optionally + 'label' when present in the input. + """ if task != "taxonomy-discovery": return super().tasks_ground_truth_former(data, task) @@ -150,15 +269,29 @@ def tasks_ground_truth_former(self, data: Any, task: str) -> Any: {"parent": p, "child": c, "label": bool(lbl)} for p, c, lbl in zip(data["parent"], data["child"], data["label"]) ] - return [{"parent": p, "child": c} for p, c in zip(data["parent"], data["child"])] + return [ + {"parent": p, "child": c} for p, c in zip(data["parent"], data["child"]) + ] if isinstance(data, list): return data return super().tasks_ground_truth_former(data, task) - def _make_negatives(self, positives_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: - """Return (reversed_df, manipulated_df).""" + def _make_negatives( + self, positives_df: pd.DataFrame + ) -> Tuple[pd.DataFrame, pd.DataFrame]: + """Create two types of negatives from a positives table. + + Returns: + A tuple `(reversed_df, manipulated_df)` where: + - `reversed_df`: pairs with parent/child columns swapped, label=False. + - `manipulated_df`: pairs with the parent replaced by a random + *different* parent from the same pool, label=False. + + Notes: + The input DataFrame must contain columns ['parent', 'child']. 
+ """ unique_parents = positives_df["parent"].unique().tolist() def as_reversed(df: pd.DataFrame) -> pd.DataFrame: @@ -171,6 +304,7 @@ def with_random_parent(df: pd.DataFrame) -> pd.DataFrame: def pick_other_parent(p: str) -> str: pool = [x for x in unique_parents if x != p] return random.choice(pool) if pool else p + out = df.copy() out["parent"] = out["parent"].apply(pick_other_parent) out["label"] = False @@ -184,10 +318,23 @@ def _balance_with_negatives( reversed_df: pd.DataFrame, manipulated_df: pd.DataFrame, ) -> pd.DataFrame: - """Combine positives and negatives with the same ratios as before.""" + """Combine positives with negatives using configured ratios. + + Sampling ratios are defined by the instance settings + `self._neg_ratio_reversed` and `self._neg_ratio_manipulated`, + keeping the positives count unchanged. + + Args: + positives_df: Positive pairs with `label=True`. + reversed_df: Negative pairs produced by flipping parent/child. + manipulated_df: Negative pairs with randomly reassigned parents. + + Returns: + A deduplicated, shuffled DataFrame with a class-balanced mix. + """ n_pos = len(positives_df) - n_rev = int(n_pos * self._NEG_RATIO_REVERSED) - n_man = int(n_pos * self._NEG_RATIO_MANIPULATED) + n_rev = int(n_pos * self._neg_ratio_reversed) + n_man = int(n_pos * self._neg_ratio_manipulated) combined = pd.concat( [ @@ -197,26 +344,75 @@ def _balance_with_negatives( ], ignore_index=True, ) - combined = combined.drop_duplicates(subset=["parent", "child", "label"]).reset_index(drop=True) + combined = combined.drop_duplicates( + subset=["parent", "child", "label"] + ).reset_index(drop=True) return combined def _add_prompt_columns(self, df: pd.DataFrame) -> pd.DataFrame: + """Append one column per prompt variant to the given pairs table. + + For each row `(parent, child)`, creates columns `prompt_1 ... prompt_n`. + + Args: + df: Input DataFrame with columns ['parent', 'child', ...]. + + Returns: + A copy of `df` including the newly added prompt columns. + """ out = df.copy() for i in range(self.n_prompts): - out[f"prompt_{i+1}"] = out.apply( - lambda r, k=i: self.prompter.make(r["parent"], r["child"], k), axis=1 + out[f"prompt_{i + 1}"] = out.apply( + lambda r, k=i: self.prompter.format(r["parent"], r["child"], k), axis=1 ) return out - def _df_from_relations(relations: List[TaxonomicRelation], label: bool = True) -> pd.DataFrame: + def _df_from_relations( + self, relations: List[TaxonomicRelation], label: bool = True + ) -> pd.DataFrame: + """Convert a list of `TaxonomicRelation` to a DataFrame. + + Args: + relations: Iterable of `TaxonomicRelation(parent, child)`. + label: Class label to assign to all resulting rows. + + Returns: + DataFrame with columns ['parent', 'child', 'label']. + """ if not relations: return pd.DataFrame(columns=["parent", "child", "label"]) - return pd.DataFrame([{"parent": r.parent, "child": r.child, "label": label} for r in relations]) + return pd.DataFrame( + [{"parent": r.parent, "child": r.child, "label": label} for r in relations] + ) + + def _relations_from_df(self, df: pd.DataFrame) -> List[TaxonomicRelation]: + """Convert a DataFrame to a list of `TaxonomicRelation`. - def _relations_from_df(df: pd.DataFrame) -> List[TaxonomicRelation]: - return [TaxonomicRelation(parent=p, child=c) for p, c in zip(df["parent"], df["child"])] + Args: + df: DataFrame with columns ['parent', 'child']. 
- def _build_masked_prompt(self, parent: str, child: str, index_1_based: int, mask_token: str = "[MASK]") -> str: + Returns: + List of `TaxonomicRelation` objects in row order. + """ + return [ + TaxonomicRelation(parent=p, child=c) + for p, c in zip(df["parent"], df["child"]) + ] + + def _build_masked_prompt( + self, parent: str, child: str, index_1_based: int, mask_token: str = "[MASK]" + ) -> str: + """Construct one of several True/False prompts with a mask token. + + Args: + parent: Parent label. + child: Child label. + index_1_based: 1-based index selecting a template. + mask_token: The token used to denote the masked label. + + Returns: + A formatted prompt string. + """ prompts_1based = [ f"{parent} is the superclass of {child}. This statement is {mask_token}.", f"{child} is a subclass of {parent}. This statement is {mask_token}.", @@ -226,18 +422,42 @@ def _build_masked_prompt(self, parent: str, child: str, index_1_based: int, mask f"{child} is a subtype of {parent}. This statement is {mask_token}.", f"{parent} is an ancestor class of {child}. This statement is {mask_token}.", f"{child} is a descendant classs of {child}. This statement is {mask_token}.", - f"\"{parent}\" is the superclass of \"{child}\". This statement is {mask_token}.", + f'"{parent}" is the superclass of "{child}". This statement is {mask_token}.', ] return prompts_1based[index_1_based - 1] @torch.no_grad() def _predict_prompt_true_false(self, sentence: str) -> bool: + """Run a single True/False prediction on a prompt. + + Args: + sentence: Fully formatted prompt text. + + Returns: + True iff the predicted class index is 1 (positive). + """ enc = self.tokenizer(sentence, return_tensors="pt").to(self.model.device) logits = self.model(**enc).logits predicted_label = torch.argmax(logits, dim=1).item() return predicted_label == 1 def _select_parent_via_prompts(self, child: str) -> str: + """Select the most likely parent for a given child via prompt voting. + + The procedure: + 1) Generate prompts for each candidate parent at increasing "levels". + 2) Accumulate votes from the True/False classifier. + 3) Resolve ties by recursing to the next level; after 4 levels, break ties randomly. + + Args: + child: The child label whose parent should be predicted. + + Returns: + The chosen parent string. + + Raises: + AssertionError: If candidate parents were not initialized. + """ assert self._candidate_parents, "Candidate parents not initialized." scores: dict[str, int] = {p: 0 for p in self._candidate_parents} @@ -247,14 +467,18 @@ def prompt_indices_for_level(level: int) -> List[int]: return [2 * level, 2 * level + 1] def recurse(active_parents: List[str], level: int) -> str: - idxs = [i for i in prompt_indices_for_level(level) if 1 <= i <= self.n_prompts] + idxs = [ + i for i in prompt_indices_for_level(level) if 1 <= i <= self.n_prompts + ] if idxs: for parent in active_parents: votes = sum( 1 for idx in idxs if self._predict_prompt_true_false( - self._build_masked_prompt(parent=parent, child=child, index_1_based=idx) + self._build_masked_prompt( + parent=parent, child=child, index_1_based=idx + ) ) ) scores[parent] += votes @@ -277,6 +501,15 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): TEST: - OntologyData -> parent selection: [{'parent': predicted, 'child': child}] - DataFrame/list -> binary pair classification with 'label' + 'score' + + Args: + data: One of {OntologyData, pandas.DataFrame, list[dict], list[tuple]}. + test: If True, run inference; otherwise perform training. 
+ + Returns: + - On training: None (model is fine-tuned in-place). + - On inference with OntologyData: list of {'parent','child'} predictions. + - On inference with pairs: list of dicts including 'label' and 'score'. """ is_ontology_object = isinstance(data, OntologyData) @@ -298,7 +531,9 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): if self._candidate_parents is None: self._candidate_parents = parents_in_call else: - self._candidate_parents = sorted(set(self._candidate_parents).union(parents_in_call)) + self._candidate_parents = sorted( + set(self._candidate_parents).union(parents_in_call) + ) else: if self._candidate_parents is None: self._candidate_parents = parents_in_call @@ -317,7 +552,7 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): true_probs_by_prompt: List[torch.Tensor] = [] for i in range(self.n_prompts): - col = f"prompt_{i+1}" + col = f"prompt_{i + 1}" enc = self.tokenizer( prompts_df[col].tolist(), return_tensors="pt", @@ -333,18 +568,35 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): results: List[dict[str, Any]] = [] for p, c, s, yhat in zip( - pairs_df["parent"], pairs_df["child"], avg_true_prob.tolist(), predicted_bool + pairs_df["parent"], + pairs_df["child"], + avg_true_prob.tolist(), + predicted_bool, ): - results.append({"parent": p, "child": c, "label": int(bool(yhat)), "score": float(s)}) + results.append( + { + "parent": p, + "child": c, + "label": int(bool(yhat)), + "score": float(s), + } + ) return results if isinstance(data, OntologyData): train_onto, eval_onto = ontology_split( - data, test_size=self._EVAL_FRACTION, random_state=self.random_state, verbose=False + data, + test_size=self._eval_fraction, + random_state=self.random_state, + verbose=False, ) - train_pos_rel: List[TaxonomicRelation] = getattr(train_onto.type_taxonomies, "taxonomies", []) or [] - eval_pos_rel: List[TaxonomicRelation] = getattr(eval_onto.type_taxonomies, "taxonomies", []) or [] + train_pos_rel: List[TaxonomicRelation] = ( + getattr(train_onto.type_taxonomies, "taxonomies", []) or [] + ) + eval_pos_rel: List[TaxonomicRelation] = ( + getattr(eval_onto.type_taxonomies, "taxonomies", []) or [] + ) train_pos_df = self._df_from_relations(train_pos_rel, label=True) eval_pos_df = self._df_from_relations(eval_pos_rel, label=True) @@ -360,11 +612,17 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): else: if "label" not in pairs_df.columns or pairs_df["label"].nunique() == 1: - positives_df = pairs_df[pairs_df.get("label", True)][["parent", "child"]].copy() + positives_df = pairs_df[pairs_df.get("label", True)][ + ["parent", "child"] + ].copy() pos_rel = self._relations_from_df(positives_df) tr_rel, ev_rel = taxonomy_split( - pos_rel, train_terms=None, test_size=self._EVAL_FRACTION, random_state=self.random_state, verbose=False + pos_rel, + train_terms=None, + test_size=self._eval_fraction, + random_state=self.random_state, + verbose=False, ) train_pos_df = self._df_from_relations(tr_rel, label=True) eval_pos_df = self._df_from_relations(ev_rel, label=True) @@ -372,8 +630,12 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): tr_rev_df, tr_man_df = self._make_negatives(train_pos_df) ev_rev_df, ev_man_df = self._make_negatives(eval_pos_df) - train_df = self._balance_with_negatives(train_pos_df, tr_rev_df, tr_man_df) - eval_df = self._balance_with_negatives(eval_pos_df, ev_rev_df, ev_man_df) + train_df = self._balance_with_negatives( + train_pos_df, tr_rev_df, tr_man_df + ) + eval_df = self._balance_with_negatives( + 
eval_pos_df, ev_rev_df, ev_man_df + ) train_df = self._add_prompt_columns(train_df) eval_df = self._add_prompt_columns(eval_df) @@ -383,16 +645,30 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): pos_rel = self._relations_from_df(positives_df) tr_rel, ev_rel = taxonomy_split( - pos_rel, train_terms=None, test_size=self._EVAL_FRACTION, random_state=self.random_state, verbose=False + pos_rel, + train_terms=None, + test_size=self._eval_fraction, + random_state=self.random_state, + verbose=False, ) train_pos_df = self._df_from_relations(tr_rel, label=True) eval_pos_df = self._df_from_relations(ev_rel, label=True) negatives_df = pairs_df[pairs_df["label"]][["parent", "child"]].copy() - negatives_df = negatives_df.sample(frac=1.0, random_state=self.random_state).reset_index(drop=True) - - n_eval_neg = max(1, int(len(negatives_df) * self._EVAL_FRACTION)) if len(negatives_df) > 0 else 0 - eval_neg_df = negatives_df.iloc[:n_eval_neg].copy() if n_eval_neg > 0 else negatives_df.iloc[:0].copy() + negatives_df = negatives_df.sample( + frac=1.0, random_state=self.random_state + ).reset_index(drop=True) + + n_eval_neg = ( + max(1, int(len(negatives_df) * self._eval_fraction)) + if len(negatives_df) > 0 + else 0 + ) + eval_neg_df = ( + negatives_df.iloc[:n_eval_neg].copy() + if n_eval_neg > 0 + else negatives_df.iloc[:0].copy() + ) train_neg_df = negatives_df.iloc[n_eval_neg:].copy() train_neg_df["label"] = False @@ -410,21 +686,36 @@ def _taxonomy_discovery(self, data: Any, test: bool = False): # Sequential fine-tuning across prompts for i in range(self.n_prompts): - prompt_col = f"prompt_{i+1}" - train_ds = Dataset.from_pandas(train_df[[prompt_col, "label"]].reset_index(drop=True)) - eval_ds = Dataset.from_pandas(eval_df[[prompt_col, "label"]].reset_index(drop=True)) + prompt_col = f"prompt_{i + 1}" + train_ds = Dataset.from_pandas( + train_df[[prompt_col, "label"]].reset_index(drop=True) + ) + eval_ds = Dataset.from_pandas( + eval_df[[prompt_col, "label"]].reset_index(drop=True) + ) train_ds = train_ds.rename_column("label", "labels") eval_ds = eval_ds.rename_column("label", "labels") def tokenize_batch(batch): - return self.tokenizer(batch[prompt_col], padding="max_length", truncation=True) + """Tokenize a batch for the current prompt column with truncation/padding.""" + return self.tokenizer( + batch[prompt_col], padding="max_length", truncation=True + ) - train_ds = train_ds.map(tokenize_batch, batched=True, remove_columns=[prompt_col]) - eval_ds = eval_ds.map(tokenize_batch, batched=True, remove_columns=[prompt_col]) + train_ds = train_ds.map( + tokenize_batch, batched=True, remove_columns=[prompt_col] + ) + eval_ds = eval_ds.map( + tokenize_batch, batched=True, remove_columns=[prompt_col] + ) - train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) - eval_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + train_ds.set_format( + type="torch", columns=["input_ids", "attention_mask", "labels"] + ) + eval_ds.set_format( + type="torch", columns=["input_ids", "attention_mask", "labels"] + ) trainer = Trainer( model=self.model, @@ -481,13 +772,25 @@ class SKHNLPZSLearner(AutoLearner): def __init__( self, model_name: str = "Qwen/Qwen2.5-0.5B-Instruct", - device: Optional[str] = None, # "cuda" | "cpu" | None (auto) + device: Optional[str] = None, # "cuda" | "cpu" | None (auto) max_new_tokens: int = 16, - save_path: Optional[str] = None, # directory or full path + save_path: Optional[str] = None, # directory or full path verbose: 
bool = True, - normalize_mode: str = "none", # "none" | "substring" | "levenshtein" | "auto" + normalize_mode: str = "none", # "none" | "substring" | "levenshtein" | "auto" random_state: int = 1403, ) -> None: + """Configure the zero-shot learner. + + Args: + model_name: HF model id/path for the instruction-tuned causal LLM. + device: Force device ('cuda' or 'cpu'), else auto-detect. + max_new_tokens: Generation length budget for each completion. + save_path: Optional CSV path or directory for saving predictions. + verbose: If True, print progress messages. + normalize_mode: Post-processing for class names + ('none' | 'substring' | 'levenshtein' | 'auto'). + random_state: RNG seed for any sampling steps. + """ super().__init__() self.model_name = model_name self.verbose = verbose @@ -502,7 +805,7 @@ def __init__( if device is None: self._has_cuda = torch.cuda.is_available() else: - self._has_cuda = (device == "cuda") + self._has_cuda = device == "cuda" self._pipe_device = 0 if self._has_cuda else -1 self._model_device_map = {"": "cuda"} if self._has_cuda else None @@ -530,6 +833,13 @@ def __init__( def load(self, model_id: str = "") -> None: """ Load tokenizer, model, and text-generation pipeline. + + Args: + model_id: Optional HF id/path override; defaults to `self.model_name`. + + Side Effects: + Initializes the tokenizer and model, configures the generation + pipeline on CPU/GPU, and sets a pad token if absent. """ model_id = model_id or self.model_name if self.verbose: @@ -538,7 +848,10 @@ def load(self, model_id: str = "") -> None: self._tokenizer = AutoTokenizer.from_pretrained(model_id) # Ensure a pad token is set for generation - if self._tokenizer.pad_token_id is None and self._tokenizer.eos_token_id is not None: + if ( + self._tokenizer.pad_token_id is None + and self._tokenizer.eos_token_id is not None + ): self._tokenizer.pad_token = self._tokenizer.eos_token self._model = AutoModelForCausalLM.from_pretrained( @@ -558,10 +871,19 @@ def load(self, model_id: str = "") -> None: print("Device set to use", "cuda" if self._has_cuda else "cpu") print("[ZeroShotTaxonomyLearner] Model loaded.") - def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[List[Dict[str, str]]]: + def _taxonomy_discovery( + self, data: Any, test: bool = False + ) -> Optional[List[Dict[str, str]]]: """ Zero-shot prediction over all incoming rows (no filtering/augmentation). - Returns a list of dictionaries: [{'parent': predicted_label, 'child': child}, ...] + + Args: + data: One of {DataFrame, list[dict], list[tuple], Ontology-like}. + test: If False, training is skipped (zero-shot learner), and None is returned. + + Returns: + On `test=True`, a list of dicts [{'parent': predicted_label, 'child': child}, ...]. + On `test=False`, returns None. """ if not test: if self.verbose: @@ -617,16 +939,22 @@ def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[List[Di eval_df.at[idx, "prediction"] = final_label # Return in the format expected by the pipeline - return [{"parent": p, "child": c} for p, c in zip(eval_df["prediction"], eval_df["child"])] + return [ + {"parent": p, "child": c} + for p, c in zip(eval_df["prediction"], eval_df["child"]) + ] def _generate_and_parse(self, child_term: str) -> (str, str): """ Generate a completion for the given child term and extract the raw predicted class using the strict '#[ ... ]#' pattern. 
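A standalone sketch of the '#[ ... ]#' extraction mentioned above; the regex here is one plausible way to read that delimiter pair and is not copied from the patch.

import re

def parse_prediction(generation: str) -> str:
    match = re.search(r"#\[(.*?)\]#", generation, re.S)
    return match.group(1).strip() if match else "unknown"

print(parse_prediction("The class is #[ music ]#."))     # -> 'music'
print(parse_prediction("no delimiters in this output"))  # -> 'unknown'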
- Returns - ------- - (raw_generation_text, parsed_prediction_or_unknown) + Args: + child_term: The child label to classify into one of the fixed classes. + + Returns: + Tuple `(raw_generation_text, parsed_prediction_or_unknown)`, where the second + element is either the text inside '#[ ... ]#' or the string 'unknown'. """ messages = [ {"role": "system", "content": "You are a helpful classifier."}, @@ -654,13 +982,15 @@ def _generate_and_parse(self, child_term: str) -> (str, str): parsed = match.group(1).strip() if match else "unknown" return generation, parsed - # ------------------------------------------------------------------------- - # Normalization helpers - # ------------------------------------------------------------------------- - def _normalize_substring_only(self, text: str) -> str: """ Snap to a label if the string is equal to / contained in / contains a valid label (case-insensitive). + + Args: + text: Raw class text to normalize. + + Returns: + One of `CLASS_LIST` on a match; otherwise 'unknown'. """ if not isinstance(text, str): return "unknown" @@ -670,13 +1000,23 @@ def _normalize_substring_only(self, text: str) -> str: for label in self.CLASS_LIST: label_lower = label.lower() - if lowered == label_lower or lowered in label_lower or label_lower in lowered: + if ( + lowered == label_lower + or lowered in label_lower + or label_lower in lowered + ): return label return "unknown" def _normalize_levenshtein_only(self, text: str) -> str: """ Snap to the nearest label by Levenshtein (edit) distance. + + Args: + text: Raw class text to normalize. + + Returns: + The nearest label in `CLASS_LIST`, or 'unknown' if input is empty/invalid. """ if not isinstance(text, str): return "unknown" @@ -697,37 +1037,59 @@ def _normalize_levenshtein_only(self, text: str) -> str: def _normalize_auto(self, text: str) -> str: """ Cascade: try substring-first; if no match, fall back to Levenshtein snapping. + + Args: + text: Raw class text to normalize. + + Returns: + Normalized label string or 'unknown'. """ snapped = self._normalize_substring_only(text) - return snapped if snapped != "unknown" else self._normalize_levenshtein_only(text) + return ( + snapped if snapped != "unknown" else self._normalize_levenshtein_only(text) + ) - def _to_dataframe(data: Any) -> pd.DataFrame: + def _to_dataframe(self, data: Any) -> pd.DataFrame: """ - Normalize various input formats into a DataFrame with columns: - ['child', 'parent'] or ['child', 'parent', 'label']. + Normalize various input formats into a DataFrame. + + Supported inputs: + * pandas.DataFrame with columns ['child','parent',('label')] + * list[dict] with keys 'child','parent',('label') + * list of tuples/lists: (child, parent) or (child, parent, label) + * Ontology-like object with `.type_taxonomies.taxonomies` + + Args: + data: The source object to normalize. + + Returns: + A pandas DataFrame with standardized columns. + + Raises: + ValueError: If the input type/shape is not recognized. 
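A rough sketch of the substring-then-Levenshtein snapping cascade described above; the label inventory and the tiny edit-distance helper are illustrative stand-ins, not the learner's CLASS_LIST.

CLASS_LIST = ["major", "minor", "diminished"]   # stand-in label inventory

def edit_distance(a: str, b: str) -> int:
    # Classic dynamic-programming edit distance over two strings.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + (ca != cb)))
        prev = curr
    return prev[-1]

def normalize(text: str) -> str:
    lowered = text.strip().lower()
    if not lowered:
        return "unknown"
    for label in CLASS_LIST:                    # substring pass first
        low = label.lower()
        if lowered == low or lowered in low or low in lowered:
            return label
    # Fallback: snap to the nearest label by edit distance.
    return min(CLASS_LIST, key=lambda lbl: edit_distance(lowered, lbl.lower()))

print(normalize("Minor chord"))  # substring match -> 'minor'
print(normalize("majr"))         # edit-distance fallback -> 'major'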
""" - # Already a DataFrame if isinstance(data, pd.DataFrame): df = data.copy() df.columns = [str(c).lower() for c in df.columns] return df.reset_index(drop=True) - # List[dict] if isinstance(data, list) and data and isinstance(data[0], dict): rows = [{str(k).lower(): v for k, v in d.items()} for d in data] return pd.DataFrame(rows).reset_index(drop=True) - # Iterable of tuples/lists: (child, parent[, label]) if isinstance(data, (list, tuple)) and data: first = data[0] if isinstance(first, (list, tuple)) and not isinstance(first, dict): n = len(first) if n >= 3: - return pd.DataFrame(data, columns=["child", "parent", "label"]).reset_index(drop=True) + return pd.DataFrame( + data, columns=["child", "parent", "label"] + ).reset_index(drop=True) if n == 2: - return pd.DataFrame(data, columns=["child", "parent"]).reset_index(drop=True) + return pd.DataFrame(data, columns=["child", "parent"]).reset_index( + drop=True + ) - # OntoLearner-style object (with .type_taxonomies.taxonomies) try: type_taxonomies = getattr(data, "type_taxonomies", None) if type_taxonomies is not None: @@ -737,9 +1099,15 @@ def _to_dataframe(data: Any) -> pd.DataFrame: for rel in taxonomies: parent = getattr(rel, "parent", None) child = getattr(rel, "child", None) - label = getattr(rel, "label", None) if hasattr(rel, "label") else None + label = ( + getattr(rel, "label", None) + if hasattr(rel, "label") + else None + ) if parent is not None and child is not None: - rows.append({"child": child, "parent": parent, "label": label}) + rows.append( + {"child": child, "parent": parent, "label": label} + ) if rows: return pd.DataFrame(rows).reset_index(drop=True) except Exception: @@ -751,10 +1119,19 @@ def _to_dataframe(data: Any) -> pd.DataFrame: ".type_taxonomies.taxonomies." ) - def _resolve_save_path(save_path: str, default_filename: str) -> str: + def _resolve_save_path(self, save_path: str, default_filename: str) -> str: """ - If `save_path` is a directory, join it with `default_filename`. - If it's a file path, return as-is. + Resolve a target file path from a directory or path-like input. + + If `save_path` points to a directory, joins it with `default_filename`. + If it already looks like a file path (has an extension), returns as-is. + + Args: + save_path: Directory or file path supplied by the caller. + default_filename: Basename to use when `save_path` is a directory. + + Returns: + A concrete file path where outputs can be written. """ base = os.path.basename(save_path) has_ext = os.path.splitext(base)[1] != "" diff --git a/ontolearner/learner/term_typing/__init__.py b/ontolearner/learner/term_typing/__init__.py deleted file mode 100644 index a42d716..0000000 --- a/ontolearner/learner/term_typing/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2025 SciKnowOrg -# -# Licensed under the MIT License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/MIT -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from .rwthdbis import RWTHDBISSFTLearner -from .sbunlp import SBUNLPZSLearner -from .alexbek import AlexbekRFLearner, AlexbekRAGLearner diff --git a/ontolearner/learner/term_typing/alexbek.py b/ontolearner/learner/term_typing/alexbek.py index 7aa6033..0db694b 100644 --- a/ontolearner/learner/term_typing/alexbek.py +++ b/ontolearner/learner/term_typing/alexbek.py @@ -12,6 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Learners for supervised and retrieval-augmented *term typing*. + +This module implements two learners: + +- **AlexbekRFLearner** (retriever/classifier): + Encodes terms with a Hugging Face encoder, optionally augments with simple + graph features, and trains a One-vs-Rest RandomForest for multi-label typing. + +- **AlexbekRAGLearner** (retrieval-augmented generation): + Builds an in-memory example index with sentence embeddings, retrieves + nearest examples for each query term, then prompts an instruction-tuned + causal LLM to produce types, parsing the JSON response. + +Both learners conform to the `AutoLearner` / `AutoRetriever` APIs used in +the outer pipeline. +""" + import gc import json import re @@ -31,22 +48,19 @@ from ...base import AutoLearner, AutoRetriever + class AlexbekRFLearner(AutoRetriever): """ Embedding-based multi-label classifier for *term typing*. - Pipeline overview: - 1) Load a Hugging Face encoder (tokenizer + model). - 2) Encode input terms into sentence embeddings. - 3) Optionally augment with simple graph (co-occurrence) features. - 4) Train a One-vs-Rest RandomForest on the concatenated features. - 5) Predict multi-label types with a probability threshold (fallback to top-1). - - API expected by LearnerPipeline: - - load(model_id) - - fit(data, task, ontologizer=True) - - predict(data, task, ontologizer=True) - - tasks_ground_truth_former(data, task) + Pipeline + 1) Load a Hugging Face encoder (tokenizer + model). + 2) Encode input terms into sentence embeddings. + 3) Optionally augment with simple graph (co-occurrence) features. + 4) Train a One-vs-Rest RandomForest on the concatenated features. + 5) Predict multi-label types with a probability threshold (fallback to top-1). + + Implements the `AutoRetriever` interface used by the outer pipeline. """ def __init__( @@ -58,6 +72,23 @@ def __init__( use_graph_features: bool = True, rf_kwargs: Optional[Dict[str, Any]] = None, ): + """Configure the RF-based multi-label learner. + + Parameters + device: + Torch device spec ('cpu' or 'cuda'). + batch_size: + Encoding mini-batch size for the transformer. + max_length: + Maximum input token length for the encoder tokenizer. + threshold: + Per-label probability threshold at prediction time. + use_graph_features: + If True, add simple graph features to embeddings. + rf_kwargs: + Optional RandomForest hyperparameters dictionary. + + """ # Runtime / inference settings self.device = torch.device(device) self.batch_size = batch_size @@ -81,21 +112,50 @@ def __init__( self.term_graph: Optional[nx.Graph] = None def load(self, model_id: str, **_: Any) -> None: - """Load a Hugging Face encoder by model id (tokenizer + base model).""" + """Load a Hugging Face encoder by model id (tokenizer + base model). + + Parameters + model_id: + HF model identifier or local path for an encoder backbone. + + Side Effects + - Sets `self.model_name`, `self.tokenizer`, `self.embedding_model`. + - Puts the model in eval mode and moves it to `self.device`. 
+ """ self.model_name = model_id self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.embedding_model = AutoModel.from_pretrained(model_id) self.embedding_model.eval().to(self.device) def fit(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> None: - """Train the One-vs-Rest RandomForest on term embeddings (+ optional graph features).""" + """Train the One-vs-Rest RandomForest on term embeddings (+ optional graph features). + + Parameters + data: + Training payload; supported formats are routed via `_as_term_types_dicts`. + Each example must contain at least `{"term": str, "types": List[str]}`. + task: + Must be `'term-typing'`. + ontologizer: + Unused here; accepted for API compatibility. + **_: + Ignored extra arguments. + + Raises + ValueError + If `task` is not `'term-typing'` or if no valid examples are found. + """ if task != "term-typing": - raise ValueError("OntologyTypeRFClassifier supports only task='term-typing'.") + raise ValueError( + "OntologyTypeRFClassifier supports only task='term-typing'." + ) # Normalize incoming training data into a list of dicts: {term, types, RAG} training_rows = self._as_term_types_dicts(data) if not training_rows: - raise ValueError("No valid training examples found (need 'term' and 'types').") + raise ValueError( + "No valid training examples found (need 'term' and 'types')." + ) # Split out terms and raw labels training_terms: List[str] = [row["term"] for row in training_rows] @@ -110,7 +170,9 @@ def fit(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> None: # Optionally build a light-weight co-occurrence graph and extract features if self.use_graph_features: self.term_graph = self._create_term_graph(training_rows) - graph_features_train = self._extract_graph_features(self.term_graph, training_terms) + graph_features_train = self._extract_graph_features( + self.term_graph, training_terms + ) X_train = np.hstack([term_embeddings_train, graph_features_train]) else: self.term_graph = None @@ -120,18 +182,48 @@ def fit(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> None: Y_train = self.label_binarizer.transform(raw_label_lists) # One-vs-Rest RandomForest (one binary RF per label) - self.ovr_random_forest = OneVsRestClassifier(RandomForestClassifier(**self.rf_kwargs)) + self.ovr_random_forest = OneVsRestClassifier( + RandomForestClassifier(**self.rf_kwargs) + ) self.ovr_random_forest.fit(X_train, Y_train) - - def predict(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> List[Dict[str, Any]]: + def predict( + self, data: Any, task: str, ontologizer: bool = True, **_: Any + ) -> List[Dict[str, Any]]: """Predict multi-label types for input terms. - Returns a list of dicts with keys: {id, term, types}. + Parameters + data: + Evaluation payload; formats normalized by `_as_predict_terms_ids`. + task: + Must be `'term-typing'`. + ontologizer: + Unused here; accepted for API compatibility. + **_: + Ignored extra arguments. + + Returns + List[Dict[str, Any]] + A list of dictionaries with keys: + - `id`: Original example id (if provided). + - `term`: Input term string. + - `types`: List of predicted label strings (selected by threshold or top-1). + + Raises + ValueError + If `task` is not `'term-typing'`. + RuntimeError + If `load()` and `fit()` have not been called. 
""" if task != "term-typing": - raise ValueError("OntologyTypeRFClassifier supports only task='term-typing'.") - if self.ovr_random_forest is None or self.tokenizer is None or self.embedding_model is None: + raise ValueError( + "OntologyTypeRFClassifier supports only task='term-typing'." + ) + if ( + self.ovr_random_forest is None + or self.tokenizer is None + or self.embedding_model is None + ): raise RuntimeError("Call load() and fit() before predict().") # Normalize prediction input into parallel lists of terms and example ids @@ -142,7 +234,9 @@ def predict(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> L # Match feature layout used during training if self.use_graph_features and self.term_graph is not None: - graph_features_test = self._extract_graph_features(self.term_graph, test_terms) + graph_features_test = self._extract_graph_features( + self.term_graph, test_terms + ) X_test = np.hstack([term_embeddings_test, graph_features_test]) else: X_test = term_embeddings_test @@ -160,7 +254,9 @@ def predict(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> L if len(selected_label_indices) == 0: selected_label_indices = [int(np.argmax(label_probabilities))] - predicted_types = [label_names[label_idx] for label_idx in selected_label_indices] + predicted_types = [ + label_names[label_idx] for label_idx in selected_label_indices + ] predictions.append( { @@ -172,20 +268,49 @@ def predict(self, data: Any, task: str, ontologizer: bool = True, **_: Any) -> L return predictions def tasks_ground_truth_former(self, data: Any, task: str) -> List[Dict[str, Any]]: - """Normalize ground-truth into a list of {id, term, types} dicts for evaluation.""" + """Normalize ground-truth into a list of {id, term, types} dicts for evaluation. + + Parameters + data: + Ground-truth payload; supported formats include objects exposing + `.term_typings`, a list of dicts, or a list of tuples/lists. + task: + Must be `'term-typing'`. + + Returns + List[Dict[str, Any]] + A list of dictionaries with keys `id`, `term`, `types` (list of str). + + Raises + ValueError + If `task` is not `'term-typing'`. + """ if task != "term-typing": - raise ValueError("OntologyTypeRFClassifier supports only task='term-typing'.") + raise ValueError( + "OntologyTypeRFClassifier supports only task='term-typing'." + ) return self._as_gold_id_term_types(data) def _encode(self, texts: List[str]) -> np.ndarray: - """Encode a list of strings into L2-normalized sentence embeddings (NumPy array). + """Encode a list of strings into L2-normalized sentence embeddings. - If no texts are provided, returns an empty array with width equal to the model hidden size. + Parameters + texts: + List of input texts/terms. + + Returns + np.ndarray + Array of shape `(len(texts), hidden_size)` with L2-normalized + embeddings. If `texts` is empty, returns a `(0, hidden_size)` array. """ - assert self.tokenizer is not None and self.embedding_model is not None, "Call load(model_id) first." + assert self.tokenizer is not None and self.embedding_model is not None, ( + "Call load(model_id) first." 
+ ) if not texts: - hidden_size = getattr(getattr(self.embedding_model, "config", None), "hidden_size", 768) + hidden_size = getattr( + getattr(self.embedding_model, "config", None), "hidden_size", 768 + ) return np.zeros((0, hidden_size), dtype=np.float32) batch_embeddings: List[torch.Tensor] = [] @@ -208,11 +333,15 @@ def _encode(self, texts: List[str]) -> np.ndarray: model_output = self.embedding_model(**tokenized_batch) # Prefer dedicated pooler if provided; otherwise pool by last valid token - if hasattr(model_output, "pooler_output") and model_output.pooler_output is not None: + if ( + hasattr(model_output, "pooler_output") + and model_output.pooler_output is not None + ): sentence_embeddings = model_output.pooler_output else: sentence_embeddings = self._last_token_pool( - model_output.last_hidden_state, tokenized_batch["attention_mask"] + model_output.last_hidden_state, + tokenized_batch["attention_mask"], ) # L2-normalize embeddings for stability @@ -230,18 +359,44 @@ def _encode(self, texts: List[str]) -> np.ndarray: # Concatenate all batches and convert to NumPy return torch.cat(batch_embeddings, dim=0).numpy() - def _last_token_pool(self, last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor: - """Select the last *non-padding* token embedding for each sequence in the batch.""" + def _last_token_pool( + self, last_hidden_states: torch.Tensor, attention_mask: torch.Tensor + ) -> torch.Tensor: + """Select the last *non-padding* token embedding for each sequence. + + Parameters + last_hidden_states: + Tensor of shape `(batch, seq_len, hidden)`. + attention_mask: + Tensor of shape `(batch, seq_len)` with 1 for real tokens. + + Returns + torch.Tensor + Tensor of shape `(batch, hidden)` with per-sequence pooled embeddings. + """ last_valid_token_idx = attention_mask.sum(dim=1) - 1 # (batch,) - batch_row_idx = torch.arange(last_hidden_states.size(0), device=last_hidden_states.device) + batch_row_idx = torch.arange( + last_hidden_states.size(0), device=last_hidden_states.device + ) return last_hidden_states[batch_row_idx, last_valid_token_idx] def _create_term_graph(self, training_rows: List[Dict[str, Any]]) -> nx.Graph: """Create a simple undirected co-occurrence graph from training rows. - Nodes: terms (with node attribute 'types'). - Edges: between a term and each neighbor from its optional RAG list. - Edge weight = number of shared types (or 0.1 if none shared). + Graph Structure + Nodes + Terms (node attribute `'types'` is stored per term). + Edges + Between a term and each neighbor from its optional RAG list. + Edge weight = number of shared types (or 0.1 if none shared). + + Parameters + training_rows: + Normalized rows with keys: `'term'`, `'types'`, optional `'RAG'`. + + Returns + networkx.Graph + The constructed undirected graph. 
""" graph = nx.Graph() @@ -251,7 +406,7 @@ def _create_term_graph(self, training_rows: List[Dict[str, Any]]) -> nx.Graph: graph.add_node(term, types=term_types) # RAG may be a list of neighbor dicts like {"term": ..., "types": [...]} - for neighbor in (row.get("RAG", []) or []): + for neighbor in row.get("RAG", []) or []: neighbor_term = neighbor.get("term") neighbor_types = neighbor.get("types", []) @@ -263,12 +418,24 @@ def _create_term_graph(self, training_rows: List[Dict[str, Any]]) -> nx.Graph: return graph - def _extract_graph_features(self, term_graph: nx.Graph, terms: List[str]) -> np.ndarray: + def _extract_graph_features( + self, term_graph: nx.Graph, terms: List[str] + ) -> np.ndarray: """Compute simple per-term graph features. + Feature Vector For each term we compute a 4-dim vector: - [degree, clustering_coefficient, degree_centrality, pagerank_score] - Returns an array of shape [len(terms), 4]. + `[degree, clustering_coefficient, degree_centrality, pagerank_score]` + + Parameters + term_graph: + Graph built over training terms. + terms: + List of term strings to extract features for. + + Returns + np.ndarray + Array of shape `(len(terms), 4)` (dtype float32). """ if len(term_graph): degree_centrality = nx.degree_centrality(term_graph) @@ -293,7 +460,26 @@ def _extract_graph_features(self, term_graph: nx.Graph, terms: List[str]) -> np. return np.asarray(feature_rows, dtype=np.float32) def _as_term_types_dicts(self, data: Any) -> List[Dict[str, Any]]: - """Normalize diverse training data formats to a list of dicts: {term, types, RAG}.""" + """Normalize diverse training data formats to a list of dicts: {term, types, RAG}. + + Supported Inputs + - Object with attribute `.term_typings` (iterable of items exposing + `.term`, `.types`, optional `.RAG`). + - List of dicts with keys `term`, `types`, optional `RAG`. + - List/tuple of `(term, types[, RAG])`. + + Parameters + data: + Training payload. + + Returns + List[Dict[str, Any]] + Normalized dictionaries ready for training. + + Raises + ValueError + If `data` is neither a list/tuple nor exposes `.term_typings`. 
+ """ normalized_rows: List[Dict[str, Any]] = [] # Case 1: object with attribute `.term_typings` @@ -308,13 +494,19 @@ def _as_term_types_dicts(self, data: Any) -> List[Dict[str, Any]]: if not isinstance(type_list, list): type_list = [type_list] normalized_rows.append( - {"term": str(term_text), "types": [str(x) for x in type_list], "RAG": rag_neighbors} + { + "term": str(term_text), + "types": [str(x) for x in type_list], + "RAG": rag_neighbors, + } ) return normalized_rows # Otherwise: must be a list/tuple-like container if not isinstance(data, (list, tuple)): - raise ValueError("Training data must be a list/tuple or expose .term_typings") + raise ValueError( + "Training data must be a list/tuple or expose .term_typings" + ) if not data: return normalized_rows @@ -330,7 +522,11 @@ def _as_term_types_dicts(self, data: Any) -> List[Dict[str, Any]]: if not isinstance(type_list, list): type_list = [type_list] normalized_rows.append( - {"term": str(term_text), "types": [str(x) for x in type_list], "RAG": rag_neighbors} + { + "term": str(term_text), + "types": [str(x) for x in type_list], + "RAG": rag_neighbors, + } ) return normalized_rows @@ -345,13 +541,36 @@ def _as_term_types_dicts(self, data: Any) -> List[Dict[str, Any]]: if not isinstance(type_list, list): type_list = [type_list] normalized_rows.append( - {"term": str(term_text), "types": [str(x) for x in type_list], "RAG": rag_neighbors} + { + "term": str(term_text), + "types": [str(x) for x in type_list], + "RAG": rag_neighbors, + } ) return normalized_rows def _as_predict_terms_ids(self, data: Any) -> Tuple[List[str], List[Any]]: - """Normalize prediction input into parallel lists: (terms, ids).""" + """Normalize prediction input into parallel lists: (terms, ids). + + Supported Inputs + - Object with `.term_typings`. + - List of dicts with `term` and optional `id`. + - List of tuples/lists `(term, id[, ...])`. + - List of plain term strings. + + Parameters + data: + Evaluation payload. + + Returns + Tuple[List[str], List[Any]] + `(terms, example_ids)` lists aligned by index. + + Raises + ValueError + If the input format is unsupported. + """ terms: List[str] = [] example_ids: List[Any] = [] @@ -392,7 +611,20 @@ def _as_predict_terms_ids(self, data: Any) -> Tuple[List[str], List[Any]]: raise ValueError("Unsupported predict() input format.") def _as_gold_id_term_types(self, data: Any) -> List[Dict[str, Any]]: - """Normalize gold labels into a list of dicts: {id, term, types}.""" + """Normalize gold labels into a list of dicts: {id, term, types}. + + Supported Inputs + Mirrors `_as_term_types_dicts`, but ensures an `id` is set. + + Parameters + data: + Ground-truth payload. + + Returns + List[Dict[str, Any]] + `{'id': Any, 'term': str, 'types': List[str]}` entries. 
+ + """ gold_rows: List[Dict[str, Any]] = [] # Case 1: object with attribute `.term_typings` @@ -404,7 +636,13 @@ def _as_gold_id_term_types(self, data: Any) -> List[Dict[str, Any]]: type_list = getattr(item, "types", []) if not isinstance(type_list, list): type_list = [type_list] - gold_rows.append({"id": gold_id, "term": term_text, "types": [str(t) for t in type_list]}) + gold_rows.append( + { + "id": gold_id, + "term": term_text, + "types": [str(t) for t in type_list], + } + ) return gold_rows # Case 2: list/tuple container @@ -419,7 +657,13 @@ def _as_gold_id_term_types(self, data: Any) -> List[Dict[str, Any]]: type_list = row.get("types", []) if not isinstance(type_list, list): type_list = [type_list] - gold_rows.append({"id": gold_id, "term": term_text, "types": [str(t) for t in type_list]}) + gold_rows.append( + { + "id": gold_id, + "term": term_text, + "types": [str(t) for t in type_list], + } + ) return gold_rows # 2b) list of tuples/lists: (term, types[, id]) @@ -432,35 +676,68 @@ def _as_gold_id_term_types(self, data: Any) -> List[Dict[str, Any]]: gold_id = tuple_row[2] if len(tuple_row) > 2 else i if not isinstance(type_list, list): type_list = [type_list] - gold_rows.append({"id": gold_id, "term": term_text, "types": [str(t) for t in type_list]}) + gold_rows.append( + { + "id": gold_id, + "term": term_text, + "types": [str(t) for t in type_list], + } + ) return gold_rows - raise ValueError("Unsupported ground-truth input format for tasks_ground_truth_former().") + raise ValueError( + "Unsupported ground-truth input format for tasks_ground_truth_former()." + ) + class AlexbekRAGLearner(AutoLearner): """Retrieval-Augmented Term Typing learner (single task: term-typing). - Flow: - 1) fit: collect (term -> [types]) examples, build an in-memory index - using a sentence-embedding model. - 2) predict: for each new term, retrieve top-k similar examples, compose a - structured prompt, query an instruction-tuned causal LLM, and parse types. + Flow + 1) `fit`: collect (term -> [types]) examples, build an in-memory index + using a sentence-embedding model. + 2) `predict`: for each new term, retrieve top-k similar examples, compose a + structured prompt, query an instruction-tuned causal LLM, and parse types. - Returns a list of dicts: {"term": str, "types": List[str], "id": Optional[str]}. + Returns + List[Dict[str, Any]] + `{"term": str, "types": List[str], "id": Optional[str]}` rows. """ def __init__( self, llm_model_id: str = "Qwen/Qwen2.5-0.5B-Instruct", retriever_model_id: str = "sentence-transformers/all-MiniLM-L6-v2", - device: str = "auto", # "auto" | "cuda" | "cpu" - token: str = "", # HF token if needed + device: str = "auto", # "auto" | "cuda" | "cpu" + token: str = "", # HF token if needed top_k: int = 3, max_new_tokens: int = 256, - gen_batch_size: int = 4, # generation batch size + gen_batch_size: int = 4, # generation batch size enc_batch_size: int = 64, # embedding batch size - **kwargs: Any, # absorb extra pipeline-style args + **kwargs: Any, # absorb extra pipeline-style args ) -> None: + """Configure the RAG learner. + + Parameters + llm_model_id: + HF model id/path for the instruction-tuned causal LLM. + retriever_model_id: + Sentence-embedding model id for retrieval. + device: + Device policy ('auto'|'cuda'|'cpu') for the LLM. + token: + Optional HF token for gated models. + top_k: + Number of nearest examples to retrieve per query term. + max_new_tokens: + Decoding budget for the LLM. + gen_batch_size: + Number of prompts per generation batch. 
+ enc_batch_size: + Number of texts per embedding batch. + **kwargs: + Extra configuration captured for downstream use. + """ super().__init__() # Consolidated configuration for simple serialization @@ -482,7 +759,7 @@ def __init__( # Retriever components self.embedder: Optional[SentenceTransformer] = None - self.indexed_corpus: List[str] = [] # items: " || [...]" + self.indexed_corpus: List[str] = [] # items: " || [...]" self.corpus_embeddings: Optional[torch.Tensor] = None # Training cache of (term, [types]) tuples @@ -497,15 +774,13 @@ def __init__( "2) Be concise. Respond ONLY in JSON using double quotes.\n" 'Format: {"term":"...", "reasoning":"<<=100 words>>", "types":["...", "..."]}\n' ) - self._user_prompt_template: str = ( - """{examples} + self._user_prompt_template: str = """{examples} TERM: {term} TASK: Determine semantic types for the given term based on the domain ontology. Remember: types are generalizing categories, not the term itself. Respond in JSON. """ - ) def load( self, @@ -515,7 +790,21 @@ def load( token: Optional[str] = None, **kwargs: Any, ) -> None: - """Load the LLM and the embedding retriever. Overrides constructor values if provided.""" + """Load the LLM and the embedding retriever. Overrides constructor values if provided. + + Parameters + model_id: + Optional override for the LLM model id. + retriever_id: + Optional override for the embedding model id. + device: + Optional override for device selection policy. + token: + Optional override for HF token. + **kwargs: + Extra values to store in `extra_cfg`. + + """ if model_id is not None: self.cfg["llm_model_id"] = model_id if retriever_id is not None: @@ -556,10 +845,26 @@ def load( generation_cfg.num_beams = 1 # Retriever - self.embedder = SentenceTransformer(self.cfg["retriever_model_id"], trust_remote_code=True) + self.embedder = SentenceTransformer( + self.cfg["retriever_model_id"], trust_remote_code=True + ) def fit(self, train_data: Any, task: str, ontologizer: bool = True) -> None: - """Prepare the retrieval index from training examples.""" + """Prepare the retrieval index from training examples. + + Parameters + train_data: + Training payload containing terms and their types. + task: + Must be `'term-typing'`; other tasks are forwarded to base. + ontologizer: + Unused flag for API compatibility. + + Side Effects + - Normalizes to a list of `(term, [types])`. + - Builds an indexable text corpus and (if embedder is loaded) + computes embeddings for retrieval. + """ if task != "term-typing": return super().fit(train_data, task, ontologizer) @@ -568,7 +873,8 @@ def fit(self, train_data: Any, task: str, ontologizer: bool = True) -> None: # Build the textual corpus to index self.indexed_corpus = [ - f"{term} || {json.dumps(types, ensure_ascii=False)}" for term, types in self.train_term_types + f"{term} || {json.dumps(types, ensure_ascii=False)}" + for term, types in self.train_term_types ] # Embed the corpus if available; else fall back to zero-shot prompting @@ -578,7 +884,23 @@ def fit(self, train_data: Any, task: str, ontologizer: bool = True) -> None: self.corpus_embeddings = None def predict(self, eval_data: Any, task: str, ontologizer: bool = True) -> Any: - """Predict types for evaluation items; returns a list of {term, types, id?}.""" + """Predict types for evaluation items; returns a list of {term, types, id?}. + + Parameters + eval_data: + Evaluation payload to type (terms + optional ids). + task: + Must be `'term-typing'`; other tasks are forwarded to base. 
+ ontologizer: + Unused flag for API compatibility. + + Returns + List[Dict[str, Any]] + For each input term, a dictionary with keys: + - `term`: The input term. + - `types`: A (unique, sorted) list of predicted types. + - `id`: Optional example id (if provided in input). + """ if task != "term-typing": return super().predict(eval_data, task, ontologizer) @@ -588,11 +910,15 @@ def predict(self, eval_data: Any, task: str, ontologizer: bool = True) -> Any: # Use RAG if we have an indexed corpus & embeddings; otherwise zero-shot rag_available = ( - self.corpus_embeddings is not None and self.embedder is not None and len(self.indexed_corpus) > 0 + self.corpus_embeddings is not None + and self.embedder is not None + and len(self.indexed_corpus) > 0 ) if rag_available: - neighbor_docs_per_query = self._retrieve_batch(eval_terms, top_k=int(self.cfg["top_k"])) + neighbor_docs_per_query = self._retrieve_batch( + eval_terms, top_k=int(self.cfg["top_k"]) + ) else: neighbor_docs_per_query = [[] for _ in eval_terms] @@ -608,7 +934,9 @@ def predict(self, eval_data: Any, task: str, ontologizer: bool = True) -> Any: # Build standardized results results: List[Dict[str, Any]] = [] - for term, example_id, predicted_types in zip(eval_terms, eval_ids, predicted_types_lists): + for term, example_id, predicted_types in zip( + eval_terms, eval_ids, predicted_types_lists + ): result_row: Dict[str, Any] = { "term": term, "types": sorted({t for t in predicted_types}), # unique + sorted @@ -617,11 +945,28 @@ def predict(self, eval_data: Any, task: str, ontologizer: bool = True) -> Any: result_row["id"] = example_id results.append(result_row) - assert all(("term" in row and "types" in row) for row in results), "predict() must return term + types" + assert all(("term" in row and "types" in row) for row in results), ( + "predict() must return term + types" + ) return results def _unpack_train(self, data: Any) -> List[Tuple[str, List[str]]]: - """Extract (term, [types]) tuples from supported training payloads.""" + """Extract `(term, [types])` tuples from supported training payloads. + + Supported Inputs + - `data.term_typings` (objects exposing `.term` & `.types`) + - `list[dict]` with keys `'term'` and `'types'` + - `list[str]` → returns empty (nothing to index) + - other formats → empty + + Parameters + data: + Training payload. + + Returns + List[Tuple[str, List[str]]] + (term, types) tuples (types kept as strings). 
+ """ term_typings = getattr(data, "term_typings", None) if term_typings is not None: parsed_pairs: List[Tuple[str, List[str]]] = [] @@ -629,7 +974,9 @@ def _unpack_train(self, data: Any) -> List[Tuple[str, List[str]]]: term = getattr(item, "term", None) types = list(getattr(item, "types", []) or []) if term and types: - parsed_pairs.append((term, [t for t in types if isinstance(t, str)])) + parsed_pairs.append( + (term, [t for t in types if isinstance(t, str)]) + ) return parsed_pairs if isinstance(data, list) and data and isinstance(data[0], dict): @@ -638,17 +985,35 @@ def _unpack_train(self, data: Any) -> List[Tuple[str, List[str]]]: term = row.get("term") types = row.get("types") or [] if term and isinstance(types, list) and types: - parsed_pairs.append((term, [t for t in types if isinstance(t, str)])) + parsed_pairs.append( + (term, [t for t in types if isinstance(t, str)]) + ) return parsed_pairs # If only a list of strings is provided, there's nothing to index for RAG - if isinstance(data, (list, set, tuple)) and all(isinstance(x, str) for x in data): + if isinstance(data, (list, set, tuple)) and all( + isinstance(x, str) for x in data + ): return [] return [] def _unpack_eval(self, data: Any) -> Tuple[List[str], List[Optional[str]]]: - """Extract (terms, ids) from supported evaluation payloads.""" + """Extract `(terms, ids)` from supported evaluation payloads. + + Supported Inputs + - `data.term_typings` (objects exposing `.term` & optional `.id`) + - `list[str]` + - `list[dict]` with `term` and optional `id` + + Parameters + data: + Evaluation payload. + + Returns + Tuple[List[str], List[Optional[str]]] + Two lists aligned by index: terms and ids (ids may contain `None`). + """ term_typings = getattr(data, "term_typings", None) if term_typings is not None: terms: List[str] = [] @@ -672,24 +1037,50 @@ def _unpack_eval(self, data: Any) -> Tuple[List[str], List[Optional[str]]]: return [], [] def _encode_texts(self, texts: List[str]) -> torch.Tensor: - """Encode a batch of texts with the sentence-embedding model.""" + """Encode a batch of texts with the sentence-embedding model. + + Parameters + texts: + List of strings to embed. + + Returns + torch.Tensor + Tensor of shape `(len(texts), hidden_dim)`. If `texts` is empty, + returns an empty tensor with 0 rows. + """ batch_size = int(self.cfg["enc_batch_size"]) batch_embeddings: List[torch.Tensor] = [] for batch_start in range(0, len(texts), batch_size): batch_texts = texts[batch_start : batch_start + batch_size] - embeddings = self.embedder.encode(batch_texts, convert_to_tensor=True, show_progress_bar=False) + embeddings = self.embedder.encode( + batch_texts, convert_to_tensor=True, show_progress_bar=False + ) batch_embeddings.append(embeddings) - return torch.cat(batch_embeddings, dim=0) if batch_embeddings else torch.empty(0) + return ( + torch.cat(batch_embeddings, dim=0) if batch_embeddings else torch.empty(0) + ) def _retrieve_batch(self, queries: List[str], top_k: int) -> List[List[str]]: - """Return for each query the top-k most similar corpus entries (as raw text rows).""" + """Return for each query the top-k most similar corpus entries. + + Parameters + queries: + List of query terms. + top_k: + Number of neighbors to retrieve for each query. + + Returns + List[List[str]] + For each query, a list of raw corpus strings formatted as + `" || [\\"type1\\", ...]"`. 
+ """ if self.corpus_embeddings is None or not self.indexed_corpus: return [[] for _ in queries] - query_embeddings = self._encode_texts(queries) # [Q, D] - doc_embeddings = self.corpus_embeddings # [N, D] + query_embeddings = self._encode_texts(queries) # [Q, D] + doc_embeddings = self.corpus_embeddings # [N, D] if query_embeddings.shape[-1] != doc_embeddings.shape[-1]: raise ValueError( f"Embedding dim mismatch: {query_embeddings.shape[-1]} vs {doc_embeddings.shape[-1]}" @@ -705,7 +1096,16 @@ def _retrieve_batch(self, queries: List[str], top_k: int) -> List[List[str]]: return [[self.indexed_corpus[j] for j in row.tolist()] for row in top_indices] def _decode_examples(self, docs: List[str]) -> List[Tuple[str, List[str]]]: - """Parse raw corpus rows ('term || [types]') into (term, [types]) pairs.""" + """Parse raw corpus rows ('term || [types]') into `(term, [types])` pairs. + + Parameters + docs: + Raw strings from the index/corpus. + + Returns + List[Tuple[str, List[str]]] + Parsed (term, types) pairs; malformed rows are skipped. + """ example_pairs: List[Tuple[str, List[str]]] = [] for raw_row in docs: try: @@ -713,13 +1113,24 @@ def _decode_examples(self, docs: List[str]) -> List[Tuple[str, List[str]]]: term = term_raw.strip() types_list = json.loads(types_json.strip()) if isinstance(types_list, list): - example_pairs.append((term, [t for t in types_list if isinstance(t, str)])) + example_pairs.append( + (term, [t for t in types_list if isinstance(t, str)]) + ) except Exception: continue return example_pairs def _format_examples(self, pairs: List[Tuple[str, List[str]]]) -> str: - """Format retrieved example pairs into a compact block for the prompt.""" + """Format retrieved example pairs into a compact block for the prompt. + + Parameters + pairs: + Retrieved `(term, [types])` examples. + + Returns + str + Human-readable lines to provide *light* guidance to the LLM. + """ if not pairs: return "EXAMPLES: (none provided)" lines: List[str] = ["CLASSIFICATION EXAMPLES:"] @@ -730,12 +1141,34 @@ def _format_examples(self, pairs: List[Tuple[str, List[str]]]) -> str: return "\n".join(lines) def _compose_prompt(self, examples_block: str, term: str) -> str: - """Compose the final prompt from system + user blocks.""" - user_block = self._user_prompt_template.format(examples=examples_block, term=term) + """Compose the final prompt from system + user blocks. + + Parameters + examples_block: + Text block with retrieved examples. + term: + The query term to classify. + + Returns + str + Full prompt string passed to the LLM. + """ + user_block = self._user_prompt_template.format( + examples=examples_block, term=term + ) return f"{self._system_prompt}\n\n{user_block}\n" def _generate_and_parse(self, prompts: List[str]) -> List[List[str]]: - """Run generation for a batch of prompts and parse the JSON 'types' from outputs.""" + """Run generation for a batch of prompts and parse the JSON `'types'` from outputs. + + Parameters + prompts: + Finalized prompts for the LLM. + + Returns + List[List[str]] + For each prompt, a list of predicted type strings. 
+ """ batch_size = int(self.cfg["gen_batch_size"]) all_predicted_types: List[List[str]] = [] @@ -744,7 +1177,9 @@ def _generate_and_parse(self, prompts: List[str]) -> List[List[str]]: # Tokenize and move to the LLM's device model_device = getattr(self.generation_model, "device", None) - encodings = self.tokenizer(prompt_batch, return_tensors="pt", padding=True).to(model_device) + encodings = self.tokenizer( + prompt_batch, return_tensors="pt", padding=True + ).to(model_device) input_token_length = encodings["input_ids"].shape[1] # Deterministic decoding (greedy) @@ -762,9 +1197,14 @@ def _generate_and_parse(self, prompts: List[str]) -> List[List[str]]: # Slice off the prompt tokens and decode only newly generated tokens new_token_span = generated_tokens[:, input_token_length:] - decoded_texts = [self.tokenizer.decode(seq, skip_special_tokens=True) for seq in new_token_span] - - parsed_types_per_prompt = [self._parse_types(text) for text in decoded_texts] + decoded_texts = [ + self.tokenizer.decode(seq, skip_special_tokens=True) + for seq in new_token_span + ] + + parsed_types_per_prompt = [ + self._parse_types(text) for text in decoded_texts + ] all_predicted_types.extend(parsed_types_per_prompt) return all_predicted_types @@ -772,11 +1212,19 @@ def _generate_and_parse(self, prompts: List[str]) -> List[List[str]]: def _parse_types(self, text: str) -> List[str]: """Extract a list of type strings from LLM output. - Attempts (in order): - 1) Strict JSON object with "types". - 2) Regex-extract JSON object containing "types". - 3) Regex-extract first bracketed list. - 4) Comma-split fallback. + Parsing Strategy (in order) + 1) Strict JSON object with `"types"`. + 2) Regex-extract JSON object containing `"types"`. + 3) Regex-extract first bracketed list. + 4) Comma-split fallback. + + Parameters + text: + Raw LLM output to parse. + + Returns + List[str] + Parsed list of type strings (possibly empty if parsing fails). 
""" try: obj = json.loads(text) @@ -786,7 +1234,9 @@ def _parse_types(self, text: str) -> List[str]: pass try: - obj_match = re.search(r'\{[^{}]*"types"\s*:\s*\[[^\]]*\][^{}]*\}', text, re.S) + obj_match = re.search( + r'\{[^{}]*"types"\s*:\s*\[[^\]]*\][^{}]*\}', text, re.S + ) if obj_match: obj = json.loads(obj_match.group(0)) types = obj.get("types", []) @@ -795,9 +1245,12 @@ def _parse_types(self, text: str) -> List[str]: pass try: - list_match = re.search(r'\[([^\]]+)\]', text) + list_match = re.search(r"\[([^\]]+)\]", text) if list_match: - items = [x.strip().strip('"').strip("'") for x in list_match.group(1).split(",")] + items = [ + x.strip().strip('"').strip("'") + for x in list_match.group(1).split(",") + ] return [t for t in items if t] except Exception: pass diff --git a/ontolearner/learner/term_typing/rwthdbis.py b/ontolearner/learner/term_typing/rwthdbis.py index f27fd56..c8df797 100644 --- a/ontolearner/learner/term_typing/rwthdbis.py +++ b/ontolearner/learner/term_typing/rwthdbis.py @@ -27,10 +27,10 @@ TrainingArguments, set_seed, ) -from transformers import DebertaV2Tokenizer from ...base import AutoLearner + class RWTHDBISSFTLearner(AutoLearner): """ Supervised term-typing @@ -44,6 +44,7 @@ def __init__( model_name: str = "microsoft/deberta-v3-small", trained_model_path: Optional[str] = None, output_dir: Optional[str] = None, + device: str = "cpu", max_length: int = 64, per_device_train_batch_size: int = 16, gradient_accumulation_steps: int = 2, @@ -55,8 +56,35 @@ def __init__( save_total_limit: int = 1, fp16: bool = False, bf16: bool = False, - seed: int = 42 + seed: int = 42, ) -> None: + """Initialize the term-typing learner and configure training defaults. + + Args: + model_name: Backbone HF model identifier (used if `trained_model_path` is None). + trained_model_path: Optional path to a fine-tuned checkpoint for loading. + output_dir: Directory to write checkpoints and tokenizer; defaults to './term_typing'. + device: user-defined argument as 'cuda' or 'cpu'. + max_length: Maximum tokenized sequence length. + per_device_train_batch_size: Per-device batch size during training. + gradient_accumulation_steps: Number of update accumulation steps. + num_train_epochs: Training epochs. + learning_rate: Optimizer learning rate. + weight_decay: Weight decay coefficient. + logging_steps: Logging interval (steps) for the Trainer. + save_strategy: Checkpoint save strategy (e.g., 'epoch', 'steps', 'no'). + save_total_limit: Maximum number of checkpoints to keep. + fp16: Enable mixed precision (FP16) if supported. + bf16: Enable mixed precision (BF16) if supported. + seed: Random seed for reproducibility. + + Side Effects: + Creates `output_dir` if it does not exist. + + Notes: + The learner predicts exactly one label per term at inference time + (argmax over logits). 
+ """ super().__init__() self.model_name = model_name self.trained_model_path = trained_model_path @@ -76,7 +104,7 @@ def __init__( self.bf16 = bf16 self.seed = seed - self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.device = device self.model: Optional[AutoModelForSequenceClassification] = None self.tokenizer: Optional[AutoTokenizer] = None self.id2label: Dict[int, str] = {} @@ -84,44 +112,53 @@ def __init__( def _term_typing(self, data: Any, test: bool = False) -> Optional[Any]: """ - train: expects ontology-like object with .term_typings - test: returns List[{"term": str, "types": [str]}] (for evaluator) - """ - if not test: - return self._train_from_term_typings(train_data=data) + Train or run inference for term typing, depending on `test`. - terms = self._collect_eval_terms(data) - return self._predict_structured_output(terms) + When `test=False`, trains on `data.term_typings`. + When `test=True`, predicts labels for provided terms. - def _load_robust_tokenizer(self, backbone: str) -> AutoTokenizer: - try: - return AutoTokenizer.from_pretrained(backbone, use_fast=True) - except Exception as fast_err: - print(f"[tokenizer] Fast tokenizer failed: {fast_err}. Trying DebertaV2Tokenizer (slow)...") + Args: + data: If training, an object with `.term_typings` where each item has + `term` and `types` (list[str]). If testing, either a `List[str]` + of raw term texts or an object with `.term_typings`. + test: If True, runs inference; otherwise trains. - try: - return DebertaV2Tokenizer.from_pretrained(backbone) - except Exception as slow_err: - print(f"[tokenizer] DebertaV2Tokenizer failed: {slow_err}. Trying AutoTokenizer(use_fast=False)...") + Returns: + If `test=True`: a list of dicts like + `[{"term": "", "types": [""]}, ...]`. + If `test=False`: None. - try: - return AutoTokenizer.from_pretrained(backbone, use_fast=False) - except Exception as final_err: - raise RuntimeError( - "Failed to load a tokenizer for this DeBERTa model.\n" - "Try:\n" - " - pip install --upgrade sentencepiece\n" - " - ensure network access for model files\n" - " - clear your HF cache and retry\n" - " - pin versions: transformers==4.43.*, tokenizers<0.20\n" - f"Original error: {final_err}" - ) + Raises: + ValueError: If required fields are missing from `data`. + """ + if test: + terms = self._collect_eval_terms(data) + return self._predict_structured_output(terms) + else: + self._train_from_term_typings(train_data=data) + return None def _expand_multilabel_training_rows( self, term_typings: List[Any] ) -> Tuple[List[str], List[int], Dict[int, str], Dict[str, int]]: """ - From multi-label instances -> (texts, label_ids), and label maps. + Expand multi-label instances into single-label rows and derive label maps. + + Each training instance with fields: + - `term`: str-like + - `types`: list of label strings + is expanded into len(types) rows with the same `term` and individual labels. + + Args: + term_typings: Sequence of objects (e.g., dataclasses) exposing + `.term` and `.types`. + + Returns: + A tuple `(texts, label_ids, id2label, label2id)`: + - texts: Flattened list of term strings (one per label). + - label_ids: Parallel list of integer label ids. + - id2label: Mapping from id -> label string. + - label2id: Mapping from label string -> id. 
""" label_strings: List[str] = [] for instance in term_typings: @@ -143,18 +180,53 @@ def _expand_multilabel_training_rows( def _collect_eval_terms(self, eval_data: Any) -> List[str]: """ - Accepts List[str] OR object with .term_typings; returns list of term texts. + Collect the list of term texts to predict for evaluation. + + Accepts either: + - A `List[str]` of raw term texts, or + - An object with `.term_typings`, from which `.term` is extracted. + + Args: + eval_data: Input carrier for terms. + + Returns: + List of term strings. + + Raises: + ValueError: If `eval_data` lacks the expected structure. """ if isinstance(eval_data, list) and all(isinstance(x, str) for x in eval_data): terms = eval_data else: term_typings = getattr(eval_data, "term_typings", None) if term_typings is None: - raise ValueError("Provide a List[str] OR an object with .term_typings for test=True.") + raise ValueError( + "Provide a List[str] OR an object with .term_typings for test=True." + ) terms = [str(instance.term) for instance in term_typings] return terms def _train_from_term_typings(self, train_data: Any) -> None: + """Train the term-typing classifier from `.term_typings`. + + Steps: + 1) Seed RNGs for reproducibility. + 2) Expand multi-label examples into single-label rows. + 3) Build HF `DatasetDict`, tokenizer, and data collator. + 4) Initialize `AutoModelForSequenceClassification`. + 5) Train with `Trainer` and save model/tokenizer to `output_dir`. + + Args: + train_data: Object with `.term_typings`; each item exposes + `.term` (text) and `.types` (list[str]). + + Raises: + ValueError: If `train_data` does not provide `.term_typings`. + + Side Effects: + Writes a trained model to `self.output_dir` and updates + `self.id2label` / `self.label2id`. + """ set_seed(self.seed) random.seed(self.seed) torch.manual_seed(self.seed) @@ -165,15 +237,26 @@ def _train_from_term_typings(self, train_data: Any) -> None: if term_typings is None: raise ValueError("train_data must provide .term_typings for term-typing.") - texts, label_ids, self.id2label, self.label2id = self._expand_multilabel_training_rows(term_typings) + texts, label_ids, self.id2label, self.label2id = ( + self._expand_multilabel_training_rows(term_typings) + ) - dataset = DatasetDict({"train": Dataset.from_dict({"labels": label_ids, "text": texts})}) + dataset = DatasetDict( + {"train": Dataset.from_dict({"labels": label_ids, "text": texts})} + ) backbone = self.trained_model_path or self.model_name - self.tokenizer = self._load_robust_tokenizer(backbone) + try: + self.tokenizer = AutoTokenizer.from_pretrained(backbone, use_fast=True) + except Exception: + # fallback if fast tokenizer isn't available + self.tokenizer = AutoTokenizer.from_pretrained(backbone, use_fast=False) def tokenize_batch(batch: Dict[str, List[str]]): - return self.tokenizer(batch["text"], truncation=True, max_length=self.max_length) + """Tokenize a batch of texts with truncation and max length.""" + return self.tokenizer( + batch["text"], truncation=True, max_length=self.max_length + ) tokenized = dataset.map(tokenize_batch, batched=True, remove_columns=["text"]) data_collator = DataCollatorWithPadding(self.tokenizer) @@ -185,7 +268,10 @@ def tokenize_batch(batch: Dict[str, List[str]]): label2id=self.label2id, ) - if getattr(self.model.config, "pad_token_id", None) is None and self.tokenizer.pad_token_id is not None: + if ( + getattr(self.model.config, "pad_token_id", None) is None + and self.tokenizer.pad_token_id is not None + ): self.model.config.pad_token_id = 
self.tokenizer.pad_token_id training_args = TrainingArguments( @@ -216,11 +302,20 @@ def tokenize_batch(batch: Dict[str, List[str]]): self.tokenizer.save_pretrained(self.output_dir) def _ensure_loaded_for_inference(self) -> None: + """Load model/tokenizer for inference if not already loaded. + + Loads from `trained_model_path` if set, otherwise from `output_dir`. + Also restores `id2label`/`label2id` from the model config when present, + moves the model to the configured device, and sets eval mode. + """ if self.model is not None and self.tokenizer is not None: return model_path = self.trained_model_path or self.output_dir self.model = AutoModelForSequenceClassification.from_pretrained(model_path) - self.tokenizer = self._load_robust_tokenizer(model_path) + try: + self.tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True) + except Exception: + self.tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False) cfg = self.model.config if hasattr(cfg, "id2label") and hasattr(cfg, "label2id"): @@ -230,20 +325,49 @@ def _ensure_loaded_for_inference(self) -> None: self.model.to(self.device).eval() def _predict_label_ids(self, terms: List[str]) -> List[int]: + """Predict label ids (argmax) for a list of term strings. + + Ensures model/tokenizer are loaded, then performs forward passes + term-by-term and collects the argmax label id. + + Args: + terms: List of raw term texts. + + Returns: + List of integer label ids corresponding to `terms`. + """ self._ensure_loaded_for_inference() predictions: List[int] = [] - for term_text in tqdm(terms, desc="Inference", bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}"): - inputs = self.tokenizer(term_text, return_tensors="pt", truncation=True, max_length=self.max_length) + for term_text in tqdm( + terms, desc="Inference", bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}" + ): + inputs = self.tokenizer( + term_text, + return_tensors="pt", + truncation=True, + max_length=self.max_length, + ) inputs = {name: tensor.to(self.device) for name, tensor in inputs.items()} with torch.no_grad(): logits = self.model(**inputs).logits predictions.append(int(torch.argmax(logits, dim=-1).item())) return predictions - def _predict_structured_output(self, terms: List[str]) -> List[Dict[str, List[str]]]: + def _predict_structured_output( + self, terms: List[str] + ) -> List[Dict[str, List[str]]]: """ - Convert predicted IDs into evaluator format: - [{"term": "", "types": [""]}, ...] + Convert predicted label IDs into evaluator-friendly structured outputs. + + The output format is: + [{"term": "", "types": [""]}, ...] + + Args: + terms: Raw term texts to classify. + + Returns: + List of dicts mapping each input term to a list with its predicted + label string. Falls back to stringified id if label mapping is absent. """ label_ids = self._predict_label_ids(terms) id2label_map = self.id2label or {} # fallback handled below diff --git a/ontolearner/learner/term_typing/sbunlp.py b/ontolearner/learner/term_typing/sbunlp.py index f838bd0..d5c0114 100644 --- a/ontolearner/learner/term_typing/sbunlp.py +++ b/ontolearner/learner/term_typing/sbunlp.py @@ -20,123 +20,152 @@ from ...base import AutoLearner + class SBUNLPZSLearner(AutoLearner): """ Qwen-based blind term typing learner (Task B), implemented as an AutoLearner. - This class reproduces the notebook logic: - - Fit phase learns the *allowed type inventory* from training data. - - Predict phase performs blind prompting per term using the learned type list. 
- - Outputs are restricted to the allowed types and returned as [{"id", "types"}]. - - Expected I/O (recommended): - - fit(train_data, task="term-typing", ontologizer=True): - The framework's AutoLearner.tasks_data_former() provides a unique list of - type labels; we store it to `self.allowed_types`. - - predict(eval_data, task="term-typing", ontologizer=False): - Pass a list of dicts with keys {"id": str, "term": str} so IDs are preserved. - Returns a list of dicts [{"id": ..., "types": [...] }]. + Lifecycle: + • `fit(...)` learns/records the allowed type inventory from the training payload. + • `load(...)` explicitly loads the tokenizer/model (pass `model_id`/`token` here). + • `predict(...)` prompts the model per term and returns normalized types limited + to the learned inventory. """ def __init__( self, - model_id: str = "Qwen/Qwen2.5-0.5B-Instruct", - device: Optional[str] = None, + device: str = "cpu", max_new_tokens: int = 64, temperature: float = 0.0, + model_id: str = "Qwen/Qwen2.5-0.5B-Instruct", token: Optional[str] = None, ) -> None: """ + Configure runtime knobs. Model identity and auth are provided to `load(...)`. + Args: - model_id: HF model id for Qwen. - device: "cuda", "mps", or "cpu". Auto-detected if None. - max_new_tokens: Generation cap per prompt. - temperature: Not used for greedy decoding (kept for future). - token: HF token if the model is gated. + device: Torch device policy ("cuda", "mps", or "cpu"). + max_new_tokens: Max tokens to generate per prompt (greedy decoding). + temperature: Reserved for future sampling; generation is greedy here. + model_id: Fallback model id/path used if `load()` is called without args. + token: Fallback HF token used if `load()` is called without args. + + Side Effects: + Initializes runtime configuration, instance defaults for `load()`, + and placeholders for `tokenizer`, `model`, and `allowed_types`. """ super().__init__() - - # Basic configuration - self.model_id = model_id - # default device detection: prefer CUDA if available - self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") + self.device = device self.max_new_tokens = max_new_tokens self.temperature = temperature + + # Defaults that load() may use when its args are None + self.model_id = model_id self.token = token - # Model/tokenizer placeholders (populated by load()) + # Placeholders populated by load() self.tokenizer: Optional[AutoTokenizer] = None self.model: Optional[AutoModelForCausalLM] = None - # Learned inventory of allowed type labels (populated by fit()) + # Learned inventory self.allowed_types: List[str] = [] - # Regex used to extract quoted strings from model output (e.g. "type") + # Regex used to extract quoted strings from model output (e.g., "type") self._quoted_re = re.compile(r'"([^"]+)"') - def load(self, **kwargs: Any): + def load( + self, + model_id: Optional[str] = None, + token: Optional[str] = None, + dtype: Optional[torch.dtype] = None, + ): """ - Load Qwen model and tokenizer. + Load tokenizer and model weights explicitly. - NOTE: - - The HF arguments used here mirror your original code (`token=...`). - You may see a deprecation warning for `torch_dtype` (older transformers); - switching to `dtype=` is recommended but I did not change behavior here. - """ - # Respect overrides from kwargs if provided - model_id = kwargs.get("model_id", self.model_id) - token = kwargs.get("token", self.token) + Argument precedence: + 1) Use `model_id` / `token` passed to this method (if provided). 
+ 2) Else fall back to `self.model_id` / `self.token`. + + Device & dtype: + • If `dtype` is None, the default is float16 on CUDA/MPS and float32 on CPU. + • `device_map` is `"auto"` for non-CPU devices, `"cpu"` otherwise. + + Args: + model_id: HF model id/path to load. If None, uses `self.model_id`. + token: HF token if the model is gated. If None, uses `self.token`. + dtype: Optional torch dtype override (e.g., `torch.float16`). - # Load tokenizer. If the model is gated, pass token (original code uses `token`). - # If your environment requires `use_auth_token=` replace here. - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=token) + Returns: + self + """ + resolved_model_id = model_id or self.model_id + resolved_token = token if token is not None else self.token - # Ensure tokenizer has a pad token (some models omit it) + # Tokenizer + self.tokenizer = AutoTokenizer.from_pretrained( + resolved_model_id, token=resolved_token + ) if self.tokenizer.pad_token is None: + # Prefer EOS as pad if available self.tokenizer.pad_token = self.tokenizer.eos_token - # Device mapping for from_pretrained -> keep same behavior as original code + # Device & dtype + if dtype is None: + if self.device == "cpu": + resolved_dtype = torch.float32 + else: + # Works for CUDA and Apple MPS + resolved_dtype = torch.float16 + else: + resolved_dtype = dtype + device_map = "auto" if self.device != "cpu" else "cpu" - # original code used torch_dtype; left as-is to avoid behavioral change - torch_dtype = torch.float16 if self.device != "cpu" else torch.float32 - # Load the model weights. This can be heavy; keep same params as original. self.model = AutoModelForCausalLM.from_pretrained( - model_id, + resolved_model_id, device_map=device_map, - torch_dtype=torch_dtype, - token=token, + torch_dtype=resolved_dtype, # keep torch_dtype for broad Transformers compatibility + token=resolved_token, ) return self - # ------------------------------------------------------------------------- - # Fit / Predict interface - # ------------------------------------------------------------------------- def fit(self, train_data: Any, task: str, ontologizer: bool = True): """ Learn the allowed type inventory from the training data. - Expected behavior: - - If `tasks_data_former(..., test=False)` returns a list of strings, - set allowed_types to that list (deduped & sorted). - - If it returns a list of dicts (relationships), extract unique 'parent' - fields and use those as the allowed type inventory. + Normalization rules: + • If `ontologizer=True`, the framework's `tasks_data_former(..., test=False)` + is used to normalize `train_data`. + • If a container exposes `.term_typings`, types are collected from there. + • If the normalized data is a list of dicts with `"parent"`, unique parents + become the allowed types. + • If it's a list of strings, that unique set becomes the allowed types. - This method contains a tolerant branch for the framework's custom container: - If the returned `train_fmt` is not a list but has a `.term_typings` attribute - (e.g., OntologyData object used by the framework), iterate that attribute - and collect any `types` values found. + Args: + train_data: Training payload provided by the pipeline. + task: Must be `"term-typing"`. + ontologizer: If True, normalize via `tasks_data_former()` first. + + Returns: + self + + Raises: + ValueError: If `task` is not `"term-typing"`. + TypeError: If the training data cannot be normalized to a list of + strings or relationship dicts. 
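+
+        Example (hypothetical payloads sketching the normalization rules):
+            ``fit([{"parent": "Food"}, {"parent": "Drink"}], task="term-typing", ontologizer=False)``
+            leaves ``self.allowed_types == ["Drink", "Food"]``, and a plain
+            ``["Food", "Drink", "Food"]`` payload yields the same inventory.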
""" - train_fmt = self.tasks_data_former(data=train_data, task=task, test=False) if ontologizer else train_data + train_fmt = ( + self.tasks_data_former(data=train_data, task=task, test=False) + if ontologizer + else train_data + ) if task != "term-typing": raise ValueError("SBUNLPZSLearner only implements 'term-typing'.") # If framework passed a container with `.term_typings`, extract types from there if not isinstance(train_fmt, list): - # handle OntologyData-like object with attribute 'term_typings' if hasattr(train_fmt, "term_typings"): try: - # term_typings is expected to be an iterable of objects with attribute `types` collected = set() for tt in getattr(train_fmt, "term_typings") or []: # tt.types could be list[str] or a single str @@ -147,7 +176,6 @@ def fit(self, train_data: Any, task: str, ontologizer: bool = True): else: tvals = None - # Normalize both list and single-string cases if isinstance(tvals, (list, tuple, set)): for x in tvals: if isinstance(x, str): @@ -155,145 +183,180 @@ def fit(self, train_data: Any, task: str, ontologizer: bool = True): elif isinstance(tvals, str): collected.add(tvals) - # If we successfully collected types, set allowed_types and return if collected: self.allowed_types = sorted(collected) return self - # else fall through to error below (no types found) except Exception: - # If anything unexpected occurs while iterating term_typings, - # gracefully fall through and raise the original TypeError below. + # Fall through to error below if unexpected issues occur. pass - # not a supported non-list type -> keep original behavior (raise) raise TypeError("For term-typing, expected a list of type labels at fit().") # At this point train_fmt is a list (original logic preserved) if train_fmt and isinstance(train_fmt[0], dict) and "parent" in train_fmt[0]: # Case A: Received raw relationships/pairs (e.g., from train_test_split). - # Extract unique parent types from the relationship records. unique_types = set(r.get("parent") for r in train_fmt if r.get("parent")) self.allowed_types = sorted(unique_types) elif all(isinstance(x, str) for x in train_fmt): # Case B: Received a clean list of type labels (List[str]). self.allowed_types = sorted(set(train_fmt)) else: - # The input is a list but not in either expected format -> raise - raise TypeError("For term-typing, input data format for fit() is invalid. Expected list of strings (types) or list of relationships (dicts).") + raise TypeError( + "For term-typing, input data format for fit() is invalid. " + "Expected list of strings (types) or list of relationships (dicts)." + ) return self def predict(self, eval_data: Any, task: str, ontologizer: bool = True) -> Any: """ - Predict types for each term. + Predict types for each term and return standardized rows. Expected inputs: - - With ontologizer=True: a list[str] of term strings (IDs are autogenerated). - - With ontologizer=False: a list[dict] where each dict has keys {'id','term'}. + • With `ontologizer=True`: a `list[str]` of terms (IDs are auto-generated), + or a container exposing `.term_typings` from which `{'id','term'}` pairs + can be extracted. + • With `ontologizer=False`: a `list[dict]` of `{'id','term'}` to preserve IDs. + + Args: + eval_data: Evaluation payload as described above. + task: Must be `"term-typing"`. + ontologizer: If True, normalize through the pipeline’s data former. 
- This method tolerantly converts common framework containers (e.g., an - OntologyData object exposing `.term_typings`) into the expected list[dict] - shape so that the internal _term_typing() can run unchanged. + Returns: + A list of dictionaries: + `{"id": str, "term": str, "types": List[str]}`. """ if task != "term-typing": # Delegate to base for other tasks (not implemented here) return super().predict(eval_data, task, ontologizer=ontologizer) - def _extract_list_of_dicts_from_term_typings(obj) -> Optional[List[Dict[str, str]]]: - """ - Helper: try to produce a list of {"id","term"} dicts from objects - exposing a `term_typings` iterable. Supports either object-like - TermTyping (attributes) or dict-style entries. - """ + def _extract_list_of_dicts_from_term_typings( + obj, + ) -> Optional[List[Dict[str, str]]]: + """Try to derive `[{id, term}, ...]` from an object with `.term_typings`.""" tts = getattr(obj, "term_typings", None) if tts is None: return None out = [] for tt in tts: - # support object-style TermTyping (attributes) and dict-style if isinstance(tt, dict): - # try several common key names for ID tid = tt.get("ID") or tt.get("id") or tt.get("Id") or tt.get("ID_") tterm = tt.get("term") or tt.get("label") or tt.get("name") else: - # object-style access - tid = getattr(tt, "ID", None) or getattr(tt, "id", None) or getattr(tt, "Id", None) - tterm = getattr(tt, "term", None) or getattr(tt, "label", None) or getattr(tt, "name", None) + tid = ( + getattr(tt, "ID", None) + or getattr(tt, "id", None) + or getattr(tt, "Id", None) + ) + tterm = ( + getattr(tt, "term", None) + or getattr(tt, "label", None) + or getattr(tt, "name", None) + ) if tid is None or tterm is None: - # skip malformed entry - this is defensive so downstream code has valid inputs continue out.append({"id": str(tid), "term": str(tterm)}) return out if out else None # Case A: ontologizer=True -> framework often provides list[str] if ontologizer: - if isinstance(eval_data, list) and all(isinstance(x, str) for x in eval_data): - # Simple case: convert list of terms to list of dicts with generated IDs - eval_pack = [{"id": f"TT_{i:06d}", "term": t} for i, t in enumerate(eval_data)] + if isinstance(eval_data, list) and all( + isinstance(x, str) for x in eval_data + ): + eval_pack = [ + {"id": f"TT_{i:06d}", "term": t} for i, t in enumerate(eval_data) + ] else: - # Try to extract from a framework container (e.g., OntologyData) maybe = _extract_list_of_dicts_from_term_typings(eval_data) if maybe is not None: eval_pack = maybe else: - # Last resort: if eval_data is some iterable of strings, convert it - try: - if hasattr(eval_data, "__iter__") and not isinstance(eval_data, (str, bytes)): - lst = list(eval_data) - if all(isinstance(x, str) for x in lst): - eval_pack = [{"id": f"TT_{i:06d}", "term": t} for i, t in enumerate(lst)] - else: - raise TypeError("With ontologizer=True, eval_data must be list[str] of terms.") + # Last resort: attempt to coerce iterables of str + if hasattr(eval_data, "__iter__") and not isinstance( + eval_data, (str, bytes) + ): + lst = list(eval_data) + if all(isinstance(x, str) for x in lst): + eval_pack = [ + {"id": f"TT_{i:06d}", "term": t} + for i, t in enumerate(lst) + ] else: - raise TypeError("With ontologizer=True, eval_data must be list[str] of terms.") - except TypeError: - # re-raise to preserve original error semantics - raise - # Delegate to internal inference routine + raise TypeError( + "With ontologizer=True, eval_data must be list[str] of terms." 
+ ) + else: + raise TypeError( + "With ontologizer=True, eval_data must be list[str] of terms." + ) return self._term_typing(eval_pack, test=True) - # Case B: ontologizer=False -> we expect list[dict], but tolerate common containers + # Case B: ontologizer=False -> expect list[dict], but tolerate containers else: - if isinstance(eval_data, list) and all(isinstance(x, dict) for x in eval_data): + if isinstance(eval_data, list) and all( + isinstance(x, dict) for x in eval_data + ): eval_pack = eval_data else: - # Try to extract from framework container (term_typings) maybe = _extract_list_of_dicts_from_term_typings(eval_data) if maybe is not None: eval_pack = maybe else: - # As a final attempt, allow eval_data to be a dict with a list under some known keys if isinstance(eval_data, dict): for key in ("term_typings", "terms", "items"): - if key in eval_data and isinstance(eval_data[key], (list, tuple)): + if key in eval_data and isinstance( + eval_data[key], (list, tuple) + ): converted = [] for x in eval_data[key]: - # Accept dict-style entries that include id and term/name - if isinstance(x, dict) and ("id" in x or "ID" in x) and ("term" in x or "name" in x): + if ( + isinstance(x, dict) + and ("id" in x or "ID" in x) + and ("term" in x or "name" in x) + ): tid = x.get("ID") or x.get("id") tterm = x.get("term") or x.get("name") - converted.append({"id": str(tid), "term": str(tterm)}) + converted.append( + {"id": str(tid), "term": str(tterm)} + ) if converted: eval_pack = converted break else: - # Could not convert; raise same TypeError as before - raise TypeError("With ontologizer=False, eval_data must be a list of dicts with keys {'id','term'}.") + raise TypeError( + "With ontologizer=False, eval_data must be a list of dicts with keys {'id','term'}." + ) else: - # Not a supported container -> raise - raise TypeError("With ontologizer=False, eval_data must be a list of dicts with keys {'id','term'}.") - # Delegate to internal inference routine + raise TypeError( + "With ontologizer=False, eval_data must be a list of dicts with keys {'id','term'}." + ) return self._term_typing(eval_pack, test=True) - - # ------------------------------------------------------------------------- - # Internal task implementations (AutoLearner hooks) - # ------------------------------------------------------------------------- def _term_typing(self, data: Any, test: bool = False) -> Optional[Any]: """ - Core implementation: - - training mode (test=False): `data` is a list of allowed type labels -> store them. - - inference mode (test=True): `data` is a list of {"id","term"} -> produce [{"id","types"}]. + Internal implementation of the *term-typing* task. + + Training mode (`test=False`): + • Expects a `list[str]` of allowed types. Stores a sorted unique copy. + + Inference mode (`test=True`): + • Expects a `list[dict]` of `{"id","term"}` items. + • Requires `load()` to have been called (model/tokenizer available). + • Builds a blind prompt per item, generates text, parses quoted + candidates, and filters them to `self.allowed_types`. + + Args: + data: See the mode-specific expectations above. + test: Set `True` to run inference; `False` to store the type inventory. + + Returns: + • `None` in training mode. + • `list[dict]` with `{"id","term","types":[...]}` in inference mode. + + Raises: + TypeError: If `data` is not in the expected shape for the mode. + RuntimeError: If model/tokenizer are not loaded at inference time. 
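+
+        Example (inference mode; the type label is hypothetical):
+            With ``data=[{"id": "TT_000001", "term": "espresso"}]`` and an
+            inventory that contains ``"Beverage"``, a successful run could
+            return ``[{"id": "TT_000001", "term": "espresso", "types": ["Beverage"]}]``.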
""" if not test: # training: expect a list of strings (type labels) @@ -304,49 +367,58 @@ def _term_typing(self, data: Any, test: bool = False) -> Optional[Any]: # Inference path if not isinstance(data, list) or not all(isinstance(x, dict) for x in data): - raise TypeError("At prediction time, expected a list of {'id','term'} dicts.") + raise TypeError( + "At prediction time, expected a list of {'id','term'} dicts." + ) - # Ensure model and tokenizer are loaded if self.model is None or self.tokenizer is None: - raise RuntimeError("Model/tokenizer not loaded. Call .load() before predict().") + raise RuntimeError( + "Model/tokenizer not loaded. Call .load() before predict()." + ) results = [] for item in data: - # preserve incoming IDs and terms term_id = item["id"] term_text = item["term"] - - # build the blind JSON-prompt that instructs the model to output types prompt = self._build_blind_prompt(term_id, term_text, self.allowed_types) - - # generate and parse model output into allowed types types = self._generate_and_parse_types(prompt) - - # append result for this term (keep original id) - # include the original term so downstream evaluation (and any consumers) can match by term results.append({"id": term_id, "term": term_text, "types": types}) return results - # ------------------------------------------------------------------------- - # Prompting + parsing - # ------------------------------------------------------------------------- - - def _format_types_inline(allowed: List[str]) -> str: + def _format_types_inline(self, allowed: List[str]) -> str: """ - Format allowed types as comma-separated quoted strings for insertion into the prompt. - Example: '"type1", "type2", "type3"' + Format the allowed types for inline inclusion in prompts. + + Args: + allowed: List of allowed type labels. + + Returns: + A comma-separated string of quoted types, e.g.: + `"type1", "type2", "type3"`. Returns an empty string for an empty list. """ - return ", ".join(f'"{t}"' for t in allowed) + if not allowed: + return "" + return ", ".join(f'"{t}"' for t in allowed if isinstance(t, str) and t.strip()) - def _build_blind_prompt(self, term_id: str, term: str, allowed_types: List[str]) -> str: + def _build_blind_prompt( + self, term_id: str, term: str, allowed_types: List[str] + ) -> str: """ - Construct the prompt given a single term. The prompt: - - Instructs the model to produce a JSON array of {id, types} objects. - - Provides the allowed types list (so the model should only use those). - - Includes the single input item for which the model must decide types. + Construct the blind JSON prompt for a single term. + + The prompt: + • Instructs the model to produce ONLY a JSON array of `{id, types}` objects. + • Provides the allowed types list so the model should only use those. + • Includes the single input item for which the model must decide types. + + Args: + term_id: Identifier to carry through to the output JSON. + term: The input term string to classify. + allowed_types: Inventory used to constrain outputs. - Note: This is the same blind-prompting approach used in the original notebook. + Returns: + The full prompt string to feed to the LLM. """ allowed_str = self._format_types_inline(allowed_types) return ( @@ -367,14 +439,22 @@ def _build_blind_prompt(self, term_id: str, term: str, allowed_types: List[str]) def _generate_and_parse_types(self, prompt: str) -> List[str]: """ - Greedy generate, then extract quoted strings and filter by allowed types. 
- - Important details: - - We assert model/tokenizer presence before calling. - - Tokenized inputs are moved to the model device (original code uses .to(self.model.device)). - - The decoded text is scanned for quoted substrings using self._quoted_re. - - Only quoted strings that are present in self.allowed_types are kept. - - Returned list is unique & sorted for deterministic ordering. + Greedy-generate text, extract candidate types, and filter to the inventory. + + Workflow: + 1) Tokenize the prompt and generate deterministically (greedy). + 2) Decode and extract quoted substrings via regex (e.g., `"type"`). + 3) Keep only those candidates that exist in `self.allowed_types`. + 4) Return a unique, sorted list (stable across runs). + + Args: + prompt: Fully formatted prompt string. + + Returns: + List of predicted type labels (possibly empty if none found). + + Raises: + AssertionError: If `model` or `tokenizer` are unexpectedly `None`. """ assert self.model is not None and self.tokenizer is not None @@ -393,8 +473,6 @@ def _generate_and_parse_types(self, prompt: str) -> List[str]: text = self.tokenizer.decode(outputs[0], skip_special_tokens=True) candidates = self._quoted_re.findall(text) - # Filter candidates to the allowed inventory + # Filter candidates to the allowed inventory and stabilize order. filtered = [c for c in candidates if c in self.allowed_types] - - # Return unique & sorted for stability across runs return sorted(set(filtered)) diff --git a/ontolearner/learner/text2onto/__init__.py b/ontolearner/learner/text2onto/__init__.py deleted file mode 100644 index 6408881..0000000 --- a/ontolearner/learner/text2onto/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2025 SciKnowOrg -# -# Licensed under the MIT License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/MIT -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .sbunlp import SBUNLPFewShotLearner -from .alexbek import AlexbekFewShotLearner diff --git a/ontolearner/learner/text2onto/alexbek.py b/ontolearner/learner/text2onto/alexbek.py index 5760dca..f1692f7 100644 --- a/ontolearner/learner/text2onto/alexbek.py +++ b/ontolearner/learner/text2onto/alexbek.py @@ -31,6 +31,7 @@ class _PredictedTypesSchema(BaseModel): """Schema used when generating structured JSON { "types": [...] }.""" + types: List[str] OUTLINES_AVAILABLE: bool = True @@ -41,6 +42,7 @@ class _PredictedTypesSchema(BaseModel): OutlinesTFModel = None outlines_generate_json = None + class LocalAutoLLM(AutoLLM): """ Minimal local LLM helper. 
@@ -101,11 +103,15 @@ def load(self, model_id: str, *, load_in_4bit: bool = False) -> None: token=self.token, ) else: - device_map = "auto" if (self.device != "cpu" and torch.cuda.is_available()) else None + device_map = ( + "auto" if (self.device != "cpu" and torch.cuda.is_available()) else None + ) self.model = AutoModelForCausalLM.from_pretrained( model_id, device_map=device_map, - torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, + torch_dtype=torch.bfloat16 + if torch.cuda.is_available() + else torch.float32, token=self.token, ) @@ -134,11 +140,17 @@ def generate(self, prompts: List[str], max_new_tokens: int = 128) -> List[str]: Decoded new-token texts (no special tokens, stripped). """ if self.model is None or self.tokenizer is None: - raise RuntimeError("Call .load(model_id) on LocalAutoLLM before generate().") + raise RuntimeError( + "Call .load(model_id) on LocalAutoLLM before generate()." + ) - tokenized_batch = self.tokenizer(prompts, return_tensors="pt", padding=True, truncation=True) + tokenized_batch = self.tokenizer( + prompts, return_tensors="pt", padding=True, truncation=True + ) input_seq_len = tokenized_batch["input_ids"].shape[1] - tokenized_batch = {k: v.to(self.model.device) for k, v in tokenized_batch.items()} + tokenized_batch = { + k: v.to(self.model.device) for k, v in tokenized_batch.items() + } with torch.no_grad(): outputs = self.model.generate( @@ -151,7 +163,11 @@ def generate(self, prompts: List[str], max_new_tokens: int = 128) -> List[str]: # Only return the newly generated part for each row in the batch continuation_token_ids = outputs[:, input_seq_len:] - return [self.tokenizer.decode(row, skip_special_tokens=True).strip() for row in continuation_token_ids] + return [ + self.tokenizer.decode(row, skip_special_tokens=True).strip() + for row in continuation_token_ids + ] + class AlexbekFewShotLearner(AutoLearner): """ @@ -168,6 +184,7 @@ class AlexbekFewShotLearner(AutoLearner): Reads your A1 results (docs→terms), predicts types for each term, and writes two files: terms2types_pred.json + types2docs_pred.json """ + def __init__(self, model: LocalAutoLLM, device: str = "cpu", **_: Any) -> None: """ Initialize learner state and canned prompts. @@ -243,7 +260,9 @@ def fit( # Load item -> [doc_ids] item_to_docs_map = self._load_json(terms2doc_json) if not isinstance(item_to_docs_map, dict): - raise ValueError(f"{terms2doc_json} must be a JSON dict mapping item -> [doc_ids]") + raise ValueError( + f"{terms2doc_json} must be a JSON dict mapping item -> [doc_ids]" + ) # Reverse mapping: doc_id -> [items] doc_id_to_items_map: Dict[str, List[str]] = {} @@ -258,17 +277,25 @@ def fit( if not doc_row: continue doc_title = str(doc_row.get("title", "")) # be defensive (may be None) - doc_text = self._to_text(doc_row.get("text", "")) # string-ify list if needed + doc_text = self._to_text( + doc_row.get("text", "") + ) # string-ify list if needed if not doc_text: continue - gold_items = self._unique_preserve([s for s in labeled_items if isinstance(s, str)]) + gold_items = self._unique_preserve( + [s for s in labeled_items if isinstance(s, str)] + ) if gold_items: exemplar_candidates.append((doc_title, doc_text, gold_items)) if not exemplar_candidates: - raise RuntimeError("No candidate docs with items found to build few-shot exemplars.") + raise RuntimeError( + "No candidate docs with items found to build few-shot exemplars." 
+ ) - chosen_exemplars = rng.sample(exemplar_candidates, k=min(sample_size, len(exemplar_candidates))) + chosen_exemplars = rng.sample( + exemplar_candidates, k=min(sample_size, len(exemplar_candidates)) + ) # Reuse exemplars for both docs→terms and docs→types prompting self._fewshot_terms_docs = chosen_exemplars self._fewshot_types_docs = chosen_exemplars @@ -315,7 +342,10 @@ def predict_terms( text = self._to_text(document_row.get("text", "")) fewshot_block = self._format_fewshot_block( - self._system_prompt_terms, self._fewshot_terms_docs, key="terms", k=few_shot_k + self._system_prompt_terms, + self._fewshot_terms_docs, + key="terms", + k=few_shot_k, ) user_block = self._format_user_block(title, text) @@ -323,7 +353,9 @@ def predict_terms( document_order.append(document_id) generations = self.model.generate(prompts, max_new_tokens=max_new_tokens) - parsed_term_lists = [self._parse_json_list(generated, key="terms") for generated in generations] + parsed_term_lists = [ + self._parse_json_list(generated, key="terms") for generated in generations + ] os.makedirs(os.path.dirname(out_jsonl) or ".", exist_ok=True) lines_written = 0 @@ -334,7 +366,6 @@ def predict_terms( lines_written += 1 return lines_written - def predict_types( self, *, @@ -377,7 +408,10 @@ def predict_types( text = self._to_text(document_row.get("text", "")) fewshot_block = self._format_fewshot_block( - self._system_prompt_types, self._fewshot_types_docs, key="types", k=few_shot_k + self._system_prompt_types, + self._fewshot_types_docs, + key="types", + k=few_shot_k, ) user_block = self._format_user_block(title, text) @@ -385,7 +419,9 @@ def predict_types( document_order.append(document_id) generations = self.model.generate(prompts, max_new_tokens=max_new_tokens) - parsed_type_lists = [self._parse_json_list(generated, key="types") for generated in generations] + parsed_type_lists = [ + self._parse_json_list(generated, key="types") for generated in generations + ] os.makedirs(os.path.dirname(out_jsonl) or ".", exist_ok=True) lines_written = 0 @@ -426,7 +462,9 @@ def evaluate_extraction_f1( gold_doc_to_items: Dict[str, set] = {} for item_label, doc_id_list in item_to_doc_ids.items(): for document_id in doc_id_list: - gold_doc_to_items.setdefault(document_id, set()).add(self._norm(item_label)) + gold_doc_to_items.setdefault(document_id, set()).add( + self._norm(item_label) + ) # Build predictions: doc_id -> set(items) pred_doc_to_items: Dict[str, set] = {} @@ -435,7 +473,9 @@ def evaluate_extraction_f1( row = json.loads(line.strip()) document_id = str(row.get("id", "")) items_list = row.get("terms" if key == "term" else "types", []) - pred_doc_to_items[document_id] = {self._norm(x) for x in items_list if isinstance(x, str)} + pred_doc_to_items[document_id] = { + self._norm(x) for x in items_list if isinstance(x, str) + } # Micro counts true_positive = false_positive = false_negative = 0 @@ -447,18 +487,34 @@ def evaluate_extraction_f1( false_positive += len(pred_set - gold_set) false_negative += len(gold_set - pred_set) - precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) else 0.0 - recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) else 0.0 - f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0 + precision = ( + true_positive / (true_positive + false_positive) + if (true_positive + false_positive) + else 0.0 + ) + recall = ( + true_positive / (true_positive + false_negative) + if (true_positive + 
false_negative) + else 0.0 + ) + f1 = ( + 2 * precision * recall / (precision + recall) + if (precision + recall) + else 0.0 + ) return f1 def predict_types_from_terms( self, *, - doc_terms_jsonl: Optional[str] = None, # formerly a1_results_jsonl - doc_terms_list: Optional[List[Dict]] = None, # formerly a1_results_list - few_shot_jsonl: Optional[str] = None, # JSONL lines: {"term":"...", "types":[...]} - rag_terms_json: Optional[str] = None, # JSON list; items may contain "term" and "RAG":[...] + doc_terms_jsonl: Optional[str] = None, # formerly a1_results_jsonl + doc_terms_list: Optional[List[Dict]] = None, # formerly a1_results_list + few_shot_jsonl: Optional[ + str + ] = None, # JSONL lines: {"term":"...", "types":[...]} + rag_terms_json: Optional[ + str + ] = None, # JSON list; items may contain "term" and "RAG":[...] random_few_shot: Optional[int] = 3, model_id: str = "Qwen/Qwen2.5-1.5B-Instruct", use_structured_output: bool = True, @@ -507,7 +563,9 @@ def predict_types_from_terms( in_memory_results=doc_terms_list, ) if not doc_term_extractions: - raise ValueError("No document→terms results provided (doc_terms_jsonl/doc_terms_list).") + raise ValueError( + "No document→terms results provided (doc_terms_jsonl/doc_terms_list)." + ) # Prepare unique term list and term→doc occurrences unique_terms = self._collect_unique_terms_from_extractions(doc_term_extractions) @@ -525,7 +583,11 @@ def predict_types_from_terms( json_obj = json.loads(raw_line) except Exception: continue - if isinstance(json_obj, dict) and "term" in json_obj and "types" in json_obj: + if ( + isinstance(json_obj, dict) + and "term" in json_obj + and "types" in json_obj + ): global_few_shot_examples.append(json_obj) # Optional per-term RAG examples: {normalized_term -> [examples]} @@ -536,8 +598,12 @@ def predict_types_from_terms( if isinstance(rag_payload, list): for rag_item in rag_payload: if isinstance(rag_item, dict): - normalized_term = self._normalize_term(rag_item.get("term", "")) - rag_examples_lookup[normalized_term] = rag_item.get("RAG", []) + normalized_term = self._normalize_term( + rag_item.get("term", "") + ) + rag_examples_lookup[normalized_term] = rag_item.get( + "RAG", [] + ) except Exception: pass @@ -550,7 +616,10 @@ def predict_types_from_terms( normalized_term = self._normalize_term(term_text) # Prefer per-term RAG for this term, else use global few-shot - few_shot_examples_for_term = rag_examples_lookup.get(normalized_term, None) or global_few_shot_examples + few_shot_examples_for_term = ( + rag_examples_lookup.get(normalized_term, None) + or global_few_shot_examples + ) # Build conversation and prompt conversation_messages = self._build_conv_for_type_infer( @@ -558,28 +627,51 @@ def predict_types_from_terms( few_shot_examples=few_shot_examples_for_term, random_k=random_few_shot, ) - typing_prompt_string = self._apply_chat_template_safe_types(typing_tokenizer, conversation_messages) + typing_prompt_string = self._apply_chat_template_safe_types( + typing_tokenizer, conversation_messages + ) predicted_types: List[str] = [] raw_generation_text: str = "" # Structured JSON path (if requested and available) - if use_structured_output and OUTLINES_AVAILABLE and _PredictedTypesSchema is not None: + if ( + use_structured_output + and OUTLINES_AVAILABLE + and _PredictedTypesSchema is not None + ): try: outlines_model = OutlinesTFModel(typing_model, typing_tokenizer) # type: ignore - generator = outlines_generate_json(outlines_model, _PredictedTypesSchema) # type: ignore + generator = outlines_generate_json( + 
outlines_model, _PredictedTypesSchema + ) # type: ignore structured = generator(typing_prompt_string, max_tokens=512) - predicted_types = [label for label in structured.types if isinstance(label, str)] - raw_generation_text = json.dumps({"types": predicted_types}, ensure_ascii=False) + predicted_types = [ + label for label in structured.types if isinstance(label, str) + ] + raw_generation_text = json.dumps( + {"types": predicted_types}, ensure_ascii=False + ) except Exception: # Fall back to greedy decoding use_structured_output = False # Greedy decode fallback - if not use_structured_output or not OUTLINES_AVAILABLE or _PredictedTypesSchema is None: - tokenized_prompt = typing_tokenizer(typing_prompt_string, return_tensors="pt", truncation=True, max_length=2048) + if ( + not use_structured_output + or not OUTLINES_AVAILABLE + or _PredictedTypesSchema is None + ): + tokenized_prompt = typing_tokenizer( + typing_prompt_string, + return_tensors="pt", + truncation=True, + max_length=2048, + ) if torch.cuda.is_available(): - tokenized_prompt = {name: tensor.cuda() for name, tensor in tokenized_prompt.items()} + tokenized_prompt = { + name: tensor.cuda() for name, tensor in tokenized_prompt.items() + } with torch.no_grad(): output_ids = typing_model.generate( **tokenized_prompt, @@ -588,14 +680,18 @@ def predict_types_from_terms( num_beams=1, pad_token_id=typing_tokenizer.eos_token_id, ) - new_token_span = output_ids[0][tokenized_prompt["input_ids"].shape[1]:] - raw_generation_text = typing_tokenizer.decode(new_token_span, skip_special_tokens=True) + new_token_span = output_ids[0][tokenized_prompt["input_ids"].shape[1] :] + raw_generation_text = typing_tokenizer.decode( + new_token_span, skip_special_tokens=True + ) predicted_types = self._extract_types_from_text(raw_generation_text) - term_to_predicted_types_list.append({ - "term": term_text, - "predicted_types": sorted(set(predicted_types)), - }) + term_to_predicted_types_list.append( + { + "term": term_text, + "predicted_types": sorted(set(predicted_types)), + } + ) # 7) Build types→docs from (term→types) and (term→docs) types_to_doc_id_set: Dict[str, set] = {} @@ -603,16 +699,24 @@ def predict_types_from_terms( normalized_term = self._normalize_term(term_prediction["term"]) doc_ids_for_term = term_to_doc_ids_map.get(normalized_term, []) for type_label in term_prediction.get("predicted_types", []): - types_to_doc_id_set.setdefault(type_label, set()).update(doc_ids_for_term) + types_to_doc_id_set.setdefault(type_label, set()).update( + doc_ids_for_term + ) types_to_doc_ids: Dict[str, List[str]] = { - type_label: sorted(doc_id_set) for type_label, doc_id_set in types_to_doc_id_set.items() + type_label: sorted(doc_id_set) + for type_label, doc_id_set in types_to_doc_id_set.items() } # 8) Save outputs os.makedirs(os.path.dirname(out_terms2types) or ".", exist_ok=True) with open(out_terms2types, "w", encoding="utf-8") as fp_terms2types: - json.dump(term_to_predicted_types_list, fp_terms2types, ensure_ascii=False, indent=2) + json.dump( + term_to_predicted_types_list, + fp_terms2types, + ensure_ascii=False, + indent=2, + ) os.makedirs(os.path.dirname(out_types2docs) or ".", exist_ok=True) with open(out_types2docs, "w", encoding="utf-8") as fp_types2docs: @@ -635,7 +739,6 @@ def _load_json(self, path: str) -> Dict[str, Any]: with open(path, "r", encoding="utf-8") as file_obj: return json.load(file_obj) - def _iter_json_objects(self, blob: str) -> Iterable[Dict[str, Any]]: """ Iterate over *all* JSON objects found inside a string. 
@@ -669,7 +772,6 @@ def _iter_json_objects(self, blob: str) -> Iterable[Dict[str, Any]]: yield json_obj cursor_index = end_index - def _load_documents_jsonl(self, path: str) -> Dict[str, Dict[str, Any]]: """ Robust reader that supports: @@ -727,7 +829,6 @@ def _load_documents_jsonl(self, path: str) -> Dict[str, Dict[str, Any]]: return documents_by_id - def _to_text(self, text_field: Any) -> str: """ Convert a 'text' field into a single string (handles list-of-strings). @@ -748,7 +849,6 @@ def _to_text(self, text_field: Any) -> str: return " ".join(str(part) for part in text_field) return str(text_field) if text_field is not None else "" - def _unique_preserve(self, values: List[str]) -> List[str]: """ Deduplicate values while preserving the original order. @@ -771,7 +871,6 @@ def _unique_preserve(self, values: List[str]) -> List[str]: ordered_values.append(candidate) return ordered_values - def _norm(self, text: str) -> str: """ Lowercased, single-spaced normalization (for comparisons). @@ -788,7 +887,6 @@ def _norm(self, text: str) -> str: """ return " ".join(text.lower().split()) - def _normalize_term(self, term: str) -> str: """ Normalization tailored for term keys / lookups. @@ -805,7 +903,6 @@ def _normalize_term(self, term: str) -> str: """ return " ".join(str(term).strip().split()).lower() - def _format_fewshot_block( self, system_prompt: str, @@ -846,10 +943,13 @@ def _format_fewshot_block( for example_title, example_text, gold_list in fewshot_examples[:k]: lines.append("### Example") lines.append(f"User:\nTitle: {example_title}\n{example_text}") - lines.append(f'Assistant:\n{{"{key}": ' + json.dumps(gold_list, ensure_ascii=False) + "}") + lines.append( + f'Assistant:\n{{"{key}": ' + + json.dumps(gold_list, ensure_ascii=False) + + "}" + ) return "\n".join(lines) - def _format_user_block(self, title: str, text: str) -> str: """ Format the 'Task' block for the current document. @@ -868,7 +968,6 @@ def _format_user_block(self, title: str, text: str) -> str: """ return f"### Task\nUser:\nTitle: {title}\n{text}" - def _parse_json_list(self, generated_text: str, *, key: str) -> List[str]: """ Extract a list from model output, trying: @@ -911,23 +1010,34 @@ def _parse_json_list(self, generated_text: str, *, key: str) -> List[str]: # 3) Fallback: comma-split (last resort) if "," in generated_text: - return [part.strip().strip('"').strip("'") for part in generated_text.split(",") if part.strip()] + return [ + part.strip().strip('"').strip("'") + for part in generated_text.split(",") + if part.strip() + ] return [] - - def _apply_chat_template_safe_types(self, tokenizer: AutoTokenizer, messages: List[Dict[str, str]]) -> str: + def _apply_chat_template_safe_types( + self, tokenizer: AutoTokenizer, messages: List[Dict[str, str]] + ) -> str: """ Safely build a prompt string for chat models. Uses the model's chat template when available; otherwise falls back to a simple concatenation. 
""" try: - return tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False) + return tokenizer.apply_chat_template( + messages, add_generation_prompt=True, tokenize=False + ) except Exception: - system_text = next((m["content"] for m in messages if m.get("role") == "system"), "") - last_user_text = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "") + system_text = next( + (m["content"] for m in messages if m.get("role") == "system"), "" + ) + last_user_text = next( + (m["content"] for m in reversed(messages) if m.get("role") == "user"), + "", + ) return f"{system_text}\n\nUser:\n{last_user_text}\n\nAssistant:" - def _build_conv_for_type_infer( self, term: str, @@ -938,20 +1048,27 @@ def _build_conv_for_type_infer( Create a chat-style conversation for a single term→types query, optionally prepending few-shot examples. """ - messages: List[Dict[str, str]] = [{"role": "system", "content": self._system_prompt_term_to_types}] + messages: List[Dict[str, str]] = [ + {"role": "system", "content": self._system_prompt_term_to_types} + ] examples = list(few_shot_examples or []) if random_k and len(examples) > random_k: import random as _rnd + examples = _rnd.sample(examples, random_k) for exemplar in examples: example_term = exemplar.get("term", "") example_types = exemplar.get("types", []) messages.append({"role": "user", "content": f"Term: {example_term}"}) - messages.append({"role": "assistant", "content": json.dumps({"types": example_types}, ensure_ascii=False)}) + messages.append( + { + "role": "assistant", + "content": json.dumps({"types": example_types}, ensure_ascii=False), + } + ) messages.append({"role": "user", "content": f"Term: {term}"}) return messages - def _extract_types_from_text(self, generated_text: str) -> List[str]: """ Parse {"types":[...]} from a free-form generation. @@ -961,13 +1078,18 @@ def _extract_types_from_text(self, generated_text: str) -> List[str]: if object_match: json_obj = json.loads(object_match.group(0)) types_array = json_obj.get("types", []) - return [type_label for type_label in types_array if isinstance(type_label, str)] + return [ + type_label + for type_label in types_array + if isinstance(type_label, str) + ] except Exception: pass return [] - - def _load_llm_for_types(self, model_id: str) -> Tuple[AutoModelForCausalLM, AutoTokenizer]: + def _load_llm_for_types( + self, model_id: str + ) -> Tuple[AutoModelForCausalLM, AutoTokenizer]: """ Load a *separate* small chat model for Term→Types (keeps LocalAutoLLM untouched). 
""" @@ -981,7 +1103,6 @@ def _load_llm_for_types(self, model_id: str) -> Tuple[AutoModelForCausalLM, Auto ) return model, tokenizer - def _load_doc_term_extractions( self, *, @@ -1002,17 +1123,26 @@ def _load_doc_term_extractions( normalized_records: List[Dict] = [] def _coerce_to_record(source_row: Dict) -> Optional[Dict]: - document_id = str(source_row.get("id", "")) or str(source_row.get("doc_id", "")) + document_id = str(source_row.get("id", "")) or str( + source_row.get("doc_id", "") + ) if not document_id: return None terms = source_row.get("extracted_terms") if terms is None: terms = source_row.get("terms") - if terms is None and "payload" in source_row and isinstance(source_row["payload"], dict): + if ( + terms is None + and "payload" in source_row + and isinstance(source_row["payload"], dict) + ): terms = source_row["payload"].get("terms") if not isinstance(terms, list): terms = [] - return {"id": document_id, "extracted_terms": [t for t in terms if isinstance(t, str)]} + return { + "id": document_id, + "extracted_terms": [t for t in terms if isinstance(t, str)], + } if in_memory_results is not None: for source_row in in_memory_results: @@ -1053,8 +1183,9 @@ def _coerce_to_record(source_row: Dict) -> Optional[Dict]: return normalized_records - - def _collect_unique_terms_from_extractions(self, doc_term_extractions: List[Dict]) -> List[str]: + def _collect_unique_terms_from_extractions( + self, doc_term_extractions: List[Dict] + ) -> List[str]: """ Collect unique terms (original casing) from normalized document→terms results. """ @@ -1068,8 +1199,9 @@ def _collect_unique_terms_from_extractions(self, doc_term_extractions: List[Dict ordered_unique_terms.append(term_text.strip()) return ordered_unique_terms - - def _build_term_to_doc_ids(self, doc_term_extractions: List[Dict]) -> Dict[str, List[str]]: + def _build_term_to_doc_ids( + self, doc_term_extractions: List[Dict] + ) -> Dict[str, List[str]]: """ Build lookup: normalized_term -> sorted unique list of doc_ids. """ @@ -1081,4 +1213,7 @@ def _build_term_to_doc_ids(self, doc_term_extractions: List[Dict]) -> Dict[str, if not normalized or not document_id: continue term_to_doc_set.setdefault(normalized, set()).add(document_id) - return {normalized_term: sorted(doc_ids) for normalized_term, doc_ids in term_to_doc_set.items()} + return { + normalized_term: sorted(doc_ids) + for normalized_term, doc_ids in term_to_doc_set.items() + } diff --git a/ontolearner/learner/text2onto/sbunlp.py b/ontolearner/learner/text2onto/sbunlp.py index 8ab617d..49067e2 100644 --- a/ontolearner/learner/text2onto/sbunlp.py +++ b/ontolearner/learner/text2onto/sbunlp.py @@ -25,6 +25,7 @@ from ...base import AutoLearner, AutoLLM + # ----------------------------------------------------------------------------- # Concrete AutoLLM: local HF wrapper that follows the AutoLLM interface # ----------------------------------------------------------------------------- @@ -34,19 +35,29 @@ class LocalAutoLLM(AutoLLM): Uses 4-bit quantization for efficiency and greedy decoding by default. 
""" - def __init__(self, label_mapper: Any = None, device: str = "cpu", token: str = "") -> None: + def __init__( + self, label_mapper: Any = None, device: str = "cpu", token: str = "" + ) -> None: super().__init__(label_mapper=label_mapper, device=device, token=token) self.model = None self.tokenizer = None - def load(self, model_id: str, load_in_4bit: bool = False, dtype: str = "auto", trust_remote_code: bool = True): + def load( + self, + model_id: str, + load_in_4bit: bool = False, + dtype: str = "auto", + trust_remote_code: bool = True, + ): """Load tokenizer + model, applying 4-bit quantization if specified and possible.""" # Determine the target data type (default to float32 for CPU, float16 for GPU) - torch_dtype_val = (torch.float16 if torch.cuda.is_available() else torch.float32) + torch_dtype_val = torch.float16 if torch.cuda.is_available() else torch.float32 # Load the tokenizer - self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code) + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, trust_remote_code=trust_remote_code + ) if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token @@ -78,7 +89,13 @@ def load(self, model_id: str, load_in_4bit: bool = False, dtype: str = "auto", t if self.device == "cpu": self.model.to("cpu") - def generate(self, inputs: List[str], max_new_tokens: int = 64, temperature: float = 0.0, top_p: float = 1.0) -> List[str]: + def generate( + self, + inputs: List[str], + max_new_tokens: int = 64, + temperature: float = 0.0, + top_p: float = 1.0, + ) -> List[str]: """Generate continuations for a list of prompts, returning only the generated part.""" if self.model is None or self.tokenizer is None: raise RuntimeError("Model/tokenizer not loaded. 
Call .load() first.") @@ -100,7 +117,9 @@ def generate(self, inputs: List[str], max_new_tokens: int = 64, temperature: flo input_ids=input_ids, attention_mask=attention_mask, max_new_tokens=max_new_tokens, - do_sample=(temperature > 0.0), # Use greedy decoding if temperature is 0.0 + do_sample=( + temperature > 0.0 + ), # Use greedy decoding if temperature is 0.0 temperature=temperature, top_p=top_p, pad_token_id=self.tokenizer.eos_token_id, @@ -109,20 +128,25 @@ def generate(self, inputs: List[str], max_new_tokens: int = 64, temperature: flo # --- Post-processing: Extract only the generated tail --- decoded_outputs: List[str] = [] for i, output_ids in enumerate(outputs): - full_decoded_text = self.tokenizer.decode(output_ids, skip_special_tokens=True) + full_decoded_text = self.tokenizer.decode( + output_ids, skip_special_tokens=True + ) prompt_text = self.tokenizer.decode(input_ids[i], skip_special_tokens=True) # Safely strip the prompt text from the full output if full_decoded_text.startswith(prompt_text): - generated_tail = full_decoded_text[len(prompt_text):].strip() + generated_tail = full_decoded_text[len(prompt_text) :].strip() else: # Fallback extraction (less robust if padding affects token indices) prompt_len = input_ids.shape[1] - generated_tail = self.tokenizer.decode(output_ids[prompt_len:], skip_special_tokens=True).strip() + generated_tail = self.tokenizer.decode( + output_ids[prompt_len:], skip_special_tokens=True + ).strip() decoded_outputs.append(generated_tail) return decoded_outputs + # ----------------------------------------------------------------------------- # Main Learner: SBUNLPFewShotLearner (Task A Text2Onto) # ----------------------------------------------------------------------------- @@ -195,7 +219,11 @@ def build_stratified_fewshot_prompt( num_to_sample_from_stratum = int(num_sample_docs * proportion) if num_to_sample_from_stratum > 0: - sampled_documents.extend(random.sample(stratum_docs, min(num_to_sample_from_stratum, num_stratum_docs))) + sampled_documents.extend( + random.sample( + stratum_docs, min(num_to_sample_from_stratum, num_stratum_docs) + ) + ) # Deduplicate sampled documents by ID and adjust count to exactly 'sample_size' unique_docs_by_id = {} @@ -207,8 +235,12 @@ def build_stratified_fewshot_prompt( if len(final_sample_docs) > num_sample_docs: final_sample_docs = random.sample(final_sample_docs, num_sample_docs) elif len(final_sample_docs) < num_sample_docs: - remaining_docs = [d for d in corpus_documents if d.get("id", "") not in unique_docs_by_id] - needed_count = min(num_sample_docs - len(final_sample_docs), len(remaining_docs)) + remaining_docs = [ + d for d in corpus_documents if d.get("id", "") not in unique_docs_by_id + ] + needed_count = min( + num_sample_docs - len(final_sample_docs), len(remaining_docs) + ) final_sample_docs.extend(random.sample(remaining_docs, needed_count)) # Format the few-shot exemplar text block @@ -299,21 +331,31 @@ def build_types_fewshot_block( picked_count += 1 if picked_count >= sample_per_term: - break # Move to the next term + break # Move to the next term prompt_block = "\n".join(prompt_lines) self.fewshot_types_block = prompt_block return prompt_block - def fit(self, train_docs_jsonl: str, terms2doc_json: str, sample_size: int = 28, seed: int = 123) -> None: + def fit( + self, + train_docs_jsonl: str, + terms2doc_json: str, + sample_size: int = 28, + seed: int = 123, + ) -> None: """ Fit phase: Builds and caches the few-shot prompt blocks from the training files. 
No model training occurs (Few-Shot/In-Context Learning). """ # Build prompt block for Term extraction - _ = self.build_stratified_fewshot_prompt(train_docs_jsonl, terms2doc_json, sample_size=sample_size, seed=seed) + _ = self.build_stratified_fewshot_prompt( + train_docs_jsonl, terms2doc_json, sample_size=sample_size, seed=seed + ) # Build prompt block for Type extraction - _ = self.build_types_fewshot_block(train_docs_jsonl, terms2doc_json, sample_per_term=1) + _ = self.build_types_fewshot_block( + train_docs_jsonl, terms2doc_json, sample_per_term=1 + ) # ------------------------- # Inference helpers (prompt construction and output parsing) @@ -376,10 +418,18 @@ def _parse_list_like(self, raw_string: str) -> List[str]: def _call_model_one(self, prompt: str, max_new_tokens: int = 120) -> str: """Calls the underlying LocalAutoLLM for a single prompt. Returns the raw tail output.""" # self.model is an instance of LocalAutoLLM - model_output = self.model.generate([prompt], max_new_tokens=max_new_tokens, temperature=0.0, top_p=1.0) + model_output = self.model.generate( + [prompt], max_new_tokens=max_new_tokens, temperature=0.0, top_p=1.0 + ) return model_output[0] if model_output else "" - def predict_terms(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = -1, max_new_tokens: int = 120) -> int: + def predict_terms( + self, + docs_test_jsonl: str, + out_jsonl: str, + max_lines: int = -1, + max_new_tokens: int = 120, + ) -> int: """ Runs Term Extraction on the test documents and saves results to a JSONL file. Returns: The count of individual terms written. @@ -388,7 +438,10 @@ def predict_terms(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = - raise RuntimeError("Few-shot block for terms is empty. Call fit() first.") num_written_terms = 0 - with open(docs_test_jsonl, "r", encoding="utf-8") as file_in, open(out_jsonl, "w", encoding="utf-8") as file_out: + with ( + open(docs_test_jsonl, "r", encoding="utf-8") as file_in, + open(out_jsonl, "w", encoding="utf-8") as file_out, + ): for line_index, line in enumerate(file_in, start=1): if 0 < max_lines < line_index: break @@ -396,7 +449,7 @@ def predict_terms(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = - try: document = json.loads(line.strip()) except Exception: - continue # Skip malformed JSON lines + continue # Skip malformed JSON lines doc_id = document.get("id", "unknown") title = document.get("title", "") @@ -410,7 +463,10 @@ def predict_terms(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = - # Write extracted terms for term_or_type in predicted_terms: if isinstance(term_or_type, str) and term_or_type.strip(): - file_out.write(json.dumps({"doc_id": doc_id, "term": term_or_type.strip()}) + "\n") + file_out.write( + json.dumps({"doc_id": doc_id, "term": term_or_type.strip()}) + + "\n" + ) num_written_terms += 1 # Lightweight memory management for long runs @@ -421,7 +477,13 @@ def predict_terms(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = - return num_written_terms - def predict_types(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = -1, max_new_tokens: int = 120) -> int: + def predict_types( + self, + docs_test_jsonl: str, + out_jsonl: str, + max_lines: int = -1, + max_new_tokens: int = 120, + ) -> int: """ Runs Type Extraction on the test documents and saves results to a JSONL file. Returns: The count of individual types written. 
@@ -430,7 +492,10 @@ def predict_types(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = - raise RuntimeError("Few-shot block for types is empty. Call fit() first.") num_written_types = 0 - with open(docs_test_jsonl, "r", encoding="utf-8") as file_in, open(out_jsonl, "w", encoding="utf-8") as file_out: + with ( + open(docs_test_jsonl, "r", encoding="utf-8") as file_in, + open(out_jsonl, "w", encoding="utf-8") as file_out, + ): for line_index, line in enumerate(file_in, start=1): if 0 < max_lines < line_index: break @@ -438,7 +503,7 @@ def predict_types(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = - try: document = json.loads(line.strip()) except Exception: - continue # Skip malformed JSON lines + continue # Skip malformed JSON lines doc_id = document.get("id", "unknown") title = document.get("title", "") @@ -452,7 +517,10 @@ def predict_types(self, docs_test_jsonl: str, out_jsonl: str, max_lines: int = - # Write extracted types for term_or_type in predicted_types: if isinstance(term_or_type, str) and term_or_type.strip(): - file_out.write(json.dumps({"doc_id": doc_id, "type": term_or_type.strip()}) + "\n") + file_out.write( + json.dumps({"doc_id": doc_id, "type": term_or_type.strip()}) + + "\n" + ) num_written_types += 1 if line_index % 50 == 0: @@ -475,7 +543,9 @@ def load_gold_pairs(self, terms2doc_path: str) -> Set[Tuple[str, str]]: gold_pairs.add((doc_id, clean_term)) return gold_pairs - def load_predicted_pairs(self, predicted_jsonl_path: str, key: str = "term") -> Set[Tuple[str, str]]: + def load_predicted_pairs( + self, predicted_jsonl_path: str, key: str = "term" + ) -> Set[Tuple[str, str]]: """Load predicted (doc_id, term/type) pairs from a JSONL file, lowercased.""" predicted_pairs = set() with open(predicted_jsonl_path, "r", encoding="utf-8") as file_handle: @@ -490,7 +560,9 @@ def load_predicted_pairs(self, predicted_jsonl_path: str, key: str = "term") -> predicted_pairs.add((doc_id, value.strip().lower())) return predicted_pairs - def evaluate_extraction_f1(self, terms2doc_path: str, predicted_jsonl: str, key: str = "term") -> float: + def evaluate_extraction_f1( + self, terms2doc_path: str, predicted_jsonl: str, key: str = "term" + ) -> float: """ Computes set-based binary Precision, Recall, and F1 score against the gold pairs. 
""" @@ -507,6 +579,7 @@ def evaluate_extraction_f1(self, terms2doc_path: str, predicted_jsonl: str, key: # Use scikit-learn for metric calculation from sklearn.metrics import precision_recall_fscore_support + precision, recall, f1, _ = precision_recall_fscore_support( y_true, y_pred, average="binary", zero_division=0 ) From ec2313528859e11ad28401de93a9797e4c353f2d Mon Sep 17 00:00:00 2001 From: KrishnaRani Date: Tue, 11 Nov 2025 10:00:08 +0100 Subject: [PATCH 6/7] removing changes from __init__.py files --- ontolearner/__init__.py | 24 +----------------------- ontolearner/learner/__init__.py | 9 --------- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/ontolearner/__init__.py b/ontolearner/__init__.py index 5ebd3f6..a1b5d5a 100644 --- a/ontolearner/__init__.py +++ b/ontolearner/__init__.py @@ -29,18 +29,7 @@ AutoRetrieverLearner, AutoRAGLearner, StandardizedPrompting, - LabelMapper, - RWTHDBISTaxonomyLearner, - RWTHDBISTermTypingLearner, - SKHNLPZSLearner, - SKHNLPSequentialFTLearner, - SBUNLPFewShotLearner, - SBUNLPZSLearner, - SBUNLPText2OntoLearner, - AlexbekCrossAttnLearner, - AlexbekRFLearner, - AlexbekRAGLearner, - AlexbekFewShotLearner) + LabelMapper,) from ._learner import LearnerPipeline from .processor import Processor @@ -58,17 +47,6 @@ "LabelMapper", "LearnerPipeline", "Processor", - "RWTHDBISTaxonomyLearner", - "RWTHDBISTermTypingLearner", - "SKHNLPZSLearner", - "SKHNLPSequentialFTLearner", - "SBUNLPFewShotLearner", - "SBUNLPZSLearner", - "SBUNLPText2OntoLearner", - "AlexbekCrossAttnLearner", - "AlexbekRFLearner", - "AlexbekRAGLearner", - "AlexbekFewShotLearner", "data_structure", "text2onto", "ontology", diff --git a/ontolearner/learner/__init__.py b/ontolearner/learner/__init__.py index 71020e8..0baf580 100644 --- a/ontolearner/learner/__init__.py +++ b/ontolearner/learner/__init__.py @@ -17,12 +17,3 @@ from .rag import AutoRAGLearner from .prompt import StandardizedPrompting from .label_mapper import LabelMapper -from .taxonomy_discovery.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTaxonomyLearner -from .term_typing.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTermTypingLearner -from .taxonomy_discovery.skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner -from .taxonomy_discovery.sbunlp import SBUNLPFewShotLearner -from .term_typing.sbunlp import SBUNLPZSLearner -from .text2onto import SBUNLPFewShotLearner as SBUNLPText2OntoLearner -from .taxonomy_discovery.alexbek import AlexbekCrossAttnLearner -from .term_typing.alexbek import AlexbekRFLearner, AlexbekRAGLearner -from .text2onto.alexbek import AlexbekFewShotLearner From 2d49d94e2a42c3afd49ff5ee0907be123fcc3dcc Mon Sep 17 00:00:00 2001 From: KrishnaRani Date: Tue, 11 Nov 2025 13:17:44 +0100 Subject: [PATCH 7/7] Changes removed from requirements.txt --- requirements.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 28a92bb..3ce19f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,3 @@ sentence-transformers~=5.1.0 scikit-learn~=1.6.1 bitsandbytes~=0.45.1 mistral-common[sentencepiece]~=1.8.5 -g4f -protobuf<5 -accelerate>=0.26.0 -Levenshtein