sciknoworg · Krishna-Rani-t · Oct 22, 2025 · Oct 29, 2025 · Oct 29, 2025 · Nov 3, 2025
diff --git a/examples/llm_learner_rwthdbis_taxonomy_discovery.py b/examples/llm_learner_rwthdbis_taxonomy_discovery.py
@@ -0,0 +1,57 @@
+# Import core modules from the OntoLearner library
+from ontolearner import LearnerPipeline, train_test_split
+from ontolearner import ChordOntology, RWTHDBISTaxonomyLearner
+
+# Load the Chord ontology, which exposes hierarchical (parent, child) relations for taxonomy discovery
+ontology = ChordOntology()
+ontology.load()  # Read entities, type system, and taxonomic edges into memory
+
+# Extract the ontology data and hold out 20% of it as a test set (fixed random_state for a reproducible split)
+train_data, test_data = train_test_split(
+    ontology.extract(),
+    test_size=0.2,
+    random_state=42
+)
+
+# Initialize a supervised taxonomy classifier (encoder-based fine-tuning)
+# negative_ratio controls the number of non-edge examples; bidirectional_templates creates both (p→c) and (c→p) views
+# Context features are off here (context_json_path=None); pass a JSON path of type descriptions to enable them (NOTE(review): confirm whether with_context=True is also required)
+learner = RWTHDBISTaxonomyLearner(
+    model_name="microsoft/deberta-v3-small",
+    output_dir="./results/",
+    num_train_epochs=1,
+    per_device_train_batch_size=8,
+    gradient_accumulation_steps=4,
+    learning_rate=2e-5,
+    max_length=256,
+    seed=42,
+    negative_ratio=5,
+    bidirectional_templates=True,
+    context_json_path=None,
+    ontology_name=ontology.ontology_full_name,
+)
+
+# Build the pipeline around the learner; llm_id reuses the learner's model name
+pipeline = LearnerPipeline(
+    llm=learner,
+    llm_id=learner.model_name,
+    ontologizer_data=False,
+)
+
+# Run the full learning pipeline on the taxonomy-discovery task
+outputs = pipeline(
+    train_data=train_data,
+    test_data=test_data,
+    task="taxonomy-discovery",
+    evaluate=True,
+    ontologizer_data=False,
+)
+
+# Display the evaluation results
+print("Metrics:", outputs['metrics'])          # Shows {'precision': ..., 'recall': ..., 'f1_score': ...}
+
+# Display total elapsed time for training + prediction + evaluation
+print("Elapsed time:", outputs['elapsed_time'])
+
+# Print all returned outputs (including predictions)
+print(outputs)
diff --git a/examples/llm_learner_rwthdbis_term_typing.py b/examples/llm_learner_rwthdbis_term_typing.py
@@ -0,0 +1,50 @@
+# Import core modules from the OntoLearner library
+from ontolearner import LearnerPipeline, train_test_split, AgrO
+from ontolearner import RWTHDBISTermTypingLearner
+
+# Load the AgrO ontology.
+# AgrO provides term-typing supervision where each term can be annotated with one or more types.
+ontology = AgrO()
+ontology.load()
+data = ontology.extract()
+
+# Split the labeled term-typing data into train and test sets (20% held out, fixed random_state for reproducibility)
+train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
+
+# Configure a supervised encoder-based classifier for term typing.
+# This fine-tunes DeBERTa v3 on (term → type) signals for 30 epochs; lower num_train_epochs for a quicker trial run.
+learner = RWTHDBISTermTypingLearner(
+    model_name="microsoft/deberta-v3-small",
+    output_dir="./results/deberta-v3",
+    num_train_epochs=30,
+    per_device_train_batch_size=16,
+    gradient_accumulation_steps=2,
+    learning_rate=2e-5,
+    max_length=64,
+    seed=42,
+)
+
+# Build the pipeline and pass raw structured objects end-to-end.
+pipeline = LearnerPipeline(
+    llm=learner,
+    llm_id=learner.model_name,
+    ontologizer_data=False,
+)
+
+# Run the full learning pipeline on the term-typing task
+outputs = pipeline(
+    train_data=train_data,
+    test_data=test_data,
+    task="term-typing",
+    evaluate=True,
+    ontologizer_data=False,
+)
+
+# Display the evaluation results
+print("Metrics:", outputs['metrics'])          # Shows {'precision': ..., 'recall': ..., 'f1_score': ...}
+
+# Display total elapsed time for training + prediction + evaluation
+print("Elapsed time:", outputs['elapsed_time'])
+
+# Print all returned outputs (including predictions)
+print(outputs)
diff --git a/ontolearner/__init__.py b/ontolearner/__init__.py
@@ -29,7 +29,9 @@
                       AutoRetrieverLearner,
                       AutoRAGLearner,
                       StandardizedPrompting,
-                      LabelMapper)
+                      LabelMapper,
+                      RWTHDBISTaxonomyLearner,
+                      RWTHDBISTermTypingLearner)
 from ._learner import LearnerPipeline
 
 from .processor import Processor
@@ -47,6 +49,8 @@
     "LabelMapper",
     "LearnerPipeline",
     "Processor",
+    "RWTHDBISTaxonomyLearner",
+    "RWTHDBISTermTypingLearner",
     "data_structure",
     "text2onto",
     "ontology",

diff --git a/ontolearner/learner/__init__.py b/ontolearner/learner/__init__.py
@@ -17,3 +17,5 @@
 from .rag import AutoRAGLearner
 from .prompt import StandardizedPrompting
 from .label_mapper import LabelMapper
+from .taxonomy_discovery.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTaxonomyLearner
+from .term_typing.rwthdbis        import RWTHDBISSFTLearner as RWTHDBISTermTypingLearner
diff --git a/ontolearner/learner/taxonomy_discovery/__init__.py b/ontolearner/learner/taxonomy_discovery/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2025 SciKnowOrg
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://opensource.org/licenses/MIT
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .rwthdbis import RWTHDBISSFTLearner