Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions examples/llm_learner_rwthdbis_taxonomy_discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Import core modules from the OntoLearner library.
from ontolearner import LearnerPipeline, train_test_split
from ontolearner import ChordOntology, RWTHDBISTaxonomyLearner

# Load the Chord ontology, which exposes hierarchical (parent, child)
# relations used as supervision for taxonomy discovery.
ontology = ChordOntology()
ontology.load()  # Read entities, the type system, and taxonomic edges into memory

# Extract typed taxonomic edges and split them into train/test sets
# while preserving the structured shape of each example.
train_data, test_data = train_test_split(
    ontology.extract(),
    test_size=0.2,    # hold out 20% of the edges for evaluation
    random_state=42   # fixed seed so the split is reproducible
)

# Initialize a supervised taxonomy classifier (encoder-based fine-tuning).
# - negative_ratio controls how many non-edge (negative) examples are sampled.
# - bidirectional_templates creates both (parent->child) and (child->parent)
#   views of every edge.
# - Context features are optional: enable them with with_context=True and a
#   JSON path of type descriptions (context_json_path).
learner = RWTHDBISTaxonomyLearner(
    model_name="microsoft/deberta-v3-small",
    output_dir="./results/",
    num_train_epochs=1,                # single epoch: quick demo settings
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,     # effective batch size 8 * 4 = 32
    learning_rate=2e-5,
    max_length=256,
    seed=42,
    negative_ratio=5,
    bidirectional_templates=True,
    context_json_path=None,            # no context features in this example
    ontology_name=ontology.ontology_full_name,
)

# Build the pipeline around the fine-tuned learner.
pipeline = LearnerPipeline(
    llm=learner,
    llm_id=learner.model_name,
    ontologizer_data=False,  # NOTE(review): presumably passes raw structured objects through — confirm
)

# Run the full learning pipeline on the taxonomy-discovery task.
outputs = pipeline(
    train_data=train_data,
    test_data=test_data,
    task="taxonomy-discovery",
    evaluate=True,
    ontologizer_data=False,
)

# Display the evaluation results.
print("Metrics:", outputs['metrics'])  # Shows {'precision': ..., 'recall': ..., 'f1_score': ...}

# Display total elapsed time for training + prediction + evaluation.
print("Elapsed time:", outputs['elapsed_time'])

# Print all returned outputs (including predictions).
print(outputs)
50 changes: 50 additions & 0 deletions examples/llm_learner_rwthdbis_term_typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Import core modules from the OntoLearner library.
from ontolearner import LearnerPipeline, train_test_split, AgrO
from ontolearner import RWTHDBISTermTypingLearner

# Load the AgrO ontology.
# AgrO provides term-typing supervision where each term can be annotated
# with one or more types.
ontology = AgrO()
ontology.load()
data = ontology.extract()  # structured term-typing examples

# Split the labeled term-typing data into train and test sets
# (fixed seed for a reproducible 80/20 split).
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Configure a supervised encoder-based classifier for term typing.
# This fine-tunes DeBERTa v3 on (term -> type) signals; increase
# num_train_epochs for stronger results.
learner = RWTHDBISTermTypingLearner(
    model_name="microsoft/deberta-v3-small",
    output_dir="./results/deberta-v3",
    num_train_epochs=30,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,   # effective batch size 16 * 2 = 32
    learning_rate=2e-5,
    max_length=64,                   # terms are short, so a small context window suffices
    seed=42,
)

# Build the pipeline and pass raw structured objects end-to-end.
pipeline = LearnerPipeline(
    llm=learner,
    llm_id=learner.model_name,
    ontologizer_data=False,
)

# Run the full learning pipeline on the term-typing task.
outputs = pipeline(
    train_data=train_data,
    test_data=test_data,
    task="term-typing",
    evaluate=True,
    ontologizer_data=False,
)

# Display the evaluation results.
print("Metrics:", outputs['metrics'])  # Shows {'precision': ..., 'recall': ..., 'f1_score': ...}

# Display total elapsed time for training + prediction + evaluation.
print("Elapsed time:", outputs['elapsed_time'])

# Print all returned outputs (including predictions).
print(outputs)
6 changes: 5 additions & 1 deletion ontolearner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
AutoRetrieverLearner,
AutoRAGLearner,
StandardizedPrompting,
LabelMapper)
LabelMapper,
RWTHDBISTaxonomyLearner,
RWTHDBISTermTypingLearner)
from ._learner import LearnerPipeline

from .processor import Processor
Expand All @@ -47,6 +49,8 @@
"LabelMapper",
"LearnerPipeline",
"Processor",
"RWTHDBISTaxonomyLearner",
"RWTHDBISTermTypingLearner",
"data_structure",
"text2onto",
"ontology",
Expand Down
2 changes: 2 additions & 0 deletions ontolearner/learner/__init__.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Krishna-Rani-t I see this is becoming problematic! So here is the new idea:

Let's not import the models here! so

from .taxonomy_discovery.skhnlp import SKHNLPSequentialFTLearner, SKHNLPZSLearner
from .taxonomy_discovery.sbunlp import SBUNLPFewShotLearner
from .term_typing.sbunlp import SBUNLPZSLearner
from .text2onto import SBUNLPFewShotLearner as SBUNLPText2OntoLearner

or similar imports will be removed from this __init__, and in ontolearner/__init__.py you DO NOT NEED to do the following imports:

RWTHDBISTaxonomyLearner,
                      RWTHDBISTermTypingLearner,
                      SKHNLPZSLearner,
                      SKHNLPSequentialFTLearner,
                      SBUNLPFewShotLearner,
                      SBUNLPZSLearner,
                      SBUNLPText2OntoLearner)

In your examples, for loading lets say SKHNLPZSLearner, you will do this:

from ontolearner.learner.taxonomy_discovery import SKHNLPZSLearner

so if you use the same class name inside the learner/term_typing / it will be

from ontolearner.learner.term_typing import SKHNLPZSLearner

Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@
from .rag import AutoRAGLearner
from .prompt import StandardizedPrompting
from .label_mapper import LabelMapper
from .taxonomy_discovery.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTaxonomyLearner
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using `as` here is not recommended, so I would suggest this way of importing:

from .taxonomy_discovery import RWTHDBISSFTLearner
  • don't add .rwthdbis
  • rename the class itself from RWTHDBISSFTLearner to RWTHDBISTaxonomyLearner

from .term_typing.rwthdbis import RWTHDBISSFTLearner as RWTHDBISTermTypingLearner
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous line comment also applicable to this line of code as well.

15 changes: 15 additions & 0 deletions ontolearner/learner/taxonomy_discovery/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2025 SciKnowOrg
#
# Licensed under the MIT License (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://opensource.org/licenses/MIT
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .rwthdbis import RWTHDBISSFTLearner
Loading