From 7fbd6e3803a33c04e0ed632c2507b29beb59aad6 Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Thu, 6 Apr 2023 20:02:30 +0200 Subject: [PATCH 1/4] Integrate SpanMarker library --- js/src/lib/interfaces/Libraries.ts | 12 ++++++++++++ js/src/lib/interfaces/LibrariesToTasks.ts | 3 +++ 2 files changed, 15 insertions(+) diff --git a/js/src/lib/interfaces/Libraries.ts b/js/src/lib/interfaces/Libraries.ts index 9a13f0347..7709cf9c1 100644 --- a/js/src/lib/interfaces/Libraries.ts +++ b/js/src/lib/interfaces/Libraries.ts @@ -23,6 +23,7 @@ export enum ModelLibrary { "sentence-transformers" = "Sentence Transformers", "sklearn" = "Scikit-learn", "spacy" = "spaCy", + "span_marker" = "SpanMarker", "speechbrain" = "speechbrain", "tensorflowtts" = "TensorFlowTTS", "timm" = "Timm", @@ -314,6 +315,11 @@ nlp = spacy.load("${nameWithoutNamespace(model.id)}") import ${nameWithoutNamespace(model.id)} nlp = ${nameWithoutNamespace(model.id)}.load()`; +const span_marker = (model: ModelData) => + `from span_marker import SpanMarkerModel + + model = SpanMarkerModel.from_pretrained("${model.id}")`; + const stanza = (model: ModelData) => `import stanza @@ -528,6 +534,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS: Partial Date: Wed, 12 Apr 2023 12:19:15 +0200 Subject: [PATCH 2/4] Add missing SpanMarker docs --- docs/hub/_toctree.yml | 2 ++ docs/hub/models-libraries.md | 1 + docs/hub/span_marker.md | 63 ++++++++++++++++++++++++++++++++++++ tasks/src/const.ts | 2 +- 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 docs/hub/span_marker.md diff --git a/docs/hub/_toctree.yml b/docs/hub/_toctree.yml index 2fd2d33a7..b30dd4e3a 100644 --- a/docs/hub/_toctree.yml +++ b/docs/hub/_toctree.yml @@ -77,6 +77,8 @@ title: Sentence Transformers - local: spacy title: spaCy + - local: span_marker + title: SpanMarker - local: speechbrain title: SpeechBrain - local: stable-baselines3 diff --git a/docs/hub/models-libraries.md b/docs/hub/models-libraries.md index 2b6335473..6d45c1ce6 100644 --- 
a/docs/hub/models-libraries.md +++ b/docs/hub/models-libraries.md @@ -26,6 +26,7 @@ The table below summarizes the supported libraries and their level of integratio | [Sample Factory](https://github.com/alex-petrenko/sample-factory) | Codebase for high throughput asynchronous reinforcement learning. | ❌ | ✅ | ✅ | ✅ | | [Sentence Transformers](https://github.com/UKPLab/sentence-transformers) | Compute dense vector representations for sentences, paragraphs, and images. | ✅ | ✅ | ✅ | ✅ | | [spaCy](https://github.com/explosion/spaCy) | Advanced Natural Language Processing in Python and Cython. | ✅ | ✅ | ✅ | ✅ | +| [SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) | Familiar, simple and state-of-the-art Named Entity Recognition. | ✅ | ✅ | ✅ | ✅ | | [Scikit Learn (using skops)](https://skops.readthedocs.io/en/stable/) | Machine Learning in Python. | ✅ | ✅ | ✅ | ✅ | | [Speechbrain](https://speechbrain.github.io/) | A PyTorch Powered Speech Toolkit. | ✅ | ✅ | ✅ | ❌ | | [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) | Set of reliable implementations of deep reinforcement learning algorithms in PyTorch | ❌ | ✅ | ✅ | ✅ | diff --git a/docs/hub/span_marker.md b/docs/hub/span_marker.md new file mode 100644 index 000000000..3a9f9cd61 --- /dev/null +++ b/docs/hub/span_marker.md @@ -0,0 +1,63 @@ +# Using SpanMarker at Hugging Face + +SpanMarker is a framework for training powerful Named Entity Recognition models using familiar encoders such as BERT, RoBERTa and DeBERTa. Tightly implemented on top of the 🤗 Transformers library, SpanMarker can take good advantage of its valuable functionality. + +## Exploring SpanMarker in the Hub + +You can find `span_marker` models by filtering at the left of the [models page](https://huggingface.co/models?library=span_marker). + +All models on the Hub come with these useful features: +1. An automatically generated model card with a brief description. +2. 
An interactive widget you can use to play with the model directly in the browser. +3. An Inference API that allows you to make inference requests. + +## Installation + +To get started, you can follow the [SpanMarker installation guide](https://tomaarsen.github.io/SpanMarkerNER/install.html). You can also use the following one-line install through pip: + +``` +pip install -U span_marker +``` + +## Using existing models + +All `span_marker` models can easily be loaded from the Hub. + +```py +from span_marker import SpanMarkerModel + +model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-bert-base-fewnerd-fine-super") +``` + +Once loaded, you can use [`SpanMarkerModel.predict`](https://tomaarsen.github.io/SpanMarkerNER/api/span_marker.modeling.html#span_marker.modeling.SpanMarkerModel.predict) to perform inference. + +```py +model.predict("Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.") +``` +```json +[ + {"span": "Amelia Earhart", "label": "person-other", "score": 0.7629689574241638, "char_start_index": 0, "char_end_index": 14}, + {"span": "Lockheed Vega 5B", "label": "product-airplane", "score": 0.9833564758300781, "char_start_index": 38, "char_end_index": 54}, + {"span": "Atlantic", "label": "location-bodiesofwater", "score": 0.7621214389801025, "char_start_index": 66, "char_end_index": 74}, + {"span": "Paris", "label": "location-GPE", "score": 0.9807717204093933, "char_start_index": 78, "char_end_index": 83} +] +``` + +If you want to load a specific SpanMarker model, you can click `Use in SpanMarker` and you will be given a working snippet! + + + +## Additional resources + +* SpanMarker [repository](https://github.com/tomaarsen/SpanMarkerNER). +* SpanMarker [docs](https://tomaarsen.github.io/SpanMarkerNER). 
diff --git a/tasks/src/const.ts b/tasks/src/const.ts index 51c047f3b..d8c033cbc 100644 --- a/tasks/src/const.ts +++ b/tasks/src/const.ts @@ -40,7 +40,7 @@ export const TASKS_MODEL_LIBRARIES: Record = { "text-to-video": [], "text2text-generation": ["transformers"], "time-series-forecasting": [], - "token-classification": ["adapter-transformers", "flair", "spacy", "stanza", "transformers"], + "token-classification": ["adapter-transformers", "flair", "spacy", "span_marker", "stanza", "transformers"], "translation": ["transformers"], "unconditional-image-generation": [], "visual-question-answering": [], From 5b86efa67abe1ddc6a170adaf93a54a6605bcee6 Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Mon, 1 May 2023 15:45:19 +0200 Subject: [PATCH 3/4] Use 'span-marker' instead of 'span_marker' as lib key 1. This is more in line with the other libraries, and 2. my codebase has been using 'span-marker' from the start. If I stick with span_marker, then all models will get the 'span_marker' AND 'span-marker' tags. 
--- js/src/lib/interfaces/Libraries.ts | 4 ++-- js/src/lib/interfaces/LibrariesToTasks.ts | 2 +- tasks/src/const.ts | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/js/src/lib/interfaces/Libraries.ts b/js/src/lib/interfaces/Libraries.ts index 7709cf9c1..1adefb2fe 100644 --- a/js/src/lib/interfaces/Libraries.ts +++ b/js/src/lib/interfaces/Libraries.ts @@ -23,7 +23,7 @@ export enum ModelLibrary { "sentence-transformers" = "Sentence Transformers", "sklearn" = "Scikit-learn", "spacy" = "spaCy", - "span_marker" = "SpanMarker", + "span-marker" = "SpanMarker", "speechbrain" = "speechbrain", "tensorflowtts" = "TensorFlowTTS", "timm" = "Timm", @@ -534,7 +534,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS: Partial = { "text-to-video": [], "text2text-generation": ["transformers"], "time-series-forecasting": [], - "token-classification": ["adapter-transformers", "flair", "spacy", "span_marker", "stanza", "transformers"], + "token-classification": ["adapter-transformers", "flair", "spacy", "span-marker", "stanza", "transformers"], "translation": ["transformers"], "unconditional-image-generation": [], "visual-question-answering": [], From 24db3d947c11d6ab513b29b42748b10dda747a06 Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Mon, 1 May 2023 15:56:08 +0200 Subject: [PATCH 4/4] Update the concise docs --- docs/hub/span_marker.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/hub/span_marker.md b/docs/hub/span_marker.md index 3a9f9cd61..6965ad701 100644 --- a/docs/hub/span_marker.md +++ b/docs/hub/span_marker.md @@ -1,6 +1,6 @@ # Using SpanMarker at Hugging Face -SpanMarker is a framework for training powerful Named Entity Recognition models using familiar encoders such as BERT, RoBERTa and DeBERTa. Tightly implemented on top of the 🤗 Transformers library, SpanMarker can take good advantage of its valuable functionality. 
+[SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) is a framework for training powerful Named Entity Recognition models using familiar encoders such as BERT, RoBERTa and DeBERTa. SpanMarker is tightly implemented on top of the 🤗 Transformers library and takes full advantage of it. As a result, SpanMarker will be intuitive to use for anyone familiar with Transformers. ## Exploring SpanMarker in the Hub @@ -59,5 +59,5 @@ TODO: Add this, but then with SpanMarker ## Additional resources -* SpanMarker [repository](https://github.com/tomaarsen/SpanMarkerNER). -* SpanMarker [docs](https://tomaarsen.github.io/SpanMarkerNER). +* SpanMarker [repository](https://github.com/tomaarsen/SpanMarkerNER) +* SpanMarker [docs](https://tomaarsen.github.io/SpanMarkerNER)