diff --git a/docs/hub/_toctree.yml b/docs/hub/_toctree.yml index a78741eff..a6bbb8644 100644 --- a/docs/hub/_toctree.yml +++ b/docs/hub/_toctree.yml @@ -77,6 +77,8 @@ title: Sentence Transformers - local: spacy title: spaCy + - local: span_marker + title: SpanMarker - local: speechbrain title: SpeechBrain - local: stable-baselines3 diff --git a/docs/hub/models-libraries.md b/docs/hub/models-libraries.md index 2b6335473..6d45c1ce6 100644 --- a/docs/hub/models-libraries.md +++ b/docs/hub/models-libraries.md @@ -26,6 +26,7 @@ The table below summarizes the supported libraries and their level of integratio | [Sample Factory](https://github.com/alex-petrenko/sample-factory) | Codebase for high throughput asynchronous reinforcement learning. | ❌ | ✅ | ✅ | ✅ | | [Sentence Transformers](https://github.com/UKPLab/sentence-transformers) | Compute dense vector representations for sentences, paragraphs, and images. | ✅ | ✅ | ✅ | ✅ | | [spaCy](https://github.com/explosion/spaCy) | Advanced Natural Language Processing in Python and Cython. | ✅ | ✅ | ✅ | ✅ | +| [SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) | Familiar, simple and state-of-the-art Named Entity Recognition. | ✅ | ✅ | ✅ | ✅ | | [Scikit Learn (using skops)](https://skops.readthedocs.io/en/stable/) | Machine Learning in Python. | ✅ | ✅ | ✅ | ✅ | | [Speechbrain](https://speechbrain.github.io/) | A PyTorch Powered Speech Toolkit. | ✅ | ✅ | ✅ | ❌ | | [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) | Set of reliable implementations of deep reinforcement learning algorithms in PyTorch | ❌ | ✅ | ✅ | ✅ | diff --git a/docs/hub/span_marker.md b/docs/hub/span_marker.md new file mode 100644 index 000000000..6965ad701 --- /dev/null +++ b/docs/hub/span_marker.md @@ -0,0 +1,63 @@ +# Using SpanMarker at Hugging Face + +[SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) is a framework for training powerful Named Entity Recognition models using familiar encoders such as BERT, RoBERTa and DeBERTa. 
Because it is tightly implemented on top of the 🤗 Transformers library, SpanMarker can take full advantage of it. As a result, SpanMarker will be intuitive to use for anyone familiar with Transformers. + +## Exploring SpanMarker in the Hub + +You can find `span_marker` models by filtering at the left of the [models page](https://huggingface.co/models?library=span-marker). + +All models on the Hub come with these useful features: +1. An automatically generated model card with a brief description. +2. An interactive widget you can use to play with the model directly in the browser. +3. An Inference API that allows you to make inference requests. + +## Installation + +To get started, you can follow the [SpanMarker installation guide](https://tomaarsen.github.io/SpanMarkerNER/install.html). You can also use the following one-line install through pip: + +``` +pip install -U span_marker +``` + +## Using existing models + +All `span_marker` models can easily be loaded from the Hub. + +```py +from span_marker import SpanMarkerModel + +model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-bert-base-fewnerd-fine-super") +``` + +Once loaded, you can use [`SpanMarkerModel.predict`](https://tomaarsen.github.io/SpanMarkerNER/api/span_marker.modeling.html#span_marker.modeling.SpanMarkerModel.predict) to perform inference. 
+ +```py +model.predict("Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.") +``` +```json +[ + {"span": "Amelia Earhart", "label": "person-other", "score": 0.7629689574241638, "char_start_index": 0, "char_end_index": 14}, + {"span": "Lockheed Vega 5B", "label": "product-airplane", "score": 0.9833564758300781, "char_start_index": 38, "char_end_index": 54}, + {"span": "Atlantic", "label": "location-bodiesofwater", "score": 0.7621214389801025, "char_start_index": 66, "char_end_index": 74}, + {"span": "Paris", "label": "location-GPE", "score": 0.9807717204093933, "char_start_index": 78, "char_end_index": 83} +] +``` + +If you want to load a specific SpanMarker model, you can click `Use in SpanMarker` and you will be given a working snippet! + + + +## Additional resources + +* SpanMarker [repository](https://github.com/tomaarsen/SpanMarkerNER) +* SpanMarker [docs](https://tomaarsen.github.io/SpanMarkerNER) diff --git a/js/src/lib/interfaces/Libraries.ts b/js/src/lib/interfaces/Libraries.ts index 9a13f0347..1adefb2fe 100644 --- a/js/src/lib/interfaces/Libraries.ts +++ b/js/src/lib/interfaces/Libraries.ts @@ -23,6 +23,7 @@ export enum ModelLibrary { "sentence-transformers" = "Sentence Transformers", "sklearn" = "Scikit-learn", "spacy" = "spaCy", + "span-marker" = "SpanMarker", "speechbrain" = "speechbrain", "tensorflowtts" = "TensorFlowTTS", "timm" = "Timm", @@ -314,6 +315,11 @@ nlp = spacy.load("${nameWithoutNamespace(model.id)}") import ${nameWithoutNamespace(model.id)} nlp = ${nameWithoutNamespace(model.id)}.load()`; +const span_marker = (model: ModelData) => + `from span_marker import SpanMarkerModel + + model = SpanMarkerModel.from_pretrained("${model.id}")`; + const stanza = (model: ModelData) => `import stanza @@ -528,6 +534,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS: Partial = { "text-to-video": [], "text2text-generation": ["transformers"], "time-series-forecasting": [], - "token-classification": ["adapter-transformers", 
"flair", "spacy", "stanza", "transformers"], + "token-classification": ["adapter-transformers", "flair", "spacy", "span-marker", "stanza", "transformers"], "translation": ["transformers"], "unconditional-image-generation": [], "visual-question-answering": [],