Text-to-image with StableDiffusion and GPU acceleration in Node.js #121

Closed · wants to merge 5 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -2,6 +2,7 @@ __pycache__
.vscode
node_modules
.cache
.idea

# Do not track build artifacts/generated files
/dist
16 changes: 13 additions & 3 deletions README.md
@@ -32,13 +32,12 @@ Transformers.js is designed to be functionally equivalent to Hugging Face's [tra
- 📝 **Natural Language Processing**: text classification, named entity recognition, question answering, language modeling, summarization, translation, multiple choice, and text generation.
- 🖼️ **Computer Vision**: image classification, object detection, and segmentation.
- 🗣️ **Audio**: automatic speech recognition and audio classification.
- 🐙 **Multimodal**: zero-shot image classification.
- 🐙 **Multimodal**: zero-shot image classification and text-to-image generation.

Transformers.js uses [ONNX Runtime](https://onnxruntime.ai/) to run models in the browser. The best part about it is that you can easily [convert](#convert-your-models-to-onnx) your pretrained PyTorch, TensorFlow, or JAX models to ONNX using [🤗 Optimum](https://github.com/huggingface/optimum#onnx--onnx-runtime).

For more information, check out the full [documentation](https://huggingface.co/docs/transformers.js).


## Quick tour


@@ -102,6 +101,17 @@ Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN
</script>
```

## GPU acceleration in Node.js

**Windows and macOS**: works out of the box.

**Linux**:
1. Install [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
2. Install [cuDNN](https://developer.nvidia.com/rdp/cudnn-archive).
3. Install the `onnxruntime-linux-x64-gpu-1.14.1` release of [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases/tag/v1.14.1).

Once these are in place, pipelines pick up the platform's GPU execution provider automatically; a usage sketch follows.
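
A minimal sketch of GPU-accelerated text-to-image generation (the model id below is an illustrative placeholder, not part of this diff):

```javascript
import { pipeline } from '@xenova/transformers';

// The platform-specific execution provider (CoreML, CUDA, or DirectML)
// is selected automatically in src/backends/onnx.js.
const generator = await pipeline('text-to-image', 'hypothetical-user/stable-diffusion-onnx');
const image = await generator('An astronaut riding a horse on the moon');
```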


## Examples

@@ -230,7 +240,7 @@ to open up a feature request [here](https://github.com/xenova/transformers.js/is
| [Document Question Answering](https://huggingface.co/tasks/document-question-answering) | `document-question-answering` | Answering questions on document images. | ❌ |
| [Feature Extraction](https://huggingface.co/tasks/feature-extraction) | `feature-extraction` | Transforming raw data into numerical features that can be processed while preserving the information in the original dataset. | ✅ |
| [Image-to-Text](https://huggingface.co/tasks/image-to-text) | `image-to-text` | Output text from a given image. | ✅ |
| [Text-to-Image](https://huggingface.co/tasks/text-to-image) | `text-to-image` | Generates images from input text. | ❌ |
| [Text-to-Image](https://huggingface.co/tasks/text-to-image) | `text-to-image` | Generates images from input text. | ✅ |
| [Visual Question Answering](https://huggingface.co/tasks/visual-question-answering) | `visual-question-answering` | Answering open-ended questions based on an image. | ❌ |
| [Zero-Shot Image Classification](https://huggingface.co/tasks/zero-shot-image-classification) | `zero-shot-image-classification` | Classifying images into classes that are unseen during training. | ✅ |

2 changes: 1 addition & 1 deletion docs/snippets/0_introduction.snippet
@@ -5,7 +5,7 @@ Transformers.js is designed to be functionally equivalent to Hugging Face's [tra
- 📝 **Natural Language Processing**: text classification, named entity recognition, question answering, language modeling, summarization, translation, multiple choice, and text generation.
- 🖼️ **Computer Vision**: image classification, object detection, and segmentation.
- 🗣️ **Audio**: automatic speech recognition and audio classification.
- 🐙 **Multimodal**: zero-shot image classification.
- 🐙 **Multimodal**: zero-shot image classification and text-to-image generation.

Transformers.js uses [ONNX Runtime](https://onnxruntime.ai/) to run models in the browser. The best part about it is that you can easily [convert](/custom_usage#convert-your-models-to-onnx) your pretrained PyTorch, TensorFlow, or JAX models to ONNX using [🤗 Optimum](https://github.com/huggingface/optimum#onnx--onnx-runtime).

11 changes: 11 additions & 0 deletions docs/snippets/2_installation.snippet
@@ -10,3 +10,14 @@ Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
</script>
```

## GPU acceleration in Node.js

**Windows and macOS**: works out of the box.

**Linux**:
1. Install [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
2. Install [cuDNN](https://developer.nvidia.com/rdp/cudnn-archive).
3. Install the `onnxruntime-linux-x64-gpu-1.14.1` release of [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases/tag/v1.14.1).

ONNX Runtime tries execution providers in order and falls back to `cpu` when a GPU provider cannot be initialized; the sketch below shows a stand-alone check.
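
To verify the GPU setup independently of Transformers.js, one option is to create a bare ONNX Runtime session that requests the CUDA provider explicitly. A minimal sketch (it assumes `onnxruntime-node-gpu` is installed and `./model.onnx` is any valid ONNX model; both names are illustrative):

```javascript
import * as ort from 'onnxruntime-node-gpu';

// Providers are tried in order, so this falls back to CPU if the
// CUDA provider cannot be initialized on this machine.
const session = await ort.InferenceSession.create('./model.onnx', {
    executionProviders: ['cuda', 'cpu'],
});
console.log('inputs:', session.inputNames, 'outputs:', session.outputNames);
```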
20 changes: 8 additions & 12 deletions package-lock.json

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions package.json
@@ -42,7 +42,7 @@
"sharp": "^0.32.0"
},
"optionalDependencies": {
"onnxruntime-node": "^1.14.0"
"onnxruntime-node-gpu": "^1.14.0"
},
"devDependencies": {
"@types/jest": "^29.5.1",
@@ -68,12 +68,12 @@
"path": false,
"url": false,
"sharp": false,
"onnxruntime-node": false,
"onnxruntime-node-gpu": false,
"stream/web": false
},
"publishConfig": {
"access": "public"
},
"jsdelivr": "./dist/transformers.min.js",
"unpkg": "./dist/transformers.min.js"
}
}
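
Because `onnxruntime-node-gpu` lives in `optionalDependencies`, an install can still succeed when the native GPU binding fails to build. A guarded import is one way a consumer could degrade gracefully (a sketch only: it assumes both packages expose the same API surface, and the CPU-only fallback is not installed by this diff):

```javascript
// Prefer the GPU build; fall back to the CPU-only package if the
// optional dependency failed to install. (Assumes matching APIs.)
let ONNX;
try {
    ONNX = await import('onnxruntime-node-gpu');
} catch {
    ONNX = await import('onnxruntime-node');
}
```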
24 changes: 18 additions & 6 deletions src/backends/onnx.js
@@ -6,23 +6,23 @@
* So, we just import both packages, and use the appropriate one based on the environment:
* - When running in node, we use `onnxruntime-node`.
* - When running in the browser, we use `onnxruntime-web` (`onnxruntime-node` is not bundled).
*
*
* This module is not directly exported, but can be accessed through the environment variables:
* ```javascript
* import { env } from '@xenova/transformers';
* console.log(env.backends.onnx);
* ```
*
*
* @module backends/onnx
*/

// NOTE: Import order matters here. We need to import `onnxruntime-node-gpu` before `onnxruntime-web`.
import * as ONNX_NODE from 'onnxruntime-node';
import * as ONNX_NODE from 'onnxruntime-node-gpu';
import * as ONNX_WEB from 'onnxruntime-web';

export let ONNX;

export const executionProviders = [
export let executionProviders = [
// 'webgpu',
'wasm'
];
@@ -31,8 +31,20 @@ if (typeof process !== 'undefined' && process?.release?.name === 'node') {
// Running in a node-like environment.
ONNX = ONNX_NODE;

// Add `cpu` execution provider, with higher precedence that `wasm`.
executionProviders.unshift('cpu');
// Add `cpu` plus a platform-specific GPU execution provider, in order of preference.
switch (process.platform) {
    case 'darwin':
        executionProviders = ['coreml', 'cpu'];
        break;
    case 'linux':
        executionProviders = ['cuda', 'cpu'];
        break;
    case 'win32':
        executionProviders = ['directml', 'cpu'];
        break;
    default:
        executionProviders = ['cpu'];
}

} else {
// Running in a browser environment