2 changes: 2 additions & 0 deletions lib/ruby_llm.rb
@@ -23,6 +23,7 @@
'openrouter' => 'OpenRouter',
'gpustack' => 'GPUStack',
'mistral' => 'Mistral',
'togetherai' => 'TogetherAI',
'vertexai' => 'VertexAI',
'pdf' => 'PDF',
'UI' => 'UI'
@@ -100,6 +101,7 @@ def logger
RubyLLM::Provider.register :openrouter, RubyLLM::Providers::OpenRouter
RubyLLM::Provider.register :perplexity, RubyLLM::Providers::Perplexity
RubyLLM::Provider.register :vertexai, RubyLLM::Providers::VertexAI
RubyLLM::Provider.register :togetherai, RubyLLM::Providers::TogetherAI

if defined?(Rails::Railtie)
require 'ruby_llm/railtie'
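With the inflection entry and the Provider.register call above, the new provider resolves by its :togetherai key like every other provider in this file; the 'togetherai' => 'TogetherAI' mapping is what lets the loader's inflector find the TogetherAI constant instead of looking up Togetherai. A minimal usage sketch, assuming the gem's documented RubyLLM.chat interface with explicit provider routing (the model id is illustrative):

chat = RubyLLM.chat(
  model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
  provider: :togetherai
)
chat.ask('Hello from Together.ai!')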
1 change: 1 addition & 0 deletions lib/ruby_llm/configuration.rb
@@ -24,6 +24,7 @@ class Configuration
:gpustack_api_base,
:gpustack_api_key,
:mistral_api_key,
:togetherai_api_key,
# Default models
:default_model,
:default_embedding_model,
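The new togetherai_api_key accessor plugs into the gem's standard configuration block alongside the other provider keys. A minimal sketch, assuming the usual RubyLLM.configure entry point:

RubyLLM.configure do |config|
  config.togetherai_api_key = ENV.fetch('TOGETHER_API_KEY', nil)
end

Because the provider lists togetherai_api_key under configuration_requirements (see the provider class below), leaving the key unset should simply mark the provider as unconfigured rather than raising at load time.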
35 changes: 35 additions & 0 deletions lib/ruby_llm/providers/togetherai.rb
@@ -0,0 +1,35 @@
# frozen_string_literal: true

module RubyLLM
module Providers
# Together.ai API integration.
class TogetherAI < Provider
include TogetherAI::Chat
include TogetherAI::Models

def api_base
'https://api.together.xyz/v1'
end

def headers
headers_hash = { 'Content-Type' => 'application/json' }

if @config.togetherai_api_key && !@config.togetherai_api_key.empty?
headers_hash['Authorization'] = "Bearer #{@config.togetherai_api_key}"
end

headers_hash
end

class << self
def capabilities
TogetherAI::Capabilities
end

def configuration_requirements
%i[togetherai_api_key]
end
end
end
end
end
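Since headers only adds an Authorization entry when a non-empty key is configured, a missing key degrades to plain JSON headers instead of sending an empty "Bearer " token. A small sketch of the two cases; Config here is a stand-in struct, not the gem's real configuration class, and allocate is used only to skip the real initializer:

Config = Struct.new(:togetherai_api_key)

provider = RubyLLM::Providers::TogetherAI.allocate
provider.instance_variable_set(:@config, Config.new('sk-example'))
provider.headers
# => { 'Content-Type' => 'application/json', 'Authorization' => 'Bearer sk-example' }

provider.instance_variable_set(:@config, Config.new(nil))
provider.headers
# => { 'Content-Type' => 'application/json' }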
273 changes: 273 additions & 0 deletions lib/ruby_llm/providers/togetherai/capabilities.rb
@@ -0,0 +1,273 @@
# frozen_string_literal: true

module RubyLLM
module Providers
class TogetherAI
# Capabilities for the Together.ai provider
module Capabilities
def self.supports_streaming?(model_id)
# Most chat models support streaming, exclude specialized non-chat models
supports_chat_for?(model_id)
end

def self.supports_vision?(model_id)
supports_vision_for?(model_id)
end

def self.supports_functions?(model_id)
supports_tools_for?(model_id)
end

def self.supports_json_mode?(model_id)
# Most chat models support JSON mode, exclude specialized models
supports_chat_for?(model_id) && !model_id.match?(/whisper|voxtral/i)
end

def self.model_type(model_id)
return 'embedding' if supports_embeddings_for?(model_id)
return 'image' if supports_images_for?(model_id)
return 'audio' if supports_audio_for?(model_id)
return 'moderation' if supports_moderation_for?(model_id)

'chat'
end

def self.normalize_temperature(temperature, _model)
# Together.ai accepts temperature values between 0.0 and 2.0
return temperature if temperature.nil?

temperature.clamp(0.0, 2.0)
end

def self.max_tokens_for_model(_model)
# Default max tokens for Together.ai models
# This would ideally be model-specific
4096
end

def self.format_display_name(model_id)
model_id.split('/').last.tr('-', ' ').titleize
end

def self.model_family(model_id)
case model_id
when /llama/i then 'llama'
when /qwen/i then 'qwen'
when /mistral/i then 'mistral'
when /deepseek/i then 'deepseek'
when /gemma/i then 'gemma'
when /moonshot/i then 'kimi'
when /glm/i then 'glm'
when /cogito/i then 'cogito'
when /arcee/i then 'arcee'
when /marin/i then 'marin'
when /gryphe/i then 'mythomax'
when /openai/i then 'openai'
else 'other'
end
end

def self.context_window_for(model_id)
# Context windows based on Together.ai model specifications
# Using a hash lookup for better performance and maintainability
context_windows = {
# Largest context windows (300K+ tokens)
'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8' => 524_288,
'meta-llama/Llama-4-Scout-17B-16E-Instruct' => 327_680,

# 256K+ context models
'moonshotai/Kimi-K2-Instruct-0905' => 262_144,
'moonshotai/Kimi-K2-Thinking' => 262_144,
'Qwen/Qwen3-235B-A22B-Thinking-2507' => 262_144,
'Qwen/Qwen3-235B-A22B-Instruct-2507-tput' => 262_144,
'Qwen/Qwen3-Next-80B-A3B-Instruct' => 262_144,
'Qwen/Qwen3-Next-80B-A3B-Thinking' => 262_144,
'Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8' => 256_000,

# ~200K context models
'zai-org/GLM-4.6' => 202_752,

# ~160K context models
'deepseek-ai/DeepSeek-R1' => 163_839,
'deepseek-ai/DeepSeek-R1-0528-tput' => 163_839,
'deepseek-ai/DeepSeek-V3' => 163_839,

# ~130K context models
'meta-llama/Llama-3.3-70B-Instruct-Turbo' => 131_072,
'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' => 131_072,
'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo' => 130_815,
'meta-llama/Llama-3.2-3B-Instruct-Turbo' => 131_072,
'deepseek-ai/DeepSeek-R1-Distill-Llama-70B' => 131_072,
'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B' => 131_072,
'zai-org/GLM-4.5-Air-FP8' => 131_072,

# ~128K context models
'moonshotai/Kimi-K2-Instruct' => 128_000,
'deepseek-ai/DeepSeek-V3.1' => 128_000,
'openai/gpt-oss-120b' => 128_000,
'openai/gpt-oss-20b' => 128_000,
'arcee-ai/virtuoso-medium-v2' => 128_000,
'arcee-ai/virtuoso-large' => 128_000,
'arcee-ai/maestro-reasoning' => 128_000,
'arcee-ai/arcee-spotlight' => 128_000,

# ~40K context models
'Qwen/Qwen3-235B-A22B-fp8-tput' => 40_960,
'mistralai/Magistral-Small-2506' => 40_960,

# ~32K context models (most common)
'Qwen/Qwen2.5-7B-Instruct-Turbo' => 32_768,
'Qwen/Qwen2.5-72B-Instruct-Turbo' => 32_768,
'Qwen/Qwen2.5-VL-72B-Instruct' => 32_768,
'Qwen/Qwen2.5-Coder-32B-Instruct' => 32_768,
'Qwen/QwQ-32B' => 32_768,
'mistralai/Mistral-Small-24B-Instruct-2501' => 32_768,
'mistralai/Mistral-7B-Instruct-v0.2' => 32_768,
'mistralai/Mistral-7B-Instruct-v0.3' => 32_768,
'google/gemma-3n-E4B-it' => 32_768,
'arcee-ai/coder-large' => 32_768,
'arcee-ai/caller' => 32_768,
'arcee-ai/arcee-blitz' => 32_768,

# ~8K context models
'meta-llama/Llama-3.3-70B-Instruct-Turbo-Free' => 8_193,
'meta-llama/Meta-Llama-3-8B-Instruct-Lite' => 8_192,
'meta-llama/Llama-3-70b-chat-hf' => 8_192,
'mistralai/Mistral-7B-Instruct-v0.1' => 8_192,
'google/gemma-2b-it' => 8_192,

# ~4K context models
'marin-community/marin-8b-instruct' => 4_096,
'Gryphe/MythoMax-L2-13b' => 4_096
}

# Check for exact match first
return context_windows[model_id] if context_windows.key?(model_id)

# Pattern matching for model families
case model_id
when %r{^deepcogito/cogito-v2.*} then 32_768
when %r{^Qwen/Qwen3.*235B.*} then 262_144
when %r{^meta-llama/Llama-4.*} then 1_048_576
else 16_384 # Default context window for unknown models
end
end

def self.max_tokens_for(model_id)
max_tokens_for_model(model_id)
end

def self.modalities_for(model_id)
input_modalities = ['text']
output_modalities = ['text']

input_modalities << 'image' if supports_vision_for?(model_id)
input_modalities << 'audio' if supports_audio_for?(model_id) && !model_id.match?(/sonic/i)

output_modalities = ['image'] if supports_images_for?(model_id)
output_modalities << 'audio' if model_id.match?(/sonic|voxtral/i)

{ input: input_modalities, output: output_modalities }
end

def self.capabilities_for(model_id)
capabilities = primary_capabilities(model_id)
capabilities.concat(chat_capabilities(model_id)) if supports_chat_for?(model_id)
capabilities.concat(specialized_capabilities(model_id))
capabilities
end

def self.primary_capabilities(model_id)
[].tap do |caps|
caps << 'chat' if supports_chat_for?(model_id)
caps << 'embeddings' if supports_embeddings_for?(model_id)
caps << 'images' if supports_images_for?(model_id)
end
end

def self.chat_capabilities(model_id)
[].tap do |caps|
caps << 'streaming' if supports_streaming?(model_id)
caps << 'tools' if supports_tools_for?(model_id)
caps << 'json_mode' if supports_json_mode?(model_id)
caps << 'vision' if supports_vision_for?(model_id)
end
end

def self.specialized_capabilities(model_id)
[].tap do |caps|
caps << 'transcription' if supports_audio_for?(model_id)
caps << 'moderation' if supports_moderation_for?(model_id)
end
end

def self.supports_tools_for?(model_id)
# Most chat models support function calling, exclude non-chat models
return false if supports_embeddings_for?(model_id)
return false if supports_images_for?(model_id)
return false if supports_audio_for?(model_id)
return false if supports_moderation_for?(model_id)

true
end

def self.supports_chat_for?(model_id)
# Chat models are the main category, exclude non-chat models
return false if supports_embeddings_for?(model_id)
return false if supports_images_for?(model_id)
return false if supports_audio_for?(model_id) && !supports_vision_for?(model_id)
return false if supports_moderation_for?(model_id) && !model_id.match?(/Llama-Guard/i)

true
end

def self.supports_embeddings_for?(model_id)
# Embedding models
model_id.match?(/bge-|m2-bert|gte-|multilingual-e5/i)
end

# Methods for detecting different model capabilities
def self.supports_images_for?(model_id)
# Image generation models (FLUX, Stable Diffusion, Imagen)
model_id.match?(/FLUX|stable-diffusion|imagen/i)
end

def self.supports_vision_for?(model_id)
# Vision models (multimodal models that can process images)
model_id.match?(/Scout|VL|spotlight/i)
end

def self.supports_video_for?(_model_id)
false # Video generation support will be added in future PR
end

def self.supports_audio_for?(model_id)
# Audio models (TTS and transcription)
model_id.match?(/sonic|whisper|voxtral|orpheus/i)
end

def self.supports_transcription_for?(model_id)
# Transcription-specific models
model_id.match?(/whisper/i)
end

def self.supports_moderation_for?(model_id)
# Moderation models
model_id.match?(/Guard|VirtueGuard/i)
end

def self.supports_rerank_for?(_model_id)
false # Rerank support will be added in future PR
end

def self.pricing_for(_model_id)
# Placeholder pricing - should be model-specific
{
input_tokens: 0.001,
output_tokens: 0.002
}
end
end
end
end
end
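Given the lookup table and regex fallbacks above, these are the expected results for a few representative ids (the deepcogito id is illustrative; only the prefix has to match the pattern):

caps = RubyLLM::Providers::TogetherAI::Capabilities

caps.context_window_for('Qwen/QwQ-32B')               # => 32_768 (exact match)
caps.context_window_for('deepcogito/cogito-v2-70b')   # => 32_768 (prefix pattern)
caps.context_window_for('some/unknown-model')         # => 16_384 (default)

caps.model_type('BAAI/bge-large-en-v1.5')             # => 'embedding' (matches /bge-/)
caps.supports_vision?('Qwen/Qwen2.5-VL-72B-Instruct') # => true (matches /VL/)
caps.normalize_temperature(3.5, nil)                  # => 2.0 (clamped to 0.0..2.0)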