
Commit 6845988

Ollama ps command for showing currently loaded models (ollama#4327)
1 parent 9eed4a9 commit 6845988

File tree

10 files changed (+193 −50 lines)

api/client.go (+9)

@@ -354,6 +354,15 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }
 
+// List running models.
+func (c *Client) ListRunning(ctx context.Context) (*ListResponse, error) {
+	var lr ListResponse
+	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
+		return nil, err
+	}
+	return &lr, nil
+}
+
 // Copy copies a model - creating a model with another name from an existing
 // model.
 func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {
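
For reference, a minimal sketch of calling the new method from client code (not part of this commit; assumes the github.com/ollama/ollama/api import path):

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api" // assumed import path for this client package
)

func main() {
	// ClientFromEnvironment reads the server address from the environment,
	// the same constructor the CLI handlers in this commit use.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// ListRunning hits the new GET /api/ps endpoint.
	running, err := client.ListRunning(context.Background())
	if err != nil {
		log.Fatal(err)
	}

	for _, m := range running.Models {
		fmt.Println(m.Name, m.SizeVRAM, m.ExpiresAt)
	}
}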

api/types.go (+3 −1)

@@ -289,10 +289,12 @@ type ListResponse struct {
 type ModelResponse struct {
 	Name       string       `json:"name"`
 	Model      string       `json:"model"`
-	ModifiedAt time.Time    `json:"modified_at"`
+	ModifiedAt time.Time    `json:"modified_at,omitempty"`
 	Size       int64        `json:"size"`
 	Digest     string       `json:"digest"`
 	Details    ModelDetails `json:"details,omitempty"`
+	ExpiresAt  time.Time    `json:"expires_at,omitempty"`
+	SizeVRAM   int64        `json:"size_vram,omitempty"`
 }
 
 type TokenResponse struct {
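
Roughly, a running-model entry serializes like the sketch below. The local struct trims the real ModelResponse down to the fields /api/ps populates, and all values are illustrative:

package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Trimmed-down copy of the ModelResponse fields used by /api/ps;
// the real type lives in api/types.go.
type modelResponse struct {
	Name      string    `json:"name"`
	Model     string    `json:"model"`
	Size      int64     `json:"size"`
	SizeVRAM  int64     `json:"size_vram,omitempty"`
	Digest    string    `json:"digest"`
	ExpiresAt time.Time `json:"expires_at,omitempty"`
}

func main() {
	m := modelResponse{
		Name:      "llama3:latest",     // illustrative
		Model:     "llama3:latest",     // illustrative
		Size:      5 << 30,             // ~5 GiB estimated total
		SizeVRAM:  5 << 30,             // fully resident in VRAM
		Digest:    "0123456789abcdef",  // illustrative
		ExpiresAt: time.Now().Add(5 * time.Minute),
	}
	b, _ := json.MarshalIndent(m, "", "  ")
	fmt.Println(string(b))
}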

cmd/cmd.go (+75)

@@ -12,6 +12,7 @@ import (
 	"fmt"
 	"io"
 	"log"
+	"math"
 	"net"
 	"net/http"
 	"os"
@@ -324,6 +325,18 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	}
 	opts.Format = format
 
+	keepAlive, err := cmd.Flags().GetString("keepalive")
+	if err != nil {
+		return err
+	}
+	if keepAlive != "" {
+		d, err := time.ParseDuration(keepAlive)
+		if err != nil {
+			return err
+		}
+		opts.KeepAlive = &api.Duration{Duration: d}
+	}
+
 	prompts := args[1:]
 	// prepend stdin to the prompt if provided
 	if !term.IsTerminal(int(os.Stdin.Fd())) {
@@ -496,6 +509,52 @@ func ListHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }
 
+func ListRunningHandler(cmd *cobra.Command, args []string) error {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return err
+	}
+
+	models, err := client.ListRunning(cmd.Context())
+	if err != nil {
+		return err
+	}
+
+	var data [][]string
+
+	for _, m := range models.Models {
+		if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
+			var procStr string
+			switch {
+			case m.SizeVRAM == 0:
+				procStr = "100% CPU"
+			case m.SizeVRAM == m.Size:
+				procStr = "100% GPU"
+			case m.SizeVRAM > m.Size || m.Size == 0:
+				procStr = "Unknown"
+			default:
+				sizeCPU := m.Size - m.SizeVRAM
+				cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
+				procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
+			}
+			data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
+		}
+	}
+
+	table := tablewriter.NewWriter(os.Stdout)
+	table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"})
+	table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
+	table.SetAlignment(tablewriter.ALIGN_LEFT)
+	table.SetHeaderLine(false)
+	table.SetBorder(false)
+	table.SetNoWhiteSpace(true)
+	table.SetTablePadding("\t")
+	table.AppendBulk(data)
+	table.Render()
+
+	return nil
+}
+
 func DeleteHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
@@ -672,6 +731,7 @@ type runOptions struct {
 	Images     []api.ImageData
 	Options    map[string]interface{}
 	MultiModal bool
+	KeepAlive  *api.Duration
 }
 
 type displayResponseState struct {
@@ -766,6 +826,10 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 		Options:  opts.Options,
 	}
 
+	if opts.KeepAlive != nil {
+		req.KeepAlive = opts.KeepAlive
+	}
+
 	if err := client.Chat(cancelCtx, req, fn); err != nil {
 		if errors.Is(err, context.Canceled) {
 			return nil, nil
@@ -1075,6 +1139,7 @@ func NewCLI() *cobra.Command {
 		RunE: RunHandler,
 	}
 
+	runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
 	runCmd.Flags().Bool("verbose", false, "Show timings for response")
 	runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 	runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
@@ -1123,6 +1188,14 @@ Environment Variables:
 		PreRunE: checkServerHeartbeat,
 		RunE:    ListHandler,
 	}
+
+	psCmd := &cobra.Command{
+		Use:     "ps",
+		Short:   "List running models",
+		PreRunE: checkServerHeartbeat,
+		RunE:    ListRunningHandler,
+	}
+
 	copyCmd := &cobra.Command{
 		Use:   "cp SOURCE DESTINATION",
 		Short: "Copy a model",
@@ -1146,6 +1219,7 @@ Environment Variables:
 		pullCmd,
 		pushCmd,
 		listCmd,
+		psCmd,
 		copyCmd,
 		deleteCmd,
 	} {
@@ -1160,6 +1234,7 @@ Environment Variables:
 		pullCmd,
 		pushCmd,
 		listCmd,
+		psCmd,
 		copyCmd,
 		deleteCmd,
 	)
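
To make the PROCESSOR column concrete: the default case splits the model's total size between CPU and GPU by the fraction that did not fit in VRAM. A standalone sketch of the same arithmetic, with made-up sizes:

package main

import (
	"fmt"
	"math"
)

func main() {
	// Illustrative values: an ~8 GiB model with ~6 GiB resident in VRAM.
	var size int64 = 8 << 30
	var sizeVRAM int64 = 6 << 30

	// Same arithmetic as the default case in ListRunningHandler.
	sizeCPU := size - sizeVRAM
	cpuPercent := math.Round(float64(sizeCPU) / float64(size) * 100)
	fmt.Printf("%d%%/%d%% CPU/GPU\n", int(cpuPercent), int(100-cpuPercent))
	// Output: 25%/75% CPU/GPU
}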

cmd/interactive.go (+5)

@@ -56,6 +56,11 @@ func loadModel(cmd *cobra.Command, opts *runOptions) error {
 		Model:    opts.Model,
 		Messages: []api.Message{},
 	}
+
+	if opts.KeepAlive != nil {
+		chatReq.KeepAlive = opts.KeepAlive
+	}
+
 	err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
 		p.StopAndClear()
 		if len(opts.Messages) > 0 {
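
This hunk makes the interactive preload honor --keepalive. The same preload pattern works outside the REPL too: an empty-message chat request loads a model without generating anything, and KeepAlive controls how long it stays resident. A hedged sketch using the public client (model name and duration are illustrative):

package main

import (
	"context"
	"log"
	"time"

	"github.com/ollama/ollama/api" // assumed import path
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// An empty Messages slice loads the model without generating,
	// mirroring loadModel above; KeepAlive pins it for 10 minutes.
	req := &api.ChatRequest{
		Model:     "llama3", // illustrative model name
		Messages:  []api.Message{},
		KeepAlive: &api.Duration{Duration: 10 * time.Minute},
	}
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}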

format/time.go (+3 −1)

@@ -60,7 +60,9 @@ func humanTime(t time.Time, zeroValue string) string {
 	}
 
 	delta := time.Since(t)
-	if delta < 0 {
+	if int(delta.Hours())/24/365 < -20 {
+		return "Forever"
+	} else if delta < 0 {
 		return humanDuration(-delta) + " from now"
 	}
 
format/time_test.go

+10
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,14 @@ func TestHumanTime(t *testing.T) {
3232
v := now.Add(800 * time.Millisecond)
3333
assertEqual(t, HumanTime(v, ""), "Less than a second from now")
3434
})
35+
36+
t.Run("time way in the future", func(t *testing.T) {
37+
v := now.Add(24 * time.Hour * 365 * 200)
38+
assertEqual(t, HumanTime(v, ""), "Forever")
39+
})
40+
41+
t.Run("time way in the future lowercase", func(t *testing.T) {
42+
v := now.Add(24 * time.Hour * 365 * 200)
43+
assertEqual(t, HumanTimeLower(v, ""), "forever")
44+
})
3545
}

llm/server.go (+5)

@@ -38,6 +38,7 @@ type LlamaServer interface {
 	Detokenize(ctx context.Context, tokens []int) (string, error)
 	Close() error
 	EstimatedVRAM() uint64
+	EstimatedTotal() uint64
 }
 
 // llmServer is an instance of the llama.cpp server
@@ -955,6 +956,10 @@ func (s *llmServer) EstimatedVRAM() uint64 {
 	return s.estimatedVRAM
 }
 
+func (s *llmServer) EstimatedTotal() uint64 {
+	return s.estimatedTotal
+}
+
 func parseDurationMs(ms float64) time.Duration {
 	dur, err := time.ParseDuration(fmt.Sprintf("%fms", ms))
 	if err != nil {

server/routes.go (+29)

@@ -979,6 +979,7 @@ func (s *Server) GenerateRoutes() http.Handler {
 	r.POST("/api/show", s.ShowModelHandler)
 	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
 	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
+	r.GET("/api/ps", s.ProcessHandler)
 
 	// Compatibility endpoints
 	r.POST("/v1/chat/completions", openai.Middleware(), s.ChatHandler)
@@ -1137,6 +1138,34 @@ func streamResponse(c *gin.Context, ch chan any) {
 	})
 }
 
+func (s *Server) ProcessHandler(c *gin.Context) {
+	models := []api.ModelResponse{}
+
+	for _, v := range s.sched.loaded {
+		model := v.model
+		modelDetails := api.ModelDetails{
+			Format:            model.Config.ModelFormat,
+			Family:            model.Config.ModelFamily,
+			Families:          model.Config.ModelFamilies,
+			ParameterSize:     model.Config.ModelType,
+			QuantizationLevel: model.Config.FileType,
+		}
+
+		mr := api.ModelResponse{
+			Model:     model.ShortName,
+			Name:      model.ShortName,
+			Size:      int64(v.estimatedTotal),
+			SizeVRAM:  int64(v.estimatedVRAM),
+			Digest:    model.Digest,
+			Details:   modelDetails,
+			ExpiresAt: v.expiresAt,
+		}
+		models = append(models, mr)
+	}
+
+	c.JSON(http.StatusOK, api.ListResponse{Models: models})
+}
+
 // ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model
 func chatPrompt(ctx context.Context, runner *runnerRef, template string, messages []api.Message, numCtx int) (string, error) {
 	encode := func(s string) ([]int, error) {
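
The new route can also be exercised without the client package. A minimal sketch, assuming the server listens on Ollama's conventional default address (127.0.0.1:11434) and that ListResponse serializes its models under a `models` key, as /api/tags does:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"time"
)

func main() {
	// Assumes a server on the conventional default address.
	resp, err := http.Get("http://127.0.0.1:11434/api/ps")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Decode just the fields this commit adds; tags mirror api/types.go.
	var ps struct {
		Models []struct {
			Name      string    `json:"name"`
			Size      int64     `json:"size"`
			SizeVRAM  int64     `json:"size_vram"`
			ExpiresAt time.Time `json:"expires_at"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&ps); err != nil {
		log.Fatal(err)
	}
	for _, m := range ps.Models {
		fmt.Printf("%s: %d of %d bytes in VRAM, until %s\n", m.Name, m.SizeVRAM, m.Size, m.ExpiresAt)
	}
}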
