From 427fbf847edfc82b21e5386c97fb3e2be4f0f8a9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 1 Jan 2024 12:20:22 +0100
Subject: [PATCH] feat: add trimsuffix

---
 api/backend/llm.go                 |  5 ++++-
 api/config/config.go               | 22 ++++++++++++----------
 examples/configurations/phi-2.yaml |  2 ++
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/api/backend/llm.go b/api/backend/llm.go
index 62eef4d8f1f..bd320b6155a 100644
--- a/api/backend/llm.go
+++ b/api/backend/llm.go
@@ -159,6 +159,9 @@ func Finetune(config config.Config, input, prediction string) string {
 	for _, c := range config.TrimSpace {
 		prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
 	}
-	return prediction
+	for _, c := range config.TrimSuffix {
+		prediction = strings.TrimSpace(strings.TrimSuffix(prediction, c))
+	}
+	return prediction
 }
 
diff --git a/api/config/config.go b/api/config/config.go
index bfcc7a6b8ed..3c7f2e2f771 100644
--- a/api/config/config.go
+++ b/api/config/config.go
@@ -103,16 +103,18 @@ type LLMConfig struct {
 	StopWords       []string `yaml:"stopwords"`
 	Cutstrings      []string `yaml:"cutstrings"`
 	TrimSpace       []string `yaml:"trimspace"`
-	ContextSize     int      `yaml:"context_size"`
-	NUMA            bool     `yaml:"numa"`
-	LoraAdapter     string   `yaml:"lora_adapter"`
-	LoraBase        string   `yaml:"lora_base"`
-	LoraScale       float32  `yaml:"lora_scale"`
-	NoMulMatQ       bool     `yaml:"no_mulmatq"`
-	DraftModel      string   `yaml:"draft_model"`
-	NDraft          int32    `yaml:"n_draft"`
-	Quantization    string   `yaml:"quantization"`
-	MMProj          string   `yaml:"mmproj"`
+	TrimSuffix      []string `yaml:"trimsuffix"`
+
+	ContextSize  int     `yaml:"context_size"`
+	NUMA         bool    `yaml:"numa"`
+	LoraAdapter  string  `yaml:"lora_adapter"`
+	LoraBase     string  `yaml:"lora_base"`
+	LoraScale    float32 `yaml:"lora_scale"`
+	NoMulMatQ    bool    `yaml:"no_mulmatq"`
+	DraftModel   string  `yaml:"draft_model"`
+	NDraft       int32   `yaml:"n_draft"`
+	Quantization string  `yaml:"quantization"`
+	MMProj       string  `yaml:"mmproj"`
 
 	RopeScaling   string  `yaml:"rope_scaling"`
 	YarnExtFactor float32 `yaml:"yarn_ext_factor"`
diff --git a/examples/configurations/phi-2.yaml b/examples/configurations/phi-2.yaml
index 8deec58b05c..67cef0cc088 100644
--- a/examples/configurations/phi-2.yaml
+++ b/examples/configurations/phi-2.yaml
@@ -3,6 +3,8 @@ context_size: 2048
 f16: true
 gpu_layers: 90
 mmap: true
+trimsuffix:
+- "\n"
 parameters:
   model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
   temperature: 0.2
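
For reference, the trimsuffix handling added to Finetune above mirrors the existing trimspace logic, except that each configured string is stripped from the end of the prediction with strings.TrimSuffix before the result is whitespace-trimmed. Below is a minimal standalone sketch of that behavior; the trimSuffixes helper, the package layout, and the sample strings are illustrative assumptions, not code from this patch.

package main

import (
	"fmt"
	"strings"
)

// trimSuffixes applies the same logic the patch adds to Finetune: for each
// configured suffix, strip it from the end of the prediction, then trim
// surrounding whitespace.
func trimSuffixes(prediction string, suffixes []string) string {
	for _, c := range suffixes {
		prediction = strings.TrimSpace(strings.TrimSuffix(prediction, c))
	}
	return prediction
}

func main() {
	// Matches the phi-2.yaml example above, which configures trimsuffix with "\n".
	out := trimSuffixes("The answer is 42.\n", []string{"\n"})
	fmt.Printf("%q\n", out) // prints "The answer is 42."
}

In a model YAML, trimsuffix takes a list of strings (as in the phi-2.yaml hunk above); each entry is applied after the trimspace step, since the new loop runs immediately before the prediction is returned.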