
Commit 5411936: 1.7.0 release updates (#454)

* remove deprecated models
* add new OpenAI models
* raise the passthrough token limit
* bump the litellm version
* bump the refact version to 1.7.0
* upgrade Cython

1 parent: c5162ff

File tree: 6 files changed, +37 -137 lines

Dockerfile (+1)

```diff
@@ -44,6 +44,7 @@ RUN cd /tmp/refact-lsp \
 COPY . /tmp/app
 RUN echo "refact $(git -C /tmp/app rev-parse HEAD)" >> /refact-build-info.txt
 RUN pip install ninja
+RUN pip install -U cython
 RUN pip install /tmp/app -v --no-build-isolation && rm -rf /tmp/app

 ENV REFACT_PERM_DIR "/perm_storage"
```
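A note on why this line lands where it does (my inference, not stated in the commit): the install that follows uses `--no-build-isolation`, which makes pip build the package with whatever build dependencies are already in the image rather than in a fresh isolated environment, so `ninja` and an up-to-date `cython` have to be installed beforehand for the 1.7.0 build to succeed.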

README.md (-13)

```diff
@@ -106,22 +106,9 @@ Extensions > Refact.ai Assistant > Settings > Infurl
 | Model | Completion | Chat | Fine-tuning | [Deprecated](## "Will be removed in next versions") |
 |-------|------------|------|-------------|-----------------------------------------------------|
 | [Refact/1.6B](https://huggingface.co/smallcloudai/Refact-1_6B-fim) | + | | + | |
-| [starcoder/1b/base](https://huggingface.co/smallcloudai/starcoderbase-1b) | + | | + | + |
-| [starcoder/3b/base](https://huggingface.co/smallcloudai/starcoderbase-3b) | + | | + | + |
-| [starcoder/7b/base](https://huggingface.co/smallcloudai/starcoderbase-7b) | + | | + | + |
-| [starcoder/15b/base](https://huggingface.co/TheBloke/starcoder-GPTQ) | + | | | + |
-| [starcoder/15b/plus](https://huggingface.co/TheBloke/starcoderplus-GPTQ) | + | | | + |
 | [starcoder2/3b/base](https://huggingface.co/bigcode/starcoder2-3b) | + | | + | |
 | [starcoder2/7b/base](https://huggingface.co/bigcode/starcoder2-7b) | + | | + | |
 | [starcoder2/15b/base](https://huggingface.co/bigcode/starcoder2-15b) | + | | + | |
-| [wizardcoder/15b](https://huggingface.co/TheBloke/WizardCoder-15B-1.0-GPTQ) | + | | | + |
-| [codellama/7b](https://huggingface.co/TheBloke/CodeLlama-7B-fp16) | + | | + | + |
-| [starchat/15b/beta](https://huggingface.co/TheBloke/starchat-beta-GPTQ) | | + | | + |
-| [wizardlm/7b](https://huggingface.co/TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ) | | + | | + |
-| [wizardlm/13b](https://huggingface.co/TheBloke/WizardLM-13B-V1.1-GPTQ) | | + | | + |
-| [wizardlm/30b](https://huggingface.co/TheBloke/WizardLM-30B-fp16) | | + | | + |
-| [llama2/7b](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GPTQ) | | + | | + |
-| [llama2/13b](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ) | | + | | + |
 | [deepseek-coder/1.3b/base](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base) | + | | + | |
 | [deepseek-coder/5.7b/mqa-base](https://huggingface.co/deepseek-ai/deepseek-coder-5.7bmqa-base) | + | | + | |
 | [magicoder/6.7b](https://huggingface.co/TheBloke/Magicoder-S-DS-6.7B-GPTQ) | | + | | |
```

refact_known_models/huggingface.py (-118)

```diff
@@ -1,122 +1,4 @@
 huggingface_mini_db = {
-    "starcoder/15b/base": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/starcoder-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 18000,
-        "T": 4096,
-        "filter_caps": ["completion"],
-        "deprecated": True,
-    },
-    "starcoder/15b/plus": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/starcoderplus-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 18000,
-        "T": 4096,
-        "filter_caps": ["completion"],
-        "deprecated": True,
-    },
-    "starchat/15b/beta": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/starchat-beta-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 18000,
-        "T": 4096,
-        "filter_caps": ["chat"],
-        "deprecated": True,
-    },
-    "starcoder/1b/base": {
-        "backend": "transformers",
-        "model_path": "smallcloudai/starcoderbase-1b",
-        "model_class_kwargs": {},
-        "required_memory_mb": 8000,
-        "T": 8192,
-        "filter_caps": ["completion", "finetune"],
-        "deprecated": True,
-    },
-    "starcoder/3b/base": {
-        "backend": "transformers",
-        "model_path": "smallcloudai/starcoderbase-3b",
-        "model_class_kwargs": {},
-        "required_memory_mb": 12000,
-        "T": 4096,
-        "filter_caps": ["completion", "finetune"],
-        "deprecated": True,
-    },
-    "starcoder/7b/base": {
-        "backend": "transformers",
-        "model_path": "smallcloudai/starcoderbase-7b",
-        "model_class_kwargs": {},
-        "required_memory_mb": 20000,
-        "T": 4096,
-        "filter_caps": ["completion", "finetune"],
-        "deprecated": True,
-    },
-    "wizardcoder/15b": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/WizardCoder-15B-1.0-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 18000,
-        "T": 4096,
-        "filter_caps": ["completion"],
-        "deprecated": True,
-    },
-    "wizardlm/7b": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 8000,
-        "T": 2048,
-        "filter_caps": ["chat"],
-        "deprecated": True,
-    },
-    "wizardlm/13b": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/WizardLM-13B-V1.1-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 14000,
-        "T": 2048,
-        "filter_caps": ["chat"],
-        "deprecated": True,
-    },
-    "llama2/7b": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/Llama-2-7b-Chat-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 8000,
-        "T": 2048,
-        "filter_caps": ["chat"],
-        "deprecated": True,
-    },
-    "llama2/13b": {
-        "backend": "autogptq",
-        "model_path": "TheBloke/Llama-2-13B-chat-GPTQ",
-        "model_class_kwargs": {},
-        "required_memory_mb": 14000,
-        "T": 2048,
-        "filter_caps": ["chat"],
-        "deprecated": True,
-    },
-    "codellama/7b": {
-        "backend": "transformers",
-        "model_path": "TheBloke/CodeLlama-7B-fp16",
-        "model_class_kwargs": {},
-        "required_memory_mb": 14000,
-        "T": 2048,
-        "filter_caps": ["completion"],
-        "deprecated": True,
-    },
-    "wizardlm/30b": {
-        "backend": "transformers",
-        "model_path": "TheBloke/WizardLM-30B-fp16",
-        "model_class_kwargs": {
-            "load_in_4bit": True,
-        },
-        "T": 2048,
-        "filter_caps": ["chat"],
-        "deprecated": True,
-    },
     "deepseek-coder/1.3b/base": {
         "backend": "transformers",
         "model_path": "deepseek-ai/deepseek-coder-1.3b-base",
```

refact_known_models/passthrough.py (+33)

```diff
@@ -89,4 +89,37 @@
         "pp1000t_generated": 15_000,
         "filter_caps": ["chat", "tools"],
     },
+    "gpt-4o-2024-05-13": {
+        "backend": "litellm",
+        "provider": "openai",
+        "tokenizer_path": "Xenova/gpt-4o",
+        "resolve_as": "gpt-4o-2024-05-13",
+        "T": 128_000,
+        "T_out": 4096,
+        "pp1000t_prompt": 5_000,
+        "pp1000t_generated": 15_000,  # $15.00 / 1M tokens
+        "filter_caps": ["chat", "tools"],
+    },
+    "gpt-4o-2024-08-06": {
+        "backend": "litellm",
+        "provider": "openai",
+        "tokenizer_path": "Xenova/gpt-4o",
+        "resolve_as": "gpt-4o-2024-08-06",
+        "T": 128_000,
+        "T_out": 4096,
+        "pp1000t_prompt": 2_500,
+        "pp1000t_generated": 10_000,  # $10.00 / 1M tokens
+        "filter_caps": ["chat", "tools"],
+    },
+    "gpt-4o-mini": {
+        "backend": "litellm",
+        "provider": "openai",
+        "tokenizer_path": "Xenova/gpt-4o",
+        "resolve_as": "gpt-4o-mini-2024-07-18",
+        "T": 128_000,
+        "T_out": 4096,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600,  # $0.60 / 1M tokens
+        "filter_caps": ["chat", "tools"],
+    },
 }
```
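The inline comments pin down the pricing unit: a `pp1000t_generated` of 15_000 corresponds to $15.00 per 1M tokens, so the values appear to be micro-dollars per 1,000 tokens. A minimal sketch under that assumption (the helper and its name are mine, not the repository's API):

```python
# Hypothetical sketch: convert pp1000t_* values to a dollar cost, assuming
# the unit is micro-dollars per 1,000 tokens, which matches the inline
# comments (e.g. 15_000 -> $15.00 per 1M generated tokens).


def usage_cost_usd(rec: dict, prompt_tokens: int, generated_tokens: int) -> float:
    """Price a request against a passthrough model record."""
    prompt_usd = prompt_tokens / 1000 * rec["pp1000t_prompt"] / 1_000_000
    generated_usd = generated_tokens / 1000 * rec["pp1000t_generated"] / 1_000_000
    return prompt_usd + generated_usd


# Example: 10k prompt + 1k generated tokens on gpt-4o-2024-08-06
# -> 10 * 2_500/1e6 + 1 * 10_000/1e6 = $0.025 + $0.010 = $0.035
```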

refact_webgui/webgui/selfhost_model_resolve.py (+1, -1)

```diff
@@ -39,7 +39,7 @@ def resolve_model_context_size(model_name: str, model_assigner: ModelAssigner) -
     if model_name in model_assigner.models_db:
         return model_assigner.model_assignment["model_assign"][model_name]["n_ctx"]

-    PASSTHROUGH_MAX_TOKENS_LIMIT = 16_000
+    PASSTHROUGH_MAX_TOKENS_LIMIT = 64_000

     if model_name in model_assigner.passthrough_mini_db:
         if max_tokens := model_assigner.passthrough_mini_db[model_name].get('T'):
```
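Raising this limit matters because the new gpt-4o entries advertise `T = 128_000`: the function reads the passthrough model's `T` and, presumably, clamps it to `PASSTHROUGH_MAX_TOKENS_LIMIT`. A sketch of that plausible continuation (the clamping line is my assumption; only the lines in the diff above come from the repository):

```python
# Hypothetical continuation of resolve_model_context_size, assuming the
# function clamps a passthrough model's advertised context ("T") to the limit.
PASSTHROUGH_MAX_TOKENS_LIMIT = 64_000


def resolve_context_size_sketch(model_rec: dict) -> int:
    max_tokens = model_rec.get("T", PASSTHROUGH_MAX_TOKENS_LIMIT)
    # A 128k-context model is served with a 64k window; before this commit
    # the same record would have been capped at 16k.
    return min(max_tokens, PASSTHROUGH_MAX_TOKENS_LIMIT)


# resolve_context_size_sketch({"T": 128_000}) -> 64_000
```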

setup.py (+2, -5)

```diff
@@ -35,10 +35,7 @@ class PyPackage:
     "refact_webgui": PyPackage(
         requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic>=2",
                   "starlette==0.27.0", "uvicorn", "uvloop", "termcolor", "python-multipart", "more_itertools",
-                  "scyllapy==1.3.0", "pandas>=2.0.3",
-                  # NOTE: litellm has bug with anthropic streaming, so we're staying on this version for now
-                  "litellm==1.42.0",
-                  ],
+                  "scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.44.24"],
         requires_packages=["refact_known_models", "refact_utils"],
         data=["webgui/static/*", "webgui/static/components/modals/*",
               "webgui/static/dashboards/*", "webgui/static/assets/*", "webgui/static/utils/*",]),
@@ -94,7 +91,7 @@ def get_install_requires(packages):

 setup(
     name="refact-self-hosting",
-    version="1.6.4",
+    version="1.7.0",
     py_modules=list(setup_packages.keys()),
     package_data={
         name: py_package.data
```
