From d4e26fcbf7920663b77f04508817d9bb9a2d3559 Mon Sep 17 00:00:00 2001 From: FFAMax Date: Sun, 27 Oct 2024 21:36:42 -0700 Subject: [PATCH 1/3] Update device_capabilities.py --- exo/topology/device_capabilities.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/topology/device_capabilities.py b/exo/topology/device_capabilities.py index a5f3597c3..5c69e57d8 100644 --- a/exo/topology/device_capabilities.py +++ b/exo/topology/device_capabilities.py @@ -94,6 +94,9 @@ def to_dict(self): "NVIDIA TITAN RTX": DeviceFlops(fp32=16.31*TFLOPS, fp16=32.62*TFLOPS, int8=65.24*TFLOPS), # GTX 10 series "NVIDIA GEFORCE GTX 1050 TI": DeviceFlops(fp32=2.0*TFLOPS, fp16=4.0*TFLOPS, int8=8.0*TFLOPS), + "NVIDIA GEFORCE GTX 1070": DeviceFlops(fp32=6.463*TFLOPS, fp16=0.101*TFLOPS, int8=25.852*TFLOPS), + "NVIDIA GEFORCE GTX 1080": DeviceFlops(fp32=8.873*TFLOPS, fp16=0.138*TFLOPS, int8=35.492*TFLOPS), + "NVIDIA GEFORCE GTX 1080 TI": DeviceFlops(fp32=11.34*TFLOPS, fp16=0.177*TFLOPS, int8=45.36*TFLOPS), # GTX 16 series "NVIDIA GeForce GTX 1660 TI": DeviceFlops(fp32=4.8*TFLOPS, fp16=9.6*TFLOPS, int8=19.2*TFLOPS), # QUADRO RTX Ampere series From dbf40d78373ae47e9199a92359fc2874854525e4 Mon Sep 17 00:00:00 2001 From: FFAMax Date: Sun, 27 Oct 2024 23:12:17 -0700 Subject: [PATCH 2/3] Update main.py: Default timeout 90->900 On slow setups (~1 token per second), an average response of ~600-1000 tokens takes roughly 600-1000 seconds to generate. With the 90-second default, most such requests end in a timeout, which is reported as a network error even though it is not one. Raise the default to reduce these exceptions. Users who want better performance and know what they are doing can adjust the value, with an understanding of how it will affect their setup. The new default is meant to work for most cases. 
--- exo/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/main.py b/exo/main.py index 90bcc73c5..b8bdb152d 100644 --- a/exo/main.py +++ b/exo/main.py @@ -43,7 +43,7 @@ parser.add_argument("--discovery-config-path", type=str, default=None, help="Path to discovery config json file") parser.add_argument("--wait-for-peers", type=int, default=0, help="Number of peers to wait to connect to before starting") parser.add_argument("--chatgpt-api-port", type=int, default=8000, help="ChatGPT API port") -parser.add_argument("--chatgpt-api-response-timeout", type=int, default=90, help="ChatGPT API response timeout in seconds") +parser.add_argument("--chatgpt-api-response-timeout", type=int, default=900, help="ChatGPT API response timeout in seconds") parser.add_argument("--max-generate-tokens", type=int, default=10000, help="Max tokens to generate in each request") parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use (mlx, tinygrad, or dummy)") parser.add_argument("--disable-tui", action=argparse.BooleanOptionalAction, help="Disable TUI") From ab3e76a4ec4f0274938fd2f828fcc70adcc1cc55 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Sat, 23 Nov 2024 23:06:48 +0400 Subject: [PATCH 3/3] change chatgpt-api-response-timeout default back to 90 --- exo/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/main.py b/exo/main.py index 5e7ec4553..395de02b8 100644 --- a/exo/main.py +++ b/exo/main.py @@ -49,7 +49,7 @@ parser.add_argument("--discovery-config-path", type=str, default=None, help="Path to discovery config json file") parser.add_argument("--wait-for-peers", type=int, default=0, help="Number of peers to wait to connect to before starting") parser.add_argument("--chatgpt-api-port", type=int, default=52415, help="ChatGPT API port") -parser.add_argument("--chatgpt-api-response-timeout", type=int, default=900, help="ChatGPT API response timeout in seconds") 
+parser.add_argument("--chatgpt-api-response-timeout", type=int, default=90, help="ChatGPT API response timeout in seconds") parser.add_argument("--max-generate-tokens", type=int, default=10000, help="Max tokens to generate in each request") parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use (mlx, tinygrad, or dummy)") parser.add_argument("--disable-tui", action=argparse.BooleanOptionalAction, help="Disable TUI")