Support py binding & fix encoding for Windows (#81)

* Support py binding & fix encoding for Windows
* Parallel setup by default & add CI for Windows
li-plus · Aug 7, 2023 · 8980eb1 · 8980eb1
1 parent 5985bc9
commit 8980eb1
Show file tree

Hide file tree

Showing 7 changed files with 129 additions and 15 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -12,11 +12,12 @@ on:
 jobs:
   build:
 
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        os: [ubuntu-latest, macos-latest]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
     - uses: actions/checkout@v3
@@ -41,3 +42,25 @@ jobs:
       run: |
         cd tests
         pytest test_chatglm_cpp.py
+
+  build-windows:
+
+    runs-on: windows-latest
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        submodules: true
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.8"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install pytest
+        pip install . -v
+    - name: Test with pytest
+      run: |
+        cd tests
+        pytest test_chatglm_cpp.py
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 # ide
 .vscode/
+.vs/
 
 # macOS
 .DS_Store

diff --git a/README.md b/README.md
@@ -11,11 +11,17 @@ C++ implementation of [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [Cha
 
 ## Features
 
+Highlights:
 * [x] Pure C++ implementation based on [ggml](https://github.com/ggerganov/ggml), working in the same way as [llama.cpp](https://github.com/ggerganov/llama.cpp).
 * [x] Accelerated memory-efficient CPU inference with int4/int8 quantization, optimized KV cache and parallel computing.
 * [x] Streaming generation with typewriter effect.
 * [x] Python binding, web demo, and more possibilities.
 
+Support Matrix:
+* Hardware: x86/ARM CPU, NVIDIA GPU, Apple Silicon GPU
+* Platforms: Linux, macOS, Windows
+* Models: ChatGLM, ChatGLM2, CodeGeeX2
+
 ## Getting Started
 
 **Preparation**
@@ -58,7 +64,7 @@ For LoRA model, add `-l <lora_model_name_or_path>` flag to merge your LoRA weigh
 Compile the project using CMake:
 ```sh
 cmake -B build
-cmake --build build -j
+cmake --build build -j --config Release
 ```
 
 Now you may chat with the quantized ChatGLM-6B model by running:

diff --git a/main.cpp b/main.cpp
@@ -141,10 +141,6 @@ static void chat(Args &args) {
     chatglm::GenerationConfig gen_config(args.max_length, args.max_context_length, args.temp > 0, args.top_k,
                                          args.top_p, args.temp, args.num_threads);
 
-#if defined(_WIN32)
-    _setmode(_fileno(stdin), _O_WTEXT);
-#endif
-
     if (args.verbose) {
         std::cout << "system info: | "
                   << "AVX = " << ggml_cpu_has_avx() << " | "
@@ -232,6 +228,11 @@ static void chat(Args &args) {
 }
 
 int main(int argc, char **argv) {
+#if defined(_WIN32)
+    SetConsoleOutputCP(CP_UTF8);
+    _setmode(_fileno(stdin), _O_WTEXT);
+#endif
+
     try {
         Args args = parse_args(argc, argv);
         chat(args);

diff --git a/pyproject.toml b/pyproject.toml
@@ -23,7 +23,10 @@ classifiers = [
     "Intended Audience :: Science/Research",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "License :: OSI Approved :: MIT License",
-    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
 ]
 dynamic = ["version"]
 

diff --git a/setup.py b/setup.py
@@ -1,3 +1,5 @@
+# reference: https://github.com/pybind/cmake_example
+
 import os
 import re
 import subprocess
@@ -7,8 +9,18 @@
 from setuptools import Extension, find_packages, setup
 from setuptools.command.build_ext import build_ext
 
+# Convert distutils Windows platform specifiers to CMake -A arguments
+PLAT_TO_CMAKE = {
+    "win32": "Win32",
+    "win-amd64": "x64",
+    "win-arm32": "ARM",
+    "win-arm64": "ARM64",
+}
 
-# reference: https://github.com/pybind/cmake_example
+
+# A CMakeExtension needs a sourcedir instead of a file list.
+# The name must be the _single_ output extension from the CMake build.
+# If you need multiple extensions, see scikit-build.
 class CMakeExtension(Extension):
     def __init__(self, name: str, sourcedir: str = "") -> None:
         super().__init__(name, sources=[])
@@ -17,27 +29,95 @@ def __init__(self, name: str, sourcedir: str = "") -> None:
 
 class CMakeBuild(build_ext):
     def build_extension(self, ext: CMakeExtension) -> None:
+        # Must be in this form due to bug in .resolve() only fixed in Python 3.10+
         ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
         extdir = ext_fullpath.parent.resolve()
 
+        # CMAKE_LIBRARY_OUTPUT_DIRECTORY (set in cmake_args below) requires a
+        # trailing slash for auto-detection & inclusion of auxiliary "native" libs
+
         debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
         cfg = "Debug" if debug else "Release"
 
+        # CMake lets you override the generator - we need to check this.
+        # Can be set with Conda-Build, for example.
+        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")
+
+        # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON.
+        # NOTE: the reference cmake_example also passes EXAMPLE_VERSION_INFO to
+        # show how to hand a value from Python to the C++ code; it is not used here.
         cmake_args = [
             f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
             f"-DPYTHON_EXECUTABLE={sys.executable}",
-            f"-DCMAKE_BUILD_TYPE={cfg}",
+            f"-DCMAKE_BUILD_TYPE={cfg}",  # not used on MSVC, but no harm
             f"-DCHATGLM_ENABLE_PYBIND=ON",
         ]
-
+        build_args = []
+        # Append extra CMake arguments supplied via the CMAKE_ARGS environment
+        # variable (needed e.g. to build for ARM macOS on conda-forge)
         if "CMAKE_ARGS" in os.environ:
             cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]
 
+        if self.compiler.compiler_type != "msvc":
+            # Using Ninja-build since it a) is available as a wheel and b)
+            # multithreads automatically. MSVC would require all variables be
+            # exported for Ninja to pick it up, which is a little tricky to do.
+            # Users can override the generator with CMAKE_GENERATOR in CMake
+            # 3.15+.
+            if not cmake_generator or cmake_generator == "Ninja":
+                try:
+                    import ninja
+
+                    ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
+                    cmake_args += [
+                        "-GNinja",
+                        f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
+                    ]
+                except ImportError:
+                    pass
+
+        else:
+            # Single config generators are handled "normally"
+            single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})
+
+            # CMake allows an arch-in-generator style for backward compatibility
+            contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})
+
+            # Specify the arch if using MSVC generator, but only if it doesn't
+            # contain a backward-compatibility arch spec already in the
+            # generator name.
+            if not single_config and not contains_arch:
+                cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]
+
+            # Multi-config generators have a different way to specify configs
+            if not single_config:
+                cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
+                build_args += ["--config", cfg]
+
+        if sys.platform.startswith("darwin"):
+            # Cross-compile support for macOS - respect ARCHFLAGS if set
+            archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
+            if archs:
+                cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]
+
+        # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
+        # across all generators.
+        # if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
+        #     # self.parallel is a Python 3 only way to set parallel jobs by hand
+        #     # using -j in the build_ext call, not supported by pip or PyPA-build.
+        #     if hasattr(self, "parallel") and self.parallel:
+        #         # CMake 3.12+ only.
+        #         build_args += [f"-j{self.parallel}"]
+
+        # Compile in parallel by default (bare -j: the native build tool picks the job count)
+        build_args += [f"-j"]
+
         build_temp = Path(self.build_temp) / ext.name
-        build_temp.mkdir(parents=True, exist_ok=True)
+        if not build_temp.exists():
+            build_temp.mkdir(parents=True)
 
-        subprocess.run(["cmake", ext.sourcedir] + cmake_args, cwd=build_temp, check=True)
-        subprocess.run(["cmake", "--build", ".", "-j"], cwd=build_temp, check=True)
+        subprocess.run(["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True)
+        subprocess.run(["cmake", "--build", ".", *build_args], cwd=build_temp, check=True)
 
 
 HERE = Path(__file__).resolve().parent

diff --git a/third_party/ggml b/third_party/ggml