Skip to content

Commit

Permalink
Support py binding & fix encoding for Windows (#81)
Browse files Browse the repository at this point in the history
* Support py binding & fix encoding for Windows

* Parallel setup by default & add ci for windows
  • Loading branch information
li-plus authored Aug 7, 2023
1 parent 5985bc9 commit 8980eb1
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 15 deletions.
27 changes: 25 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ on:
jobs:
build:

runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
os: [ubuntu-latest, macos-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
Expand All @@ -41,3 +42,25 @@ jobs:
run: |
cd tests
pytest test_chatglm_cpp.py
build-windows:

runs-on: windows-latest

steps:
- uses: actions/checkout@v3
with:
submodules: true
- name: Set up Python 3.8
uses: actions/setup-python@v4
with:
python-version: "3.8"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pytest
pip install . -v
- name: Test with pytest
run: |
cd tests
pytest test_chatglm_cpp.py
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# ide
.vscode/
.vs/

# macOS
.DS_Store
Expand Down
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,17 @@ C++ implementation of [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [Cha

## Features

Highlights:
* [x] Pure C++ implementation based on [ggml](https://github.com/ggerganov/ggml), working in the same way as [llama.cpp](https://github.com/ggerganov/llama.cpp).
* [x] Accelerated memory-efficient CPU inference with int4/int8 quantization, optimized KV cache and parallel computing.
* [x] Streaming generation with typewriter effect.
* [x] Python binding, web demo, and more possibilities.

Support Matrix:
* Hardware: x86/arm CPU, NVIDIA GPU, Apple Silicon GPU
* Platforms: Linux, macOS, Windows
* Models: ChatGLM, ChatGLM2, CodeGeeX2

## Getting Started

**Preparation**
Expand Down Expand Up @@ -58,7 +64,7 @@ For LoRA model, add `-l <lora_model_name_or_path>` flag to merge your LoRA weigh
Compile the project using CMake:
```sh
cmake -B build
cmake --build build -j
cmake --build build -j --config Release
```

Now you may chat with the quantized ChatGLM-6B model by running:
Expand Down
9 changes: 5 additions & 4 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@ static void chat(Args &args) {
chatglm::GenerationConfig gen_config(args.max_length, args.max_context_length, args.temp > 0, args.top_k,
args.top_p, args.temp, args.num_threads);

#if defined(_WIN32)
_setmode(_fileno(stdin), _O_WTEXT);
#endif

if (args.verbose) {
std::cout << "system info: | "
<< "AVX = " << ggml_cpu_has_avx() << " | "
Expand Down Expand Up @@ -232,6 +228,11 @@ static void chat(Args &args) {
}

int main(int argc, char **argv) {
#if defined(_WIN32)
SetConsoleOutputCP(CP_UTF8);
_setmode(_fileno(stdin), _O_WTEXT);
#endif

try {
Args args = parse_args(argc, argv);
chat(args);
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ classifiers = [
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
dynamic = ["version"]

Expand Down
92 changes: 86 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# reference: https://github.com/pybind/cmake_example

import os
import re
import subprocess
Expand All @@ -7,8 +9,18 @@
from setuptools import Extension, find_packages, setup
from setuptools.command.build_ext import build_ext

# Convert distutils Windows platform specifiers to CMake -A arguments
PLAT_TO_CMAKE = {
"win32": "Win32",
"win-amd64": "x64",
"win-arm32": "ARM",
"win-arm64": "ARM64",
}

# reference: https://github.com/pybind/cmake_example

# A CMakeExtension needs a sourcedir instead of a file list.
# The name must be the _single_ output extension from the CMake build.
# If you need multiple extensions, see scikit-build.
class CMakeExtension(Extension):
def __init__(self, name: str, sourcedir: str = "") -> None:
super().__init__(name, sources=[])
Expand All @@ -17,27 +29,95 @@ def __init__(self, name: str, sourcedir: str = "") -> None:

class CMakeBuild(build_ext):
def build_extension(self, ext: CMakeExtension) -> None:
# Must be in this form due to bug in .resolve() only fixed in Python 3.10+
ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
extdir = ext_fullpath.parent.resolve()

# CMAKE_LIBRARY_OUTPUT_DIRECTORY (passed to CMake below) requires a trailing
# slash for auto-detection & inclusion of auxiliary "native" libs

debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
cfg = "Debug" if debug else "Release"

# CMake lets you override the generator - we need to check this.
# Can be set with Conda-Build, for example.
cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

# Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON
# EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code
# from Python.
cmake_args = [
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
f"-DPYTHON_EXECUTABLE={sys.executable}",
f"-DCMAKE_BUILD_TYPE={cfg}",
f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm
f"-DCHATGLM_ENABLE_PYBIND=ON",
]

build_args = []
# Adding CMake arguments set as environment variable
# (needed e.g. to build for ARM macOS on conda-forge)
if "CMAKE_ARGS" in os.environ:
cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

if self.compiler.compiler_type != "msvc":
# Using Ninja-build since it a) is available as a wheel and b)
# multithreads automatically. MSVC would require all variables be
# exported for Ninja to pick it up, which is a little tricky to do.
# Users can override the generator with CMAKE_GENERATOR in CMake
# 3.15+.
if not cmake_generator or cmake_generator == "Ninja":
try:
import ninja

ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
cmake_args += [
"-GNinja",
f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
]
except ImportError:
pass

else:
# Single config generators are handled "normally"
single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})

# CMake allows an arch-in-generator style for backward compatibility
contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})

# Specify the arch if using MSVC generator, but only if it doesn't
# contain a backward-compatibility arch spec already in the
# generator name.
if not single_config and not contains_arch:
cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]

# Multi-config generators have a different way to specify configs
if not single_config:
cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
build_args += ["--config", cfg]

if sys.platform.startswith("darwin"):
# Cross-compile support for macOS - respect ARCHFLAGS if set
archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
if archs:
cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]

# Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
# across all generators.
# if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
# # self.parallel is a Python 3 only way to set parallel jobs by hand
# # using -j in the build_ext call, not supported by pip or PyPA-build.
# if hasattr(self, "parallel") and self.parallel:
# # CMake 3.12+ only.
# build_args += [f"-j{self.parallel}"]

# Compile in parallel by default
build_args += [f"-j"]

build_temp = Path(self.build_temp) / ext.name
build_temp.mkdir(parents=True, exist_ok=True)
if not build_temp.exists():
build_temp.mkdir(parents=True)

subprocess.run(["cmake", ext.sourcedir] + cmake_args, cwd=build_temp, check=True)
subprocess.run(["cmake", "--build", ".", "-j"], cwd=build_temp, check=True)
subprocess.run(["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True)
subprocess.run(["cmake", "--build", ".", *build_args], cwd=build_temp, check=True)


HERE = Path(__file__).resolve().parent
Expand Down
2 changes: 1 addition & 1 deletion third_party/ggml
Submodule ggml updated from 627ed1 to a30107

0 comments on commit 8980eb1

Please sign in to comment.