Skip to content
59 changes: 59 additions & 0 deletions quantize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/python3
Comment thread
SuajCarrot marked this conversation as resolved.
Outdated

"""Script to execute quantization on a given model."""

import subprocess
import argparse
import sys
import os


def main():
    """Parse the command line arguments and quantize each requested model.

    Every positional argument names a sub-directory of ``models`` that is
    expected to contain ``ggml-model-f16.bin``. That file is passed to the
    ``./quantize`` executable together with the output path
    ``ggml-model-q4_0.bin`` in the same directory. With ``-r/--remove-16``
    the f16 file is deleted once ``./quantize`` has succeeded.

    Raises:
        FileNotFoundError: If an f16 model file (or the ``./quantize``
            executable) cannot be found.
        subprocess.CalledProcessError: If ``./quantize`` exits with a
            non-zero status.
    """

    f16_filename = "ggml-model-f16.bin"

    parser = argparse.ArgumentParser(
        prog='Quantization Script',
        description='This script quantizes a model.'
    )

    # A positional argument takes its destination from its own name;
    # passing dest= as well makes argparse raise ValueError.
    parser.add_argument(
        'models', nargs='+',
        help='Names of the model directories inside "models" to quantize.'
    )
    parser.add_argument(
        '-r', '--remove-16', action='store_true', dest='remove_f16',
        help='Remove the f16 model after quantizing it.'
    )

    args = parser.parse_args()

    # NOTE: the models are processed one after another. Each run is
    # independent of the others, so the loop body could be moved to a
    # top-level function and dispatched through multiprocessing.Pool().
    for model in args.models:
        model_dir = os.path.join("models", model)
        f16_path = os.path.join(model_dir, f16_filename)
        # Rename only the file name so that an "f16" occurring in the
        # directory part of the path is left alone.
        q4_0_path = os.path.join(
            model_dir, f16_filename.replace("f16", "q4_0")
        )

        if not os.path.isfile(f16_path):
            raise FileNotFoundError(
                f'The f16 model "{f16_path}" does not exist.'
            )

        # The arguments are given as a list WITHOUT shell=True: with a
        # shell, only the first list item would be executed and the paths
        # would never reach ./quantize.
        # The trailing "2" is forwarded unchanged; presumably it selects
        # the q4_0 format -- confirm against the ./quantize usage text.
        subprocess.run(
            ["./quantize", f16_path, q4_0_path, "2"],
            check=True
        )

        if args.remove_f16:
            os.remove(f16_path)


if __name__ == "__main__":
    # Report the failures handled below as a short message on stderr and a
    # non-zero exit status instead of an unhandled traceback.
    try:
        main()

    except subprocess.CalledProcessError:
        print(
            "An error occurred while trying to quantize the models.",
            file=sys.stderr
        )
        sys.exit(1)

    except FileNotFoundError as err:
        # Adjacent string literals are used instead of backslash
        # continuations so that source indentation never ends up inside
        # the printed message.
        print(
            "A FileNotFoundError exception was raised while executing the "
            f"script:\n{err}\nMake sure you are located in the root of the "
            'repository and that the models are in the "models" directory.',
            file=sys.stderr
        )
        sys.exit(1)

    except KeyboardInterrupt:
        # An interrupt from the keyboard ends the script with status 0.
        sys.exit(0)