diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..afdbb1c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,34 @@
+# Container image for the pdftext command-line extractor.
+FROM python:3.12.3-alpine
+
+# Do not write .pyc files, and keep stdout/stderr unbuffered.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+# Toolchain and native headers for dependencies that may need to be compiled
+# on Alpine (musl). The official python image already provides pip and the
+# CPython headers, so apk's python3-dev and py3-pip are not installed.
+RUN apk add --no-cache \
+    build-base \
+    gfortran \
+    lapack-dev \
+    libffi-dev
+
+WORKDIR /app
+
+# Pin Poetry so rebuilds are reproducible; skip pip's download cache.
+RUN pip install --no-cache-dir poetry==1.8.3
+
+# Copy only the dependency manifests first so the install layer below is
+# reused until pyproject.toml or poetry.lock change.
+COPY pyproject.toml poetry.lock /app/
+
+# Install into the image's own interpreter instead of a Poetry-managed
+# virtualenv; otherwise the python3 in ENTRYPOINT cannot import the packages.
+RUN POETRY_VIRTUALENVS_CREATE=false poetry install --no-root --no-interaction --no-ansi
+
+COPY . /app/
+
+# ENTRYPOINT rather than CMD: arguments passed to docker run are appended to
+# this command instead of replacing it.
+ENTRYPOINT ["python3", "extract_text.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
index d5ce939..306f83f 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,30 @@ text = dictionary_output(pdf, sort=False, page_range=[1,2,3]) # Optional argumen
 
 If you want more customization, check out the `pdftext.extraction._get_pages` function for a starting point to dig deeper.  pdftext is a pretty thin wrapper around [pypdfium2](https://pypdfium2.readthedocs.io/en/stable/), so you might want to look at the documentation for that as well.
 
+# Run on Docker
+Clone the repository
+```
+git clone <repository-url>
+
+```
+
+Build the Docker image
+```
+cd pdftext
+docker build -t pdftext .
+
+```
+
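+Run with Docker
+```
+# write out a text file (the bind mount lets the container read the PDF and keeps the output on the host)
+docker run --rm -v "$(pwd)":/data pdftext /data/PDF_PATH --out_path /data/output.txt
+
+# write out a json file
+docker run --rm -v "$(pwd)":/data pdftext /data/PDF_PATH --out_path /data/output.json --json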
+
+```
+
 # Benchmarks
 
 I benchmarked extraction speed and accuracy of [pymupdf](https://pymupdf.readthedocs.io/en/latest/), [pdfplumber](https://github.com/jsvine/pdfplumber), and pdftext.  I chose pymupdf because it extracts blocks and lines.  Pdfplumber extracts words and bboxes.  I did not benchmark pypdf, even though it is a great library, because it doesn't provide individual character/line/block and bbox information.
diff --git a/poetry.lock b/poetry.lock
index eb9abc9..3c80cad 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1911,4 +1911,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13,!=3.9.7"
-content-hash = "52cfc286016e488015d31fb2f8b9b92a715b81a352dfbd6dbbacb88808fb0294"
+content-hash = "52cfc286016e488015d31fb2f8b9b92a715b81a352dfbd6dbbacb88808fb0294"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index a71f9de..9a94791 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,4 +35,4 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry.scripts]
-pdftext = "extract_text:main"
+pdftext = "extract_text:main"
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8ea5bed
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+joblib==1.4.0
+numpy==1.26.4
+pydantic==2.7.1
+pydantic-settings==2.2.1
+pypdfium2==4.29.0
+scikit-learn==1.4.2
+