Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
.vscode/

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10.6
15 changes: 9 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
FROM python:3.12.4-slim


RUN apt-get update -qq && apt-get install -y curl && rm -rf /var/lib/apt/lists/*

# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED True
ENV APP_HOME /app
ENV PORT 8080
ENV HOST 0.0.0.0
ENV STORAGE_BASE /
ENV STORAGE_DIR storage
ENV PYTHONUNBUFFERED=True
ENV APP_HOME=/app
ENV PORT=8080
ENV HOST=0.0.0.0
ENV STORAGE_BASE=/
ENV STORAGE_DIR=storage

# Python app installation
WORKDIR $APP_HOME
Expand Down
10 changes: 10 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Build the emulator image for several CPU architectures in one pass and push
# the resulting multi-platform image to Docker Hub.

# Stop on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a broken build is never reported as OK.
set -euo pipefail

# Define image details
IMAGE_NAME="gcp-storage-emulator"
IMAGE_TAG="latest"
DOCKERFILE_PATH="."
DOCKERHUB_USERNAME="jamesmtc"

# Build the image for linux/amd64 and linux/arm64 and push it.
# Every expansion is quoted so a value containing spaces or glob characters
# still reaches docker as a single argument.
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --tag "${DOCKERHUB_USERNAME}/${IMAGE_NAME}:${IMAGE_TAG}" \
  --push \
  "${DOCKERFILE_PATH}"
42 changes: 42 additions & 0 deletions globby.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from wcmatch import glob

def glob_with_array(file_paths, pattern):
    """Return the entries of ``file_paths`` that match ``pattern``.

    A path is kept when it matches the pattern as written, or, failing that,
    when it matches a copy of the pattern in which every ``**`` has been
    rewritten to ``*/*``. The order of ``file_paths`` is preserved.
    """
    plain_flags = glob.GLOBSTAR | glob.BRACE
    # Only the rewritten-pattern attempt additionally enables EXTGLOB.
    rewritten_flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB

    selected = []
    for candidate in file_paths:
        if glob.globmatch(candidate, pattern, flags=plain_flags):
            selected.append(candidate)
            continue
        # The rewrite is only computed when the direct match failed.
        if glob.globmatch(candidate, pattern.replace('**', '*/*'), flags=rewritten_flags):
            selected.append(candidate)
    return selected

# Ad-hoc check of glob_with_array: for each sample pattern it prints whether
# the filtered blob names equal the expected list. Nothing is asserted, so a
# mismatch is only visible in the printed output.
def test_match_glob():
# File paths with distinct patterns

# One-off probe: print whether "foo*bar" matches "foo/bar" under these flags.
matcher = glob.globmatch("foo/bar", "foo*bar", flags=glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB)
print(f"{matcher}")

blob_names = ["foo/bar", "foo/baz", "foo/foobar", "foobar"]

# Pattern -> names expected back from glob_with_array. The comparison below
# uses list equality, so the order of the names matters.
match_glob_results = {
"foo*bar": ["foobar"],
"foo**bar": ["foo/bar", "foo/foobar", "foobar"],
"**/foobar": ["foo/foobar", "foobar"],
"*/ba[rz]": ["foo/bar", "foo/baz"],
"*/ba[!a-y]": ["foo/baz"],
"**/{foobar,baz}": ["foo/baz", "foo/foobar", "foobar"],
"foo/{foo*,*baz}": ["foo/baz", "foo/foobar"],
}

# Iterate through the match glob patterns and expected results
for match_glob, expected_names in match_glob_results.items():
glob_results = glob_with_array(blob_names, match_glob)
print(f"\n")
if glob_results == expected_names:
print(f"Matched: {match_glob}")
else:
# On a mismatch, show both lists so the difference can be read off.
print(f"Not matched: {match_glob}")
print(f"Expected: {expected_names}")
print(f"Actual: {glob_results}")
print(f"\n")

# Module-level call: the check runs whenever this file is executed or imported.
test_match_glob()
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ requires = [
]

[tool.pytest.ini_options]
pythonpath = [
"."
]
addopts = "--cov=src --cov-report=xml --cov-branch"
testpaths = [
"tests"
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
install_requires=[
"fs",
"google-crc32c",
"wcmatch"
],
python_requires=">=3.7",
)
4 changes: 4 additions & 0 deletions src/gcp_storage_emulator/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ class NotFound(Exception):

class Conflict(Exception):
    """Signal that a request conflicts with the current state of the emulator."""


class BadRequest(Exception):
    """Signal that a request is invalid and cannot be served as sent."""
4 changes: 3 additions & 1 deletion src/gcp_storage_emulator/handlers/buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from http import HTTPStatus

from gcp_storage_emulator import settings
from gcp_storage_emulator.exceptions import Conflict, NotFound
from gcp_storage_emulator.exceptions import Conflict, NotFound, BadRequest

logger = logging.getLogger("api.bucket")

Expand Down Expand Up @@ -110,5 +110,7 @@ def delete(request, response, storage, *args, **kwargs):
storage.delete_bucket(name)
except NotFound:
response.status = HTTPStatus.NOT_FOUND
except BadRequest:
response.status = HTTPStatus.BAD_REQUEST
except Conflict:
response.status = HTTPStatus.CONFLICT
7 changes: 5 additions & 2 deletions src/gcp_storage_emulator/handlers/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import google_crc32c

from gcp_storage_emulator.exceptions import Conflict, NotFound
from gcp_storage_emulator.exceptions import Conflict, NotFound, BadRequest

logger = logging.getLogger("api.object")

Expand Down Expand Up @@ -354,13 +354,16 @@ def get(request, response, storage, *args, **kwargs):
def ls(request, response, storage, *args, **kwargs):
    """List the objects of a bucket, optionally filtered.

    Reads ``bucket_name`` from the route params and the optional ``prefix``,
    ``delimiter`` and ``matchGlob`` query parameters, then delegates to
    ``storage.get_file_list`` exactly once.

    On success a JSON listing with ``prefixes`` and ``items`` is written to
    ``response``. If the storage layer raises ``NotFound`` the status is set
    to 404; if it raises ``BadRequest`` the status is set to 400.
    """

    def first_value(name):
        # Query values are indexed as sequences; a missing or empty entry is
        # treated as "parameter not supplied".
        values = request.query.get(name)
        return values[0] if values else None

    bucket_name = request.params["bucket_name"]
    prefix = first_value("prefix")
    delimiter = first_value("delimiter")
    match_glob = first_value("matchGlob")

    try:
        files, prefixes = storage.get_file_list(
            bucket_name, prefix, delimiter, match_glob
        )
    except NotFound:
        response.status = HTTPStatus.NOT_FOUND
    except BadRequest:
        response.status = HTTPStatus.BAD_REQUEST
    else:
        response.json({"kind": "storage#object", "prefixes": prefixes, "items": files})

Expand Down
14 changes: 11 additions & 3 deletions src/gcp_storage_emulator/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,12 @@ def __init__(self, request_handler, method):
self._path = request_handler.path
self._request_handler = request_handler
self._server_address = request_handler.server.server_address
self._base_url = "http://{}:{}".format(
self._server_address[0], self._server_address[1]
)
if request_handler.hostname:
self._base_url = "http://{}".format(request_handler.hostname)
else:
self._base_url = "http://{}:{}".format(
self._request_handler.server_address[0], self._request_handler.server_address[1]
)
self._full_url = self._base_url + self._path
self._parsed_url = urlparse(self._full_url)
self._query = parse_qs(self._parsed_url.query)
Expand Down Expand Up @@ -351,22 +354,27 @@ def __init__(self, storage, *args, **kwargs):
super().__init__(*args, **kwargs)

def do_GET(self):
    """Serve a GET request: remember its Host header, then route it."""
    # Kept on the handler so code that receives this handler can read it.
    self.hostname = self.headers.get('Host')
    Router(self).handle(GET)

def do_POST(self):
    """Serve a POST request: remember its Host header, then route it."""
    # Kept on the handler so code that receives this handler can read it.
    self.hostname = self.headers.get('Host')
    Router(self).handle(POST)

def do_DELETE(self):
    """Serve a DELETE request: remember its Host header, then route it."""
    # Kept on the handler so code that receives this handler can read it.
    self.hostname = self.headers.get('Host')
    Router(self).handle(DELETE)

def do_PUT(self):
    """Serve a PUT request: remember its Host header, then route it."""
    # Kept on the handler so code that receives this handler can read it.
    self.hostname = self.headers.get('Host')
    Router(self).handle(PUT)

def do_PATCH(self):
    """Serve a PATCH request: remember its Host header, then route it."""
    # Kept on the handler so code that receives this handler can read it.
    self.hostname = self.headers.get('Host')
    Router(self).handle(PATCH)

Expand Down
70 changes: 47 additions & 23 deletions src/gcp_storage_emulator/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
import logging
import os
from hashlib import sha256

from wcmatch import glob
import re
import fs
from fs.errors import FileExpected, ResourceNotFound

from gcp_storage_emulator.exceptions import Conflict, NotFound
from gcp_storage_emulator.exceptions import Conflict, NotFound, BadRequest
from gcp_storage_emulator.settings import STORAGE_BASE, STORAGE_DIR

# Real buckets can't start with an underscore
Expand Down Expand Up @@ -92,7 +93,7 @@ def get_bucket(self, bucket_name):

return self.buckets.get(bucket_name)

def get_file_list(self, bucket_name, prefix=None, delimiter=None):
def get_file_list(self, bucket_name, prefix=None, delimiter=None, match_glob=None):
"""Lists all the blobs in the bucket that begin with the prefix.

This can be used to list all blobs in a "folder", e.g. "public/".
Expand Down Expand Up @@ -123,29 +124,52 @@ def get_file_list(self, bucket_name, prefix=None, delimiter=None):
if bucket_name not in self.buckets:
raise NotFound

prefix_len = 0
prefixes = []
bucket_objects = self.objects.get(bucket_name, {})
if prefix:
prefix_len = len(prefix)
objs = list(
file_object
for file_name, file_object in bucket_objects.items()
if file_name.startswith(prefix)
and (not delimiter or delimiter not in file_name[prefix_len:])
)
objs = []
prefixes = set()



# If matchGlob is provided, filter objects using the glob pattern
if match_glob:
# Requests that use the matchGlob parameter fail if they also include a delimiter parameter set to a value other than /.
if delimiter:
if delimiter != "/":
raise BadRequest("When listing with a glob pattern, the only supported delimiter is '/'.",)
else:
objs = [
file_object for file_name, file_object in bucket_objects.items()
if '/' not in file_name
if not re.search(r'/[^*]\*\//gm', match_glob)
if glob.globmatch(file_name.split("/")[-1], match_glob.replace('**/', '*',).replace('**', '*'), flags=glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB)
]

else:
objs = [
file_object for file_name, file_object in bucket_objects.items()
if glob.globmatch(file_name, match_glob, flags=glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB)
or glob.globmatch(file_name, match_glob.replace('**', '*/*'), flags=glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB)
]

# If matchGlob is not provided, apply the prefix and delimiter filtering
else:
objs = list(bucket_objects.values())
if delimiter:
prefixes = list(
file_name[:prefix_len]
+ file_name[prefix_len:].split(delimiter, 1)[0]
+ delimiter
for file_name in list(bucket_objects)
if file_name.startswith(prefix or "")
and delimiter in file_name[prefix_len:]
)
for file_name, file_object in bucket_objects.items():
if prefix is None or file_name.startswith(prefix):
prefix_len = len(prefix) if prefix else 0
if delimiter:
if delimiter in file_name[prefix_len:]:
prefix_end_index = file_name.find(delimiter, prefix_len) + len(delimiter)
prefixes.add(file_name[:prefix_end_index])
else:
objs.append(file_object)
else:
objs.append(file_object)

# Convert prefixes set to a sorted list
prefixes = sorted(list(prefixes))

return objs, prefixes


def create_bucket(self, bucket_name, bucket_obj):
"""Create a bucket object representation and save it to the current fs
Expand Down
67 changes: 66 additions & 1 deletion tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from io import BytesIO
from tempfile import NamedTemporaryFile
from unittest import TestCase as BaseTestCase

import sys
import fs
import requests
from google.api_core.exceptions import BadRequest, Conflict, NotFound
Expand Down Expand Up @@ -597,6 +597,71 @@ def test_list_blobs_with_prefix_and_delimiter(self):
self._assert_blob_list(blobs, [blob_1, blob_2])
self.assertEqual(blobs.prefixes, {"a/b/"})


def test_bucket_list_blobs_w_match_glob(self):
    """Listing with ``match_glob`` yields exactly the expected blob names."""
    bucket = self._client.create_bucket("bucket_name")

    # Blob names chosen so the patterns below select different subsets.
    for blob_name in ["foo/bar", "foo/baz", "foo/foobar", "foobar"]:
        bucket.blob(blob_name).upload_from_string("helloworld")

    # Glob pattern -> names expected back; compared as lists, so order counts.
    expected_by_pattern = {
        "foo*bar": ["foobar"],
        "foo**bar": ["foo/bar", "foo/foobar", "foobar"],
        "**/foobar": ["foo/foobar", "foobar"],
        "*/ba[rz]": ["foo/bar", "foo/baz"],
        "*/ba[!a-y]": ["foo/baz"],
        "**/{foobar,baz}": ["foo/baz", "foo/foobar", "foobar"],
        "foo/{foo*,*baz}": ["foo/baz", "foo/foobar"],
    }

    for pattern, wanted in expected_by_pattern.items():
        listing = self._client.list_blobs(bucket, match_glob=pattern)
        returned = [entry.name for entry in listing if entry]
        self.assertEqual(returned, wanted)

def test_bucket_list_blobs_w_match_glob_and_delimiter(self):
    """Listing with ``match_glob`` and ``delimiter="/"`` yields the expected names."""
    bucket = self._client.create_bucket("bucket_name")

    # Mix of nested and top-level blob names.
    for blob_name in ["all/foo/bar", "foo/baz", "foo/389_bar", "bar", "baz"]:
        bucket.blob(blob_name).upload_from_string("helloworld")

    # Glob pattern -> names expected back; compared as lists, so order counts.
    expected_by_pattern = {
        "foo*bar": [],
        "foo**bar": [],
        "**/bar": ["bar"],
        "*/bar": [],
        "*/ba[rz]": [],
        "**ba[rz]": ["bar", "baz"],
        "*/ba[!a-y]": [],
        "*ba[!a-y]": ["baz"],
        "**/{foobar,baz}": ["baz"],
        "foo/{foo*,*baz}": [],
        "*{foo*,*baz}": ["baz"],
    }

    for pattern, wanted in expected_by_pattern.items():
        listing = self._client.list_blobs(bucket, match_glob=pattern, delimiter="/")
        returned = [entry.name for entry in listing if entry]
        self.assertEqual(returned, wanted)


def test_wrong_delimiter_with_matchGlob(self):
    """A ``match_glob`` listing with a delimiter other than "/" is rejected.

    The test fails if the listing does not raise ``BadRequest``.
    """
    bucket = self._client.create_bucket("bucket_name")

    # list_blobs() hands back a lazy iterator, so it is consumed with list()
    # to make sure the request is actually sent. assertRaises fails the test
    # when no exception is raised, unlike a try/except with the assertion
    # inside the handler, which passes silently in that case.
    with self.assertRaises(BadRequest):
        list(self._client.list_blobs(bucket, delimiter="*", match_glob="*.pdf"))

def test_bucket_copy_existing(self):
bucket = self._client.create_bucket("bucket_name")

Expand Down
Loading