Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update BINARY_SUPPORT to use Content-Encoding to identify if data is binary #971

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tests/test_binary_support_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
API_STAGE = "dev"
APP_FUNCTION = "app"
APP_MODULE = "tests.test_wsgi_binary_support_app"
BINARY_SUPPORT = True
CONTEXT_HEADER_MAPPINGS = {}
DEBUG = "True"
DJANGO_SETTINGS = None
DOMAIN = "api.example.com"
ENVIRONMENT_VARIABLES = {}
LOG_LEVEL = "DEBUG"
PROJECT_NAME = "binary_support_settings"
COGNITO_TRIGGER_MAPPING = {}
125 changes: 125 additions & 0 deletions tests/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from mock import Mock

from tests.utils import is_base64
from zappa.handler import LambdaHandler
from zappa.utilities import merge_headers

Expand Down Expand Up @@ -225,6 +226,130 @@ def test_exception_handler_on_web_request(self):
self.assertEqual(response["statusCode"], 500)
mocked_exception_handler.assert_called()

def test_wsgi_script_binary_support_with_content_encoding(self):
"""
Ensure that response body is base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding header is present.
""" # don't linebreak so that whole line is shown during nosetest readout
lh = LambdaHandler("tests.test_binary_support_settings")

text_plain_event = {
"body": "",
"resource": "/{proxy+}",
"requestContext": {},
"queryStringParameters": {},
"headers": {
"Host": "1234567890.execute-api.us-east-1.amazonaws.com",
},
"pathParameters": {"proxy": "return/request/url"},
"httpMethod": "GET",
"stageVariables": {},
"path": "/content_encoding_header_json1",
}

# A likely scenario is that the application would be gzip compressing some json response. That's checked first.
response = lh.handler(text_plain_event, None)

self.assertEqual(response["statusCode"], 200)
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

# We also verify that some unknown mimetype with a Content-Encoding also encodes to b64. This route serves
# bytes in the response.

text_arbitrary_event = {
**text_plain_event,
**{"path": "/content_encoding_header_textarbitrary1"},
}

response = lh.handler(text_arbitrary_event, None)

self.assertEqual(response["statusCode"], 200)
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

# This route is similar to the above, but it serves its response as text and not bytes. That the response
# isn't bytes shouldn't matter because it still has a Content-Encoding header.

application_json_event = {
**text_plain_event,
**{"path": "/content_encoding_header_textarbitrary2"},
}

response = lh.handler(application_json_event, None)

self.assertEqual(response["statusCode"], 200)
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

def test_wsgi_script_binary_support_without_content_encoding_edgecases(
self,
):
"""
Ensure zappa response bodies are NOT base64 encoded when BINARY_SUPPORT is enabled and the mimetype is "application/json" or starts with "text/".
""" # don't linebreak so that whole line is shown during nosetest readout

lh = LambdaHandler("tests.test_binary_support_settings")

text_plain_event = {
"body": "",
"resource": "/{proxy+}",
"requestContext": {},
"queryStringParameters": {},
"headers": {
"Host": "1234567890.execute-api.us-east-1.amazonaws.com",
},
"pathParameters": {"proxy": "return/request/url"},
"httpMethod": "GET",
"stageVariables": {},
"path": "/textplain_mimetype_response1",
}

for path in [
"/textplain_mimetype_response1", # text/plain mimetype should not be turned to base64
"/textarbitrary_mimetype_response1", # text/arbitrary mimetype should not be turned to base64
"/json_mimetype_response1", # application/json mimetype should not be turned to base64
]:
event = {**text_plain_event, "path": path}
response = lh.handler(event, None)

self.assertEqual(response["statusCode"], 200)
self.assertNotIn("isBase64Encoded", response)
self.assertFalse(is_base64(response["body"]))

def test_wsgi_script_binary_support_without_content_encoding(
self,
):
"""
Ensure zappa response bodies are base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding is absent.
""" # don't linebreak so that whole line is shown during nosetest readout

lh = LambdaHandler("tests.test_binary_support_settings")

text_plain_event = {
"body": "",
"resource": "/{proxy+}",
"requestContext": {},
"queryStringParameters": {},
"headers": {
"Host": "1234567890.execute-api.us-east-1.amazonaws.com",
},
"pathParameters": {"proxy": "return/request/url"},
"httpMethod": "GET",
"stageVariables": {},
"path": "/textplain_mimetype_response1",
}

for path in [
"/arbitrarybinary_mimetype_response1",
"/arbitrarybinary_mimetype_response2",
]:
event = {**text_plain_event, "path": path}
response = lh.handler(event, None)

self.assertEqual(response["statusCode"], 200)
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

def test_wsgi_script_on_cognito_event_request(self):
"""
Ensure that requests sent by cognito behave sensibly
Expand Down
63 changes: 63 additions & 0 deletions tests/test_wsgi_binary_support_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
###
# This test application exists to confirm how Zappa handles WSGI application
# _responses_ when Binary Support is enabled.
###

import gzip
import json

from flask import Flask, Response, send_file

app = Flask(__name__)


@app.route("/textplain_mimetype_response1", methods=["GET"])
def text_mimetype_response_1():
return Response(response="OK", mimetype="text/plain")


@app.route("/textarbitrary_mimetype_response1", methods=["GET"])
def text_mimetype_response_2():
return Response(response="OK", mimetype="text/arbitary")


@app.route("/json_mimetype_response1", methods=["GET"])
def json_mimetype_response_1():
return Response(response=json.dumps({"some": "data"}), mimetype="application/json")


@app.route("/arbitrarybinary_mimetype_response1", methods=["GET"])
def arbitrary_mimetype_response_1():
return Response(response=b"some binary data", mimetype="arbitrary/binary_mimetype")


@app.route("/arbitrarybinary_mimetype_response2", methods=["GET"])
def arbitrary_mimetype_response_3():
return Response(response="doesnt_matter", mimetype="definitely_not_text")


@app.route("/content_encoding_header_json1", methods=["GET"])
def response_with_content_encoding_1():
return Response(
response=gzip.compress(json.dumps({"some": "data"}).encode()),
mimetype="application/json",
headers={"Content-Encoding": "gzip"},
)


@app.route("/content_encoding_header_textarbitrary1", methods=["GET"])
def response_with_content_encoding_2():
return Response(
response=b"OK",
mimetype="text/arbitrary",
headers={"Content-Encoding": "something_arbitrarily_binary"},
)


@app.route("/content_encoding_header_textarbitrary2", methods=["GET"])
def response_with_content_encoding_3():
return Response(
response="OK",
mimetype="text/arbitrary",
headers={"Content-Encoding": "with_content_type_but_not_bytes_response"},
)
9 changes: 9 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import functools
import os
from contextlib import contextmanager
Expand Down Expand Up @@ -74,3 +75,11 @@ def stub_open(*args, **kwargs):

with patch("__builtin__.open", stub_open):
yield mock_open, mock_file


def is_base64(test_string: str) -> bool:
# Taken from https://stackoverflow.com/a/45928164/3200002
try:
return base64.b64encode(base64.b64decode(test_string)).decode() == test_string
except Exception:
return False
18 changes: 16 additions & 2 deletions zappa/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,14 +583,28 @@ def handler(self, event, context):
)

if response.data:
if (
# We base64 encode for two reasons when BINARY_SUPPORT is enabled:
# - Content-Encoding is present, which is commonly used by compression mechanisms to indicate
# that the content is in br/gzip/deflate/etc encoding
# (Related: https://github.com/zappa/Zappa/issues/908). Content like this must be
# transmitted as b64.
# - The response is assumed to be some binary format (since BINARY_SUPPORT is enabled and it
# isn't application/json or text/)
if settings.BINARY_SUPPORT and response.headers.get(
"Content-Encoding"
Quidge marked this conversation as resolved.
Show resolved Hide resolved
):
zappa_returndict["body"] = base64.b64encode(
response.data
).decode()
zappa_returndict["isBase64Encoded"] = True
elif (
settings.BINARY_SUPPORT
and not response.mimetype.startswith("text/")
and response.mimetype != "application/json"
):
zappa_returndict["body"] = base64.b64encode(
response.data
).decode("utf-8")
).decode()
zappa_returndict["isBase64Encoded"] = True
else:
zappa_returndict["body"] = response.get_data(as_text=True)
Expand Down