Skip to content
This repository has been archived by the owner on Aug 31, 2022. It is now read-only.

Create dataset #8

Merged
merged 16 commits into from
Nov 23, 2015
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,23 @@ Retrieving data is easy! Use SQL-style keyword args to filter data, or lookup an
>>> client.get("/resource/nimj-3ivp/193.json", exclude_system_fields=False)
{u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u':position': 193, u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}

Create a dataset

>>> columns = [{"fieldName": "delegation", "name": "Delegation", "dataTypeName": "text"}, {"fieldName": "members", "name": "Members", "dataTypeName": "number"}]
>>> tags = ["politics", "geography"]
>>> client.create("Delegates", description="List of delegates", columns=columns, row_identifier="delegation", tags=tags, category="Transparency")
{u'id': u'2frc-hyvj', u'name': u'Foo Bar', u'description': u'test dataset', u'publicationStage': u'unpublished', u'columns': [ { u'name': u'Foo', u'dataTypeName': u'text', u'fieldName': u'foo', ... }, { u'name': u'Bar', u'dataTypeName': u'number', u'fieldName': u'bar', ... } ], u'metadata': { u'rowIdentifier': 230641051 }, ... }

Publish a dataset after creating it (take it out of 'working copy' mode)

>>> client.publish("/resource/eb9n-hr43.json")
{u'id': u'2frc-hyvj', u'name': u'Foo Bar', u'description': u'test dataset', u'publicationStage': u'unpublished', u'columns': [ { u'name': u'Foo', u'dataTypeName': u'text', u'fieldName': u'foo', ... }, { u'name': u'Bar', u'dataTypeName': u'number', u'fieldName': u'bar', ... } ], u'metadata': { u'rowIdentifier': 230641051 }, ... }

Set the permissions of a dataset to public or private

>>> client.set_permission("/resource/eb9n-hr43.json", "public")
<Response [200]>

Create a new row in an existing dataset

>>> data = [{'Delegation': 'AJU', 'Name': 'Alaska', 'Key': 'AL', 'Entity': 'Juneau'}]
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,23 @@ Retrieving data is easy! Use SQL-style keyword args to filter data, or lookup an
>>> client.get("/resource/nimj-3ivp/193.json", exclude_system_fields=False)
{u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u':position': 193, u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}

Create a dataset

>>> columns = [{"fieldName": "delegation", "name": "Delegation", "dataTypeName": "text"}, {"fieldName": "members", "name": "Members", "dataTypeName": "number"}]
>>> tags = ["politics", "geography"]
>>> client.create("Delegates", description="List of delegates", columns=columns, row_identifier="delegation", tags=tags, category="Transparency")
{u'id': u'2frc-hyvj', u'name': u'Foo Bar', u'description': u'test dataset', u'publicationStage': u'unpublished', u'columns': [ { u'name': u'Foo', u'dataTypeName': u'text', u'fieldName': u'foo', ... }, { u'name': u'Bar', u'dataTypeName': u'number', u'fieldName': u'bar', ... } ], u'metadata': { u'rowIdentifier': 230641051 }, ... }

Publish a dataset after creating it (take it out of 'working copy' mode)

>>> client.publish("/resource/eb9n-hr43.json")
{u'id': u'2frc-hyvj', u'name': u'Foo Bar', u'description': u'test dataset', u'publicationStage': u'unpublished', u'columns': [ { u'name': u'Foo', u'dataTypeName': u'text', u'fieldName': u'foo', ... }, { u'name': u'Bar', u'dataTypeName': u'number', u'fieldName': u'bar', ... } ], u'metadata': { u'rowIdentifier': 230641051 }, ... }

Set the permissions of a dataset to public or private

>>> client.set_permission("/resource/eb9n-hr43.json", "public")
<Response [200]>

Create a new row in an existing dataset

>>> data = [{'Delegation': 'AJU', 'Name': 'Alaska', 'Key': 'AL', 'Entity': 'Juneau'}]
Expand Down
59 changes: 51 additions & 8 deletions sodapy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(self, domain, app_token, username=None, password=None,
session_adapter["adapter"])
self.uri_prefix = session_adapter["prefix"]
else:
self.uri_prefix = "https"
self.uri_prefix = "https://"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`session.mount()` works when passed `mock` but not when passed `https`; for HTTPS to work, it needs to be passed `https://`. I'm not sure why this is, but the documentation suggests this approach fits fine. It required a few changes elsewhere, namely removing `://` from a few string constructors. The alternative is to inject `://` into the `session.mount()` call, but I chose not to do that in case, in theory, you wanted to use a longer prefix like `http://google.com` (which would work with this approach).


def authentication_validation(self, username, password, access_token):
'''
Expand All @@ -76,8 +76,50 @@ def authentication_validation(self, username, password, access_token):
" OAuth2.0. Please use only one authentication"
" method.")

def create(self, file_object):
raise NotImplementedError()
def create(self, name, **kwargs):
    '''
    Create a dataset called name by POSTing to /api/views.json.
    Remaining keyword args are sent along in the request payload.
    Optionally, specify args such as:
        description : description of the dataset
        columns : list of columns (see docs/tests for list structure)
        category : must exist in /admin/metadata
        tags : array of tag strings
        row_identifier : field name of primary key
    '''
    # "public" and "published" are stripped from kwargs so they never
    # reach the payload; this method does not otherwise act on them.
    for ignored_option in ("public", "published"):
        kwargs.pop(ignored_option, False)

    dataset = {"name": name}

    # the primary key is sent nested under metadata.rowIdentifier
    if "row_identifier" in kwargs:
        row_identifier = kwargs.pop("row_identifier", None)
        dataset["metadata"] = {"rowIdentifier": row_identifier}

    dataset.update(kwargs)
    dataset = _clear_empty_values(dataset)

    return self._perform_update("post", "/api/views.json", dataset)

def set_permission(self, resource, permission="private"):
    '''
    Set a dataset's permissions to private or public.
    "public" is sent as "public.read"; any other value is sent as given.
    Only the last path segment of resource is used to build the
    /api/views/ request path.
    '''
    if permission == "public":
        permission_value = "public.read"
    else:
        permission_value = permission

    query = {"method": "setPermission", "value": permission_value}

    # keep whatever follows the final "/" (extension included),
    # e.g. "/resource/eb9n-hr43.json" -> "eb9n-hr43.json"
    dataset = resource.rsplit("/", 1)[-1]
    path = "/api/views/" + dataset

    return self._perform_request("put", path, params=query)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, you're right. Doing stuff like this is silly. I like the idea of only requiring the user to pass the 4x4 code + content-type extension and then building the URLs in the calls depending on which version of the API we are hitting. That's kind of asking a lot from you though, so I'll merge this first and do the cleanup in a separate commit.


def publish(self, resource):
    '''
    Publish a dataset.
    The create() method creates a dataset in a "working copy" state;
    this method POSTs to that dataset's publication endpoint.
    '''
    # reduce e.g. "/resource/eb9n-hr43.json" to the bare id "eb9n-hr43"
    filename = resource.rsplit("/", 1)[-1]
    dataset_id = filename.split(".")[0]

    endpoint = "/api/views/" + dataset_id
    endpoint = endpoint + "/publication.json"
    return self._perform_request("post", endpoint)

def get(self, resource, **kwargs):
'''
Expand Down Expand Up @@ -145,7 +187,7 @@ def replace(self, resource, payload):
return self._perform_update("put", resource, payload)

def _perform_update(self, method, resource, payload):
if isinstance(payload, list):
if isinstance(payload, list) or isinstance(payload, dict):
response = self._perform_request(method, resource,
data=json.dumps(payload))
elif isinstance(payload, file):
Expand Down Expand Up @@ -184,19 +226,20 @@ def _perform_request(self, request_type, resource, **kwargs):
raise Exception("Unknown request type. Supported request types are"
": {0}".format(", ".join(request_type_methods)))

uri = "{0}://{1}{2}".format(self.uri_prefix, self.domain, resource)
uri = "{0}{1}{2}".format(self.uri_prefix, self.domain, resource)

# set a timeout, just to be safe
kwargs["timeout"] = 10

response = getattr(self.session, request_type)(uri, **kwargs)

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove whitespace.

# handle errors
if response.status_code not in (200, 202):
_raise_for_status(response)

# deletes have no content body, simply return the whole response
if request_type == "delete":
# when responses have no content body (ie. delete, set_permission),
# simply return the whole response
if not response.text:
return response

# for other request types, return most useful data
Expand Down
65 changes: 65 additions & 0 deletions tests/test_data/create_foobar.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"id": "2frc-hyvj",
"name": "Foo Bar",
"averageRating": 0,
"createdAt": 1448018696,
"description": "test dataset",
"downloadCount": 0,
"newBackend": false,
"numberOfComments": 0,
"oid": 14929734,
"publicationAppendEnabled": false,
"publicationGroup": 5638965,
"publicationStage": "unpublished",
"rowIdentifierColumnId": 230641051,
"rowsUpdatedAt": 1448018697,
"rowsUpdatedBy": "gxfh-uqsf",
"tableId": 5638965,
"totalTimesRated": 0,
"viewCount": 0,
"viewLastModified": 1448018697,
"viewType": "tabular",
"columns": [
{
"id": 230641050,
"name": "Foo",
"dataTypeName": "text",
"fieldName": "foo",
"position": 1,
"renderTypeName": "text",
"tableColumnId": 32762225,
"format": {}
},
{
"id": 230641051,
"name": "Bar",
"dataTypeName": "number",
"fieldName": "bar",
"position": 2,
"renderTypeName": "number",
"tableColumnId": 32762226,
"format": {}
}
],
"metadata": {
"rowIdentifier": 230641051
},
"owner": {},
"query": {},
"rights": [
"read",
"write",
"add",
"delete",
"grant",
"add_column",
"remove_column",
"update_column",
"update_view",
"delete_view"
],
"tableAuthor": {},
"flags": [
"default"
]
}
Empty file added tests/test_data/empty.txt
Empty file.
87 changes: 82 additions & 5 deletions tests/test_soda.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import json


PREFIX = "mock"
PREFIX = "http://"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The requests library drops querystring parameters when the protocol is not http/https. (details)

DOMAIN = "fakedomain.com"
PATH = "/songs.json"
APPTOKEN = "FakeAppToken"
Expand Down Expand Up @@ -113,7 +113,7 @@ def test_delete():
client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
session_adapter=mock_adapter)

uri = "{0}://{1}{2}".format(PREFIX, DOMAIN, PATH)
uri = "{0}{1}{2}".format(PREFIX, DOMAIN, PATH)
adapter.register_uri("DELETE", uri, status_code=200)
response = client.delete(PATH)
assert response.status_code == 200
Expand All @@ -124,14 +124,91 @@ def test_delete():
assert isinstance(e, requests_mock.exceptions.NoMockAddress)
finally:
client.close()

def test_create():
    '''
    create() should POST the dataset definition to /api/views.json and
    return the parsed JSON description of the new dataset.
    '''
    adapter = requests_mock.Adapter()
    mock_adapter = {"prefix": PREFIX, "adapter": adapter}
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    # close the client even when an assertion fails, as test_delete does
    try:
        set_up_mock(adapter, "POST", "create_foobar.txt", 200,
                    resource="/api/views.json")

        columns = [
            {"fieldName": "foo", "name": "Foo", "dataTypeName": "text"},
            {"fieldName": "bar", "name": "Bar", "dataTypeName": "number"},
        ]
        tags = ["foo", "bar"]
        response = client.create("Foo Bar", description="test dataset",
                                 columns=columns, tags=tags,
                                 row_identifier="bar")

        # decode the recorded request body by hand
        request = adapter.request_history[0]
        request_payload = json.loads(request.text)

        # Test request payload
        for dataset_key in ["name", "description", "columns", "tags"]:
            assert dataset_key in request_payload

        for column_key in ["fieldName", "name", "dataTypeName"]:
            assert column_key in request_payload["columns"][0]

        # Test response
        assert isinstance(response, dict)
        assert len(response.get("id")) == 9
    finally:
        client.close()

def test_set_permission():
    '''
    set_permission() should PUT to /api/views/<resource> with the
    setPermission query parameters and hand back the raw response when
    the body is empty.
    '''
    adapter = requests_mock.Adapter()
    mock_adapter = {"prefix": PREFIX, "adapter": adapter}
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    # close the client even when an assertion fails, as test_delete does
    try:
        # empty.txt is an empty fixture
        set_up_mock(adapter, "PUT", "empty.txt", 200,
                    resource="/api/views" + PATH)

        # Test response
        response = client.set_permission(PATH, permission="public")
        assert response.status_code == 200

        # Test request; compare parameters without relying on the order
        # in which the params dict was serialized into the URL
        request = adapter.request_history[0]
        qs = request.url.split("?")[-1]
        assert sorted(qs.split("&")) == ["method=setPermission",
                                         "value=public.read"]
    finally:
        client.close()

def test_publish():
    '''
    publish() should POST to /api/views/<id>/publication.json, with the
    id taken from the resource path minus its extension, and return the
    parsed JSON body.
    '''
    adapter = requests_mock.Adapter()
    mock_adapter = {"prefix": PREFIX, "adapter": adapter}
    client = Socrata(DOMAIN, APPTOKEN, username=USERNAME, password=PASSWORD,
                     session_adapter=mock_adapter)

    # close the client even when an assertion fails, as test_delete does
    try:
        # publish() removes .json, so the mocked path uses the bare "songs"
        set_up_mock(adapter, "POST", "create_foobar.txt", 200,
                    resource="/api/views/songs/publication.json")

        # hard-coded so request uri is matched
        response = client.publish("/resource/songs.json")
        assert isinstance(response, dict)
        assert len(response.get("id")) == 9
    finally:
        client.close()

def set_up_mock(adapter, method, response, response_code,
reason="OK", auth=None):
reason="OK", auth=None, resource=PATH):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Necessary to accommodate the more complicated paths these new methods introduce

path = os.path.join(TEST_DATA_PATH, response)
with open(path, "rb") as f:
body = json.load(f)
uri = "{0}://{1}{2}".format(PREFIX, DOMAIN, PATH)
try:
body = json.load(f)
except ValueError:
body = None

uri = "{0}{1}{2}".format(PREFIX, DOMAIN, resource)
headers = {
"content-type": "application/json; charset=utf-8"
}
Expand Down