-
Notifications
You must be signed in to change notification settings - Fork 19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improvement/revamp cursor iteration #44
base: master
Are you sure you want to change the base?
Changes from 5 commits
20a43a1
61dc775
5915e13
ae6993d
18dd233
60b82b7
8c671cb
2deb232
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,6 @@ def __init__(self, client, service_name, representation, **specs): | |
self.service_name = service_name | ||
self.representation = representation | ||
self.specs = specs | ||
self.retrieved = 0 | ||
self._skip = 0 | ||
self._limit = float('inf') | ||
|
||
|
@@ -39,21 +38,11 @@ def __getitem__(self, idx): | |
|
||
def __iter__(self): | ||
"""Iterate over all AppNexus objects matching the specifications""" | ||
retrieved = 0 | ||
for page in self.iter_pages(): | ||
data = self.extract_data(page) | ||
if self._skip >= len(data): | ||
self._skip -= len(data) | ||
continue | ||
elif self._skip: | ||
self._skip = 0 | ||
data = data[self._skip:] | ||
lasting = self._limit - self.retrieved | ||
if not lasting: | ||
break | ||
elif lasting < len(data): | ||
data = data[:lasting] | ||
for entity in data: | ||
self.retrieved += 1 | ||
retrieved += 1 | ||
yield entity | ||
|
||
def extract_data(self, page): | ||
|
@@ -86,15 +75,17 @@ def get_page(self, start_element=0, num_elements=None): | |
specs.update(start_element=start_element, num_elements=num_elements) | ||
return self.client.get(self.service_name, **specs) | ||
|
||
def iter_pages(self, skip_elements=0): | ||
def iter_pages(self): | ||
"""Iterate as much as needed to get all available pages""" | ||
start_element = skip_elements | ||
start_element = self._skip | ||
num_elements = min(self._limit, self.batch_size) | ||
count = -1 | ||
while start_element < count or count == -1: | ||
page = self.get_page(start_element) | ||
page = self.get_page(start_element, num_elements) | ||
shnups marked this conversation as resolved.
Show resolved
Hide resolved
|
||
yield page | ||
start_element = page["start_element"] + page["num_elements"] | ||
count = page["count"] | ||
start_element = start_element + page["num_elements"] | ||
num_elements = min(page["count"] - num_elements, self.batch_size) | ||
count = min(page["count"], self._skip + self._limit) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we just keep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't address this comment. |
||
|
||
def count(self): | ||
"""Returns the number of elements matching the specifications""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,9 @@ | |
from appnexus.client import AppNexusClient | ||
from appnexus.cursor import Cursor | ||
|
||
from .helpers import gen_random_collection | ||
from .helpers import gen_ordered_collection, gen_random_collection | ||
|
||
COLLECTION_SIZE = 324 | ||
|
||
|
||
@pytest.fixture | ||
|
@@ -55,7 +57,12 @@ def response_dict2(): | |
|
||
@pytest.fixture | ||
def random_response_dict(): | ||
return gen_random_collection(count=324) | ||
return gen_random_collection(count=COLLECTION_SIZE) | ||
|
||
|
||
@pytest.fixture | ||
def ordered_response_dict(): | ||
return gen_ordered_collection(start_element=0, count=COLLECTION_SIZE) | ||
|
||
|
||
@pytest.fixture | ||
|
@@ -74,6 +81,31 @@ def random_cursor(mocker, random_response_dict): | |
return Cursor(client, "campaign", representations.raw) | ||
|
||
|
||
@pytest.fixture | ||
def ordered_cursor(mocker, ordered_response_dict): | ||
client = AppNexusClient("test", "test") | ||
mocker.patch.object(client, "get") | ||
client.get.side_effect = ordered_response_dict | ||
return Cursor(client, "campaign", representations.raw) | ||
|
||
|
||
def mock_ordered_cursor(mocker, start=0, count=COLLECTION_SIZE, factor=1): | ||
client = AppNexusClient("test", "test") | ||
mocker.patch.object(client, "get") | ||
client.get.side_effect = gen_ordered_collection(start, count) * factor | ||
cursor = Cursor(client, "campaign", representations.raw) | ||
mocker.patch.object(cursor, "get_page", wraps=cursor.get_page) | ||
return cursor | ||
|
||
|
||
@pytest.fixture | ||
def double_ordered_cursor(mocker, ordered_response_dict): | ||
client = AppNexusClient("test", "test") | ||
mocker.patch.object(client, "get") | ||
client.get.side_effect = ordered_response_dict * 2 | ||
return Cursor(client, "campaign", representations.raw) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This fixture doesn't seem used anywhere. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed |
||
|
||
|
||
def test_cursor_count(cursor, response_dict): | ||
assert cursor.count() == response_dict["count"] | ||
|
||
|
@@ -164,3 +196,111 @@ def test_uncallable_representation(): | |
def test_requests_volume_on_iteration(cursor): | ||
_ = [r for r in cursor] | ||
assert cursor.client.get.call_count == 1 | ||
|
||
|
||
def test_skip_none(mocker): | ||
cursor = mock_ordered_cursor(mocker, start=0, count=COLLECTION_SIZE) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you use this rather than your There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No good reason in this case, updated |
||
results = [r for r in cursor] | ||
assert len(results) == COLLECTION_SIZE | ||
assert results[0]['id'] == 0 | ||
assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
assert cursor.get_page.call_count == 4 | ||
|
||
|
||
def test_skip_ten(mocker): | ||
skip = 10 | ||
cursor = mock_ordered_cursor(mocker, start=skip, count=COLLECTION_SIZE) | ||
cursor.skip(skip) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure to understand why you There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The goal of this test and the ones following is to validate that the cursor is iterating over pages properly taking I think the tests are ok as is (used them to validate the algo changes) though I admit that they could be a bit better. |
||
results = [r for r in cursor] | ||
assert len(results) == COLLECTION_SIZE - skip | ||
assert results[0]['id'] == skip | ||
assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
assert cursor.get_page.call_count == 4 | ||
|
||
|
||
def test_skip_hundred_ten(mocker): | ||
skip = 110 | ||
cursor = mock_ordered_cursor(mocker, start=skip, count=COLLECTION_SIZE) | ||
cursor.skip(skip) | ||
results = [r for r in cursor] | ||
assert len(results) == COLLECTION_SIZE - skip | ||
assert results[0]['id'] == skip | ||
assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
assert cursor.get_page.call_count == 3 | ||
|
||
|
||
def test_skip_twice(mocker): | ||
skip = 10 | ||
cursor = mock_ordered_cursor(mocker, start=skip, count=COLLECTION_SIZE, | ||
factor=2) | ||
cursor.skip(skip) | ||
results = [r for r in cursor] | ||
assert len(results) == COLLECTION_SIZE - skip | ||
assert results[0]['id'] == skip | ||
assert cursor.get_page.call_count == 4 | ||
results = [r for r in cursor] | ||
assert len(results) == COLLECTION_SIZE - skip | ||
assert results[0]['id'] == skip | ||
assert cursor.get_page.call_count == 8 | ||
|
||
|
||
def test_limit_ten(mocker): | ||
limit = 10 | ||
cursor = mock_ordered_cursor(mocker, start=0, count=limit) | ||
cursor.limit(limit) | ||
results = [r for r in cursor] | ||
assert len(results) == limit | ||
assert results[0]['id'] == 0 | ||
assert results[-1]['id'] == limit - 1 | ||
assert cursor.get_page.call_count == 1 | ||
|
||
|
||
def test_limit_hundred_ten(mocker): | ||
limit = 110 | ||
cursor = mock_ordered_cursor(mocker, start=0, count=limit) | ||
cursor.limit(limit) | ||
results = [r for r in cursor] | ||
assert len(results) == limit | ||
assert results[0]['id'] == 0 | ||
assert results[-1]['id'] == limit - 1 | ||
assert cursor.get_page.call_count == 2 | ||
|
||
|
||
def test_limit_thousand(mocker): | ||
limit = 1000 | ||
cursor = mock_ordered_cursor(mocker, start=0, count=COLLECTION_SIZE) | ||
cursor.limit(limit) | ||
results = [r for r in cursor] | ||
assert len(results) == COLLECTION_SIZE | ||
assert results[0]['id'] == 0 | ||
assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
assert cursor.get_page.call_count == 4 | ||
|
||
|
||
def test_limit_twice(mocker): | ||
limit = 50 | ||
cursor = mock_ordered_cursor(mocker, start=0, count=limit, factor=2) | ||
cursor.limit(limit) | ||
results = [r for r in cursor] | ||
assert len(results) == limit | ||
assert results[0]['id'] == 0 | ||
assert results[-1]['id'] == limit - 1 | ||
assert cursor.get_page.call_count == 1 | ||
results = [r for r in cursor] | ||
assert len(results) == limit | ||
assert results[0]['id'] == 0 | ||
assert results[-1]['id'] == limit - 1 | ||
assert cursor.get_page.call_count == 2 | ||
|
||
|
||
def test_skip_and_limit(mocker): | ||
skip = 10 | ||
limit = 150 | ||
cursor = mock_ordered_cursor(mocker, start=skip, count=skip + limit) | ||
cursor.skip(skip) | ||
cursor.limit(limit) | ||
results = [r for r in cursor] | ||
assert len(results) == limit | ||
assert results[0]['id'] == skip | ||
assert results[-1]['id'] == limit + skip - 1 | ||
assert cursor.get_page.call_count == 2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,32 @@ | ||
import random | ||
|
||
|
||
def gen_random_object(): | ||
return {"id": random.randrange(1000000)} | ||
def gen_collection(object_generator_func, start_element=0, count=None, | ||
object_type="campaigns"): | ||
if count is None: | ||
random.randrange(10000) | ||
result = [] | ||
i = 0 | ||
volume = count - start_element | ||
for i in range(volume // 100): | ||
page = gen_page(object_generator_func, count=count, | ||
object_type=object_type, | ||
start_element=start_element + i * 100) | ||
result.append(page) | ||
if volume % 100 != 0: | ||
i = i + 1 if len(result) else 0 | ||
page = gen_page(object_generator_func, count=count, | ||
object_type=object_type, | ||
start_element=start_element + i * 100, | ||
num_elements=volume % 100) | ||
result.append(page) | ||
return result | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you keep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
|
||
def gen_random_page(num_elements=None, start_element=0, count=None, | ||
object_type="campaigns"): | ||
def gen_page(object_generator_func, start_element=0, num_elements=None, | ||
count=None, object_type="campaigns"): | ||
if not object_generator_func or not callable(object_generator_func): | ||
raise ValueError("object_generator_func has to be set and callable") | ||
if count is None: | ||
count = random.randrange(10000) | ||
if num_elements is None: | ||
|
@@ -17,22 +37,19 @@ def gen_random_page(num_elements=None, start_element=0, count=None, | |
"start_element": start_element, | ||
"num_elements": num_elements, | ||
"count": count, | ||
object_type: [gen_random_object() for _ in range(num_elements)] | ||
object_type: [object_generator_func(start_element + index) | ||
for index in range(num_elements)] | ||
} | ||
|
||
|
||
def gen_random_collection(count=None, object_type="campaigns"): | ||
if count is None: | ||
count = random.randrange(10000) | ||
result = [] | ||
i = 0 | ||
for i in range(count // 100): | ||
random_page = gen_random_page(count=count, object_type=object_type, | ||
start_element=i * 100) | ||
result.append(random_page) | ||
if count % 100 != 0: | ||
random_page = gen_random_page(count=count, object_type=object_type, | ||
start_element=i * 100, | ||
num_elements=count % 100) | ||
result.append(random_page) | ||
return result | ||
def gen_random_collection(start_element=0, count=None, | ||
object_type="campaigns"): | ||
return gen_collection( | ||
object_generator_func=lambda index: {"id": random.randrange(1000000)}, | ||
start_element=start_element, count=count, object_type=object_type) | ||
|
||
|
||
def gen_ordered_collection(start_element, count, object_type="campaigns"): | ||
return gen_collection( | ||
object_generator_func=lambda index: {"id": index}, | ||
start_element=start_element, count=count, object_type=object_type) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess it would be more readable/maintainable to add a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, changes made |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment from @rambobinator
The
retrieved
variable is not used anymore, remove itThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Variable removed in last commits