diff --git a/shub/schedule.py b/shub/schedule.py index e3ea72e4..d6220462 100644 --- a/shub/schedule.py +++ b/shub/schedule.py @@ -34,6 +34,13 @@ Similarly, job-specific settings can be supplied through the -s option: shub schedule myspider -s SETTING=VALUE -s LOG_LEVEL=DEBUG + +Also, the spider can be run with all arguments taken from another job, using the -f option: + + shub schedule myspider -f 123/21/134 + +Arguments passed with -a override the arguments copied from that job. + """ SHORT_HELP = "Schedule a spider to run on Scrapy Cloud" @@ -54,7 +61,9 @@ help='Amount of Scrapy Cloud units (-u number)') @click.option('-t', '--tag', help='Job tags (-t tag)', multiple=True) -def cli(spider, argument, set, environment, priority, units, tag): +@click.option('-f', '--args_from', + help='project/spider/job for copying arguments (-f 123/321/456)') +def cli(spider, argument, set, environment, priority, units, tag, args_from): try: target, spider = spider.rsplit('/', 1) except ValueError: @@ -62,7 +71,7 @@ def cli(spider, argument, set, environment, priority, units, tag): targetconf = get_target_conf(target) job_key = schedule_spider(targetconf.project_id, targetconf.endpoint, targetconf.apikey, spider, argument, set, - priority, units, tag, environment) + priority, units, tag, environment, args_from) watch_url = urljoin( targetconf.endpoint, '../p/{}/{}/{}'.format(*job_key.split('/')), @@ -78,11 +87,13 @@ def cli(spider, argument, set, environment, priority, units, tag): def schedule_spider(project, endpoint, apikey, spider, arguments=(), settings=(), - priority=DEFAULT_PRIORITY, units=None, tag=(), environment=()): + priority=DEFAULT_PRIORITY, units=None, tag=(), environment=(), + args_from=None): client = ScrapinghubClient(apikey, dash_endpoint=endpoint) try: project = client.get_project(project) args = dict(x.split('=', 1) for x in arguments) + args = add_args_from_job(client, args, args_from) cmd_args = args.pop('cmd_args', None) meta = args.pop('meta', None) job = project.jobs.run( @@ 
-99,3 +110,14 @@ def schedule_spider(project, endpoint, apikey, spider, arguments=(), settings=() return job.key except ScrapinghubAPIError as e: raise RemoteErrorException(str(e)) + +def add_args_from_job(client, base_args, args_from): + if not args_from: + return base_args + job_args = get_args_from_parent_job(client, args_from).copy() + job_args.update(base_args) + return job_args + +def get_args_from_parent_job(client, args_from): + job = client.get_job(args_from) + return job.metadata.get("spider_args") or {} diff --git a/tests/test_schedule.py b/tests/test_schedule.py index ce069a6d..78bd7e38 100644 --- a/tests/test_schedule.py +++ b/tests/test_schedule.py @@ -24,20 +24,20 @@ def test_schedules_job_if_input_is_ok(self, mock_schedule): # Default self.runner.invoke(schedule.cli, ['spider']) mock_schedule.assert_called_with( - proj, endpoint, apikey, 'spider', (), (), 2, None, (), ()) + proj, endpoint, apikey, 'spider', (), (), 2, None, (), (), None) # Other project self.runner.invoke(schedule.cli, ['123/spider']) mock_schedule.assert_called_with( - 123, endpoint, apikey, 'spider', (), (), 2, None, (), ()) + 123, endpoint, apikey, 'spider', (), (), 2, None, (), (), None) # Other endpoint proj, endpoint, apikey = self.conf.get_target('vagrant') self.runner.invoke(schedule.cli, ['vagrant/spider']) mock_schedule.assert_called_with( - proj, endpoint, apikey, 'spider', (), (), 2, None, (), ()) + proj, endpoint, apikey, 'spider', (), (), 2, None, (), (), None) # Other project at other endpoint self.runner.invoke(schedule.cli, ['vagrant/456/spider']) mock_schedule.assert_called_with( - 456, endpoint, apikey, 'spider', (), (), 2, None, (), ()) + 456, endpoint, apikey, 'spider', (), (), 2, None, (), (), None) @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) def test_schedule_invalid_spider(self, mock_client): @@ -73,6 +73,41 @@ def test_forwards_args_and_settings(self, mock_client): job_settings, ) + @mock.patch('shub.schedule.ScrapinghubClient', 
autospec=True) + def test_forwards_args_from(self, mock_client): + mock_proj = mock_client.return_value.get_project.return_value + with mock.patch('shub.schedule.get_args_from_parent_job') as mock_get_afpj: + mock_get_afpj.return_value = {"job_arg": "test"} + + self.runner.invoke( + schedule.cli, + "testspider -a ARG=val1 --args_from 123/321/44".split(' '), + ) + job_args = mock_proj.jobs.run.call_args[1]['job_args'] + self.assertDictContainsSubset( + {'ARG': 'val1', "job_arg": "test"}, + job_args, + ) + + @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) + def test_forwards_overriding_args_and_args_from(self, mock_client): + # override args from another job by args set in cmd + mock_proj = mock_client.return_value.get_project.return_value + with mock.patch('shub.schedule.get_args_from_parent_job') as mock_get_afpj: + mock_get_afpj.return_value = {"job_arg": "test", 'ARG': 'val_test_job'} + + self.runner.invoke( + schedule.cli, + "testspider -a ARG=val1 --args_from 123/321/44 " + "--argument ARGWITHEQUAL=val2=val2".split(' '), + ) + job_args = mock_proj.jobs.run.call_args[1]['job_args'] + self.assertDictContainsSubset( + {'ARG': 'val1', 'ARGWITHEQUAL': 'val2=val2', "job_arg": "test"}, + job_args, + ) + + @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) def test_forwards_tags(self, mock_client): mock_proj = mock_client.return_value.get_project.return_value