Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Google Custom Search API Connector #16

Merged
merged 1 commit into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/lints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ jobs:
- name: Run Pylint static code analyser
run: |
pip install coverage pylint
pylint src/facere_sensum test
pylint src/facere_sensum test fsy.py
- name: Run Bandit security analyser
run: |
pip install bandit
bandit -r src/facere_sensum
bandit -r test
bandit -r test
bandit fsy.py
18 changes: 18 additions & 0 deletions examples/config_customsearch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"log": "log.csv",
"metrics": [
{
"id": "spartan race",
"source": "customsearch",
"priority": 0.7,
"num": 5,
"URL": "https://www.spartan.com/"
},
{
"id": "obstacle course racing",
"source": "customsearch",
"priority": 0.3,
"URL": "https://www.spartan.com/"
}
]
}
11 changes: 11 additions & 0 deletions fsy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# SPDX-License-Identifier: MIT

'''
facere-sensum debug launcher.
Need to keep this separate to make sure fs.py is imported as a module and
not used as the main script.
'''

from facere_sensum import fs

# Run the entry point of the imported module (fs is imported above, not executed as a script).
fs.main()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
google-api-python-client>=2.93.0
numpy>=1.24.3
pandas>=2.0.1
64 changes: 64 additions & 0 deletions src/facere_sensum/connectors/customsearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# SPDX-License-Identifier: MIT

'''
Data connector for Google Custom Search API.
'''

from googleapiclient.discovery import build
from facere_sensum import fs

# Default for number of search results to consider.
_NUM = 50

# Both statements below run at import time and read fs.auth, so the
# authentication config must already be loaded into fs before this module is imported.
# 'Google' section of the authentication config.
_auth = fs.auth['Google']
# Custom Search resource shared by every request made from this module.
_cse = build('customsearch', 'v1', developerKey=_auth['custom search API key']).cse() # pylint: disable=E1101

def invoke_cse(query, start): # pragma: no cover
    '''
    Run a single Custom Search API request and return its response.
    'query' is the search query, 'start' is the index of the first result to return.
    Lives in its own function so that testing scripts can swap in a mockup.
    '''
    request = _cse.list(q=query, cx=_auth['search engine ID'], start=start)
    return request.execute()

def get_raw(metric):
    '''
    Get raw metric score for Google Custom Search API:
    rank of the URL in search results for the query or zero, if it didn't appear there.
    'metric' is the metric JSON description. Keys used:
      'URL' - link to look for among the search results (required);
      'q'   - search query (optional, 'id' is used as the query when absent);
      'num' - number of top search results to consider (optional, defaults to _NUM).
    '''
    query = metric['q'] if 'q' in metric else metric['id']
    num = metric['num'] if 'num' in metric else _NUM
    url = metric['URL']

    start = 1
    while num > 0:
        res = invoke_cse(query, start)

        # The API response has no 'items' key at all when a page holds no results,
        # so treat a missing key as an empty page instead of failing with KeyError.
        for (index,item) in enumerate(res.get('items', [])[:num]):
            if item['link'] == url:
                return start+index

        if 'nextPage' not in res['queries']:
            print('Warning (Google Custom Search API connector): ' \
                  f'query "{query}" produced small number of search results')
            return 0

        # Step to the next batch of ten results and shrink the remaining budget accordingly.
        start += 10
        num -= 10
    return 0

def get_value(metric):
    '''
    Compute the standard (i.e., normalized) Google Custom Search API score for a metric.
    'metric' is the metric JSON description.
    Prints the outcome and returns zero when the URL was not found,
    otherwise (num+1-rank) / num, where num is the number of results considered.
    '''
    rank = get_raw(metric)
    label = metric['id']
    outcome = 'not found' if not rank else str(rank)
    print(f' - {label}: {outcome}')

    # Guard clause: nothing to normalize when the URL did not show up.
    if not rank:
        return 0

    considered = metric.get('num', _NUM)
    return (considered+1-rank) / considered
13 changes: 6 additions & 7 deletions src/facere_sensum/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,17 @@ def main():

if args.auth:
try :
with open(args.auth, encoding='utf-8') as auth:
auth = json.load(auth)
with open(args.auth, encoding='utf-8') as auth_file:
# Put authentication config in global scope
# for all other modules to access as necessary.
globals()['auth'] = json.load(auth_file)
except FileNotFoundError:
print('Authentication config file \''+args.auth+'\' not found. Exiting.')
sys.exit(1)

try:
with open(args.config, encoding='utf-8') as config:
config = json.load(config)
with open(args.config, encoding='utf-8') as config_file:
config = json.load(config_file)
except FileNotFoundError:
print('Project config file \''+args.config+'\' not found. Exiting.')
sys.exit(1)
Expand All @@ -168,6 +170,3 @@ def main():
'Please submit an issue at https://github.com/lunarserge/facere-sensum/issues/new',
'with the command that led here.')
sys.exit(1)

if __name__ == '__main__':
main()
Loading