Skip to content

Commit

Permalink
cve_feed: manage http request and add request log (#1407)
Browse files Browse the repository at this point in the history
**NOTE**: Turn on "Hide whitespace" to review

### Summary

We have run into memory problems recently, especially since the NVD API
became more unstable and retries became more frequent. This change attempts
to reduce the memory leak that occurs when many requests are created, and
also logs the requests to make debugging easier, as more improvements may be
needed for this to work well.

* Added a log statement to indicate when the NIST NVD API is being
called, which includes the URL and parameters being used.
* Replaced the bare `requests.get` call with `session.get` on a
`requests.Session` used as a context manager (`with` statement), to ensure
the session and its connections are properly closed after use.

---------

Signed-off-by: Khanh Le Do <[email protected]>
  • Loading branch information
kledo-lyft authored Dec 13, 2024
1 parent 59306e6 commit 8043cdd
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 27 deletions.
48 changes: 25 additions & 23 deletions cartography/intel/cve/feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,30 +85,32 @@ def _call_cves_api(url: str, api_key: str | None, params: Dict[str, Any]) -> Dic
)
results: Dict[Any, Any] = dict()

while params["resultsPerPage"] > 0 or params["startIndex"] < totalResults:
try:
res = requests.get(
url, params=params, headers=headers, timeout=CONNECT_AND_READ_TIMEOUT,
)
res.raise_for_status()
except requests.exceptions.HTTPError:
logger.error(
f"Failed to get CVE data from NIST NVD API {res.status_code} : {res.text}",
)
retries += 1
if retries >= MAX_RETRIES:
raise
# Exponential backoff
sleep_time *= 2
with requests.Session() as session:
while params["resultsPerPage"] > 0 or params["startIndex"] < totalResults:
logger.info(f"Calling NIST NVD API at {url} with params {params}")
try:
res = session.get(
url, params=params, headers=headers, timeout=CONNECT_AND_READ_TIMEOUT,
)
res.raise_for_status()
data = res.json()
except requests.exceptions.HTTPError:
logger.error(
f"Failed to get CVE data from NIST NVD API {res.status_code} : {res.text}",
)
retries += 1
if retries >= MAX_RETRIES:
raise
# Exponential backoff
sleep_time *= 2
time.sleep(sleep_time)
continue
_map_cve_dict(results, data)
totalResults = data["totalResults"]
params["resultsPerPage"] = data["resultsPerPage"]
params["startIndex"] += data["resultsPerPage"]
retries = 0
time.sleep(sleep_time)
continue
data = res.json()
_map_cve_dict(results, data)
totalResults = data["totalResults"]
params["resultsPerPage"] = data["resultsPerPage"]
params["startIndex"] += data["resultsPerPage"]
retries = 0
time.sleep(sleep_time)
return results


Expand Down
13 changes: 9 additions & 4 deletions tests/unit/cartography/intel/cve/test_feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unittest.mock import Mock
from unittest.mock import patch

import pytest
import requests

from cartography.intel.cve.feed import _call_cves_api
Expand All @@ -18,9 +19,14 @@
API_KEY = "nvd_api_key"


@patch("cartography.intel.cve.feed.DEFAULT_SLEEP_TIME", 0)
@patch("cartography.intel.cve.feed.requests.get")
def test_call_cves_api(mock_get: Mock):
@pytest.fixture
def mock_get():
with patch("cartography.intel.cve.feed.requests.Session") as mock_session:
session_mock = mock_session.return_value.__enter__.return_value
yield session_mock.get


def test_call_cves_api(mock_get):
# Arrange
mock_response_1 = Mock()
mock_response_1.status_code = 200
Expand Down Expand Up @@ -140,7 +146,6 @@ def test_call_cves_api(mock_get: Mock):


@patch("cartography.intel.cve.feed.DEFAULT_SLEEP_TIME", 0)
@patch("cartography.intel.cve.feed.requests.get")
def test_call_cves_api_with_error(mock_get: Mock):
# Arrange
mock_response = Mock()
Expand Down

0 comments on commit 8043cdd

Please sign in to comment.