diff --git a/tests/test_litellm/litellm_core_utils/test_token_counter.py b/tests/test_litellm/litellm_core_utils/test_token_counter.py index 875f3db15d5..20f9a6e4279 100644 --- a/tests/test_litellm/litellm_core_utils/test_token_counter.py +++ b/tests/test_litellm/litellm_core_utils/test_token_counter.py @@ -491,7 +491,20 @@ def test_token_counter(): from litellm.utils import _select_tokenizer_helper, claude_json_str, encoding +# Clear the cache at module load to ensure clean state +_select_tokenizer_helper.cache_clear() + + class TestTokenizerSelection(unittest.TestCase): + def setUp(self): + """Clear the LRU cache before each test method. + + The _select_tokenizer_helper function is decorated with @lru_cache, + which can cause cache hits from previous tests when running with + --dist=loadscope (tests from same file run on same worker). + """ + _select_tokenizer_helper.cache_clear() + @patch("litellm.utils.Tokenizer.from_pretrained") def test_llama3_tokenizer_api_failure(self, mock_from_pretrained): # Setup mock to raise an error