@@ -6,14 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */

-#ifdef EXECUTORCH_FB_BUCK
-#include <TestResourceUtils/TestResourceUtils.h>
-#endif
 #include <executorch/extension/llm/tokenizer/tiktoken.h>
 #include <executorch/runtime/platform/runtime.h>
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include <vector>

 using namespace ::testing;
 using ::executorch::extension::llm::Tiktoken;
@@ -49,15 +45,6 @@ static inline std::unique_ptr<std::vector<std::string>> _get_special_tokens() {
   }
   return special_tokens;
 }
-
-static inline std::string _get_resource_path(const std::string& name) {
-#ifdef EXECUTORCH_FB_BUCK
-  return facebook::xplat::testing::getPathForTestResource("resources/" + name);
-#else
-  return std::getenv("RESOURCES_PATH") + std::string("/") + name;
-#endif
-}
-
 } // namespace

 class TiktokenExtensionTest : public Test {
@@ -66,7 +53,7 @@ class TiktokenExtensionTest : public Test {
     executorch::runtime::runtime_init();
     tokenizer_ = std::make_unique<Tiktoken>(
         _get_special_tokens(), kBOSTokenIndex, kEOSTokenIndex);
-    modelPath_ = _get_resource_path("test_tiktoken_tokenizer.model");
+    modelPath_ = std::getenv("TEST_TIKTOKEN_TOKENIZER");
   }

   std::unique_ptr<Tokenizer> tokenizer_;
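Note on the hunk above: the setup now reads the tokenizer path from an environment variable instead of a resource-path helper. One caveat not addressed in this diff is that std::getenv returns nullptr when the variable is unset, and assigning that to a std::string is undefined behavior. A minimal sketch of a guarded lookup, using a hypothetical require_env helper that is not part of this change:

#include <cstdlib>
#include <string>

#include <gtest/gtest.h>

// Hypothetical helper (not in this diff): fail with a readable message
// instead of constructing std::string from a null pointer.
static std::string require_env(const char* name) {
  const char* value = std::getenv(name);
  EXPECT_NE(value, nullptr) << "environment variable " << name << " is not set";
  return value ? std::string(value) : std::string();
}

// Usage inside SetUp() would then be:
//   modelPath_ = require_env("TEST_TIKTOKEN_TOKENIZER");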
@@ -84,15 +71,15 @@ TEST_F(TiktokenExtensionTest, DecodeWithoutLoadFails) {
 }

 TEST_F(TiktokenExtensionTest, TokenizerVocabSizeIsExpected) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   EXPECT_EQ(tokenizer_->vocab_size(), 128256);
   EXPECT_EQ(tokenizer_->bos_tok(), 128000);
   EXPECT_EQ(tokenizer_->eos_tok(), 128001);
 }

 TEST_F(TiktokenExtensionTest, TokenizerEncodeCorrectly) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   Result<std::vector<uint64_t>> out = tokenizer_->encode("hello world", 1, 0);
   EXPECT_EQ(out.error(), Error::Ok);
@@ -103,7 +90,7 @@ TEST_F(TiktokenExtensionTest, TokenizerEncodeCorrectly) {
 }

 TEST_F(TiktokenExtensionTest, TokenizerDecodeCorrectly) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   std::vector<std::string> expected = {"<|begin_of_text|>", "hello", " world"};
   std::vector<uint64_t> tokens = {128000, 15339, 1917};
@@ -115,7 +102,7 @@ TEST_F(TiktokenExtensionTest, TokenizerDecodeCorrectly) {
 }

 TEST_F(TiktokenExtensionTest, TokenizerDecodeOutOfRangeFails) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   // The vocab size is 128256; add 256 just so the token is out of vocab
   // range.
@@ -160,31 +147,29 @@ TEST_F(TiktokenExtensionTest, LoadWithInvalidPath) {
 }

 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithInvalidRank) {
-  auto invalidModelPath =
-      _get_resource_path("test_tiktoken_invalid_rank.model");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath = std::getenv("TEST_TIKTOKEN_INVALID_RANK");
+  Error res = tokenizer_->load(invalidModelPath);

   EXPECT_EQ(res, Error::InvalidArgument);
 }

 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithInvalidBase64) {
-  auto invalidModelPath =
-      _get_resource_path("test_tiktoken_invalid_base64.model");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath = std::getenv("TEST_TIKTOKEN_INVALID_BASE64");
+  Error res = tokenizer_->load(invalidModelPath);

   EXPECT_EQ(res, Error::InvalidArgument);
 }

 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithNoSpace) {
-  auto invalidModelPath = _get_resource_path("test_tiktoken_no_space.model");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath = std::getenv("TEST_TIKTOKEN_NO_SPACE");
+  Error res = tokenizer_->load(invalidModelPath);

   EXPECT_EQ(res, Error::InvalidArgument);
 }

 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithBPEFile) {
-  auto invalidModelPath = _get_resource_path("test_bpe_tokenizer.bin");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath = std::getenv("TEST_BPE_TOKENIZER");
+  Error res = tokenizer_->load(invalidModelPath);

   EXPECT_EQ(res, Error::InvalidArgument);
 }
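After this change the test binary expects five environment variables to point at the resource files. For running it outside the build system, one possible way to provide them programmatically is a custom test main; this is a sketch only, setenv is POSIX, and the paths below are placeholders derived from the removed resource names rather than values defined by this diff:

#include <cstdlib>

#include <gtest/gtest.h>

int main(int argc, char** argv) {
  // Placeholder paths; the real values come from the build/test harness.
  setenv("TEST_TIKTOKEN_TOKENIZER", "resources/test_tiktoken_tokenizer.model", /*overwrite=*/0);
  setenv("TEST_TIKTOKEN_INVALID_RANK", "resources/test_tiktoken_invalid_rank.model", 0);
  setenv("TEST_TIKTOKEN_INVALID_BASE64", "resources/test_tiktoken_invalid_base64.model", 0);
  setenv("TEST_TIKTOKEN_NO_SPACE", "resources/test_tiktoken_no_space.model", 0);
  setenv("TEST_BPE_TOKENIZER", "resources/test_bpe_tokenizer.bin", 0);
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}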