guidance-ai · hudson-ai · Mar 18, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/c_sample/c_sample.cpp b/c_sample/c_sample.cpp
@@ -9,28 +9,73 @@
 
 #include "llguidance.h"
 
+// Create an LlgTokenizer through the v2 API (LlgTokenizerInitV2 + llg_new_tokenizer_v2).
+// tokens[i] is the byte sequence of token i; eos_tokens[0] is the primary EOS, any remaining entries are extra EOS token IDs.
+LlgTokenizer *create_tokenizer_v2(std::vector<std::vector<uint8_t>> &tokens,
+                                   std::vector<uint32_t> eos_tokens,
+                                   LlgTokenizeFn tokenize_fn,
+                                   const void *tokenize_user_data) {
+  assert(!eos_tokens.empty()); // eos_tokens[0] is read unconditionally below
+  std::vector<uint32_t> token_lens(tokens.size());
+  size_t total_size = 0;
+  for (size_t i = 0; i < tokens.size(); i++) { // pass 1: per-token lengths and total byte count
+    token_lens[i] = tokens[i].size();
+    total_size += token_lens[i];
+  }
+  std::vector<uint8_t> token_bytes(total_size);
+  size_t offset = 0;
+  for (size_t i = 0; i < tokens.size(); i++) { // pass 2: concatenate all token bytes back to back
+    std::copy(tokens[i].begin(), tokens[i].end(), token_bytes.data() + offset);
+    offset += token_lens[i];
+  }
+
+  LlgTokenizerInitV2 tok_init = {}; // zero-init so every field not set below stays 0/NULL
+  tok_init.struct_size = sizeof(tok_init); // tells the library how many bytes of the struct to read
+  tok_init.vocab_size = (uint32_t)tokens.size();
+  tok_init.tok_eos = eos_tokens[0];
+  tok_init.token_lens = token_lens.data();
+  tok_init.token_bytes = token_bytes.data();
+  tok_init.tokenize_assumes_string = false; // i.e. tokenize_fn must cope with arbitrary (non-UTF-8) bytes
+  tok_init.tokenize_user_data = tokenize_user_data;
+  tok_init.tokenize_fn = tokenize_fn;
+  if (eos_tokens.size() > 1) { // otherwise tok_eos_extra stays NULL and only tok_eos is used
+    tok_init.tok_eos_extra = eos_tokens.data() + 1;
+    tok_init.tok_eos_extra_count = (uint32_t)(eos_tokens.size() - 1);
+  }
+
+  char error_buf[128];
+  auto tok = llg_new_tokenizer_v2(&tok_init, error_buf, sizeof(error_buf));
+
+  if (tok == nullptr) { // sample code: print error_buf and exit with status 1
+    printf("Error (v2): %s\n", error_buf);
+    exit(1);
+  }
+
+  return tok; // NOTE(review): token_lens/token_bytes/eos_tokens die on return; presumably the library copied them - confirm
+}
+
 // Create an LlgTokenizer; tokens[token_id] is a byte sequence corresponding to
 // given token_id; see below for tokenize_fn
 LlgTokenizer *create_tokenizer(std::vector<std::vector<uint8_t>> &tokens,
                                uint32_t tok_eos, LlgTokenizeFn tokenize_fn,
                                const void *tokenize_user_data) {
-  auto token_lens = new uint32_t[tokens.size()];
+  std::vector<uint32_t> token_lens(tokens.size());
   size_t total_size = 0;
   for (size_t i = 0; i < tokens.size(); i++) {
     token_lens[i] = tokens[i].size();
     total_size += token_lens[i];
   }
-  auto token_bytes = new uint8_t[total_size];
+  std::vector<uint8_t> token_bytes(total_size);
   size_t offset = 0;
   for (size_t i = 0; i < tokens.size(); i++) {
-    memcpy(token_bytes + offset, tokens[i].data(), token_lens[i]);
+    std::copy(tokens[i].begin(), tokens[i].end(), token_bytes.data() + offset);
     offset += token_lens[i];
   }
   LlgTokenizerInit tok_init = {};
   tok_init.vocab_size = (uint32_t)tokens.size();
   tok_init.tok_eos = tok_eos;
-  tok_init.token_lens = token_lens;
-  tok_init.token_bytes = token_bytes;
+  tok_init.token_lens = token_lens.data();
+  tok_init.token_bytes = token_bytes.data();
   tok_init.tokenize_assumes_string = false;
   tok_init.tokenize_user_data = tokenize_user_data;
   tok_init.tokenize_fn = tokenize_fn;
@@ -63,15 +108,16 @@ size_t tokenize_callback(const void *user_data, const uint8_t *bytes,
   (void)user_data;
   auto tokens = bogus_tokenize(bytes, bytes_len);
   if (output_tokens_len > 0) {
-    memcpy(output_tokens, tokens.data(),
-           std::min(output_tokens_len, tokens.size()) * sizeof(uint32_t));
+    auto n = std::min(output_tokens_len, tokens.size());
+    std::copy(tokens.begin(), tokens.begin() + n, output_tokens);
   }
   return tokens.size();
 }
 
 // This creates a tokenizer that treats each byte as a token.
 LlgTokenizer *create_byte_tokenizer(void) {
   std::vector<std::vector<uint8_t>> tokens;
+  tokens.reserve(257); // 256 byte tokens + 1 EOS
   // every byte is a token
   for (size_t i = 0; i < 256; i++) {
     tokens.push_back({(uint8_t)i});
@@ -82,6 +128,23 @@ LlgTokenizer *create_byte_tokenizer(void) {
                           nullptr);
 }
 
+// Byte-level tokenizer like create_byte_tokenizer(), but built via the v2 API with an extra (unused) EOS token.
+LlgTokenizer *create_byte_tokenizer_v2(void) {
+  std::vector<std::vector<uint8_t>> tokens;
+  tokens.reserve(258); // 256 byte tokens + 2 EOS
+  for (size_t i = 0; i < 256; i++) { // token IDs 0..255 are the single bytes 0x00..0xFF
+    tokens.push_back({(uint8_t)i});
+  }
+  const char *eos = "<EOS>";
+  tokens.push_back(std::vector<uint8_t>(eos, eos + strlen(eos))); // becomes token 256
+  const char *eos2 = "<EOS2>";
+  tokens.push_back(std::vector<uint8_t>(eos2, eos2 + strlen(eos2))); // becomes token 257
+  // EOS tokens: token 256 (<EOS>) is primary, token 257 (<EOS2>) is extra
+  std::vector<uint32_t> eos_tokens = {(uint32_t)(tokens.size() - 2),
+                                      (uint32_t)(tokens.size() - 1)};
+  return create_tokenizer_v2(tokens, eos_tokens, tokenize_callback, nullptr);
+}
+
 LlgTokenizer *create_hf_tokenizer(std::string tokenizer_json,
                                   uint32_t tok_eos) {
   LlgTokenizerInit tok_init = {};
@@ -141,21 +204,8 @@ std::string do_llg_stringify_tokens(const LlgTokenizer *tok,
   }
 }
 
-int main(int argc, const char *argv[]) {
-  if (argc < 3) {
-    printf("Usage: %s <schema.ll.json> <sample.json> [tokenizer.json]\n",
-           argv[0]);
-    return 1;
-  }
-
-  // the tokenizer can (and should) be shared between constraints
-  LlgTokenizer *tokenizer = argc > 3
-                                ? create_hf_tokenizer(read_file(argv[3]), 2)
-                                : create_byte_tokenizer();
-
-  auto schema_json = read_file(argv[1]);
-  auto sample_json = read_file(argv[2]);
-
+void run_constraint_test(LlgTokenizer *tokenizer, const std::string &schema_json,
+                         const std::string &sample_json, const char *label) {
   LlgConstraintInit init;
   llg_constraint_init_set_defaults(&init, tokenizer);
   init.log_stderr_level = 0; // default to 1 (warnings only)
@@ -167,14 +217,6 @@ int main(int argc, const char *argv[]) {
     fail_constraint(c);
   }
 
-  // for debugging the tokenizer:
-  // for (int i = 0; i < 320; ++i) {
-  //   std::vector<uint32_t> tokens;
-  //   tokens.push_back(i);
-  //   std::string s = do_llg_stringify_tokens(tokenizer, tokens);
-  //   printf("Token %d: %s\n", i, s.c_str());
-  // }
-
   // we assume our "LLM" will generate these tokens
   auto tokens = do_llg_tokenize(tokenizer, sample_json);
 
@@ -212,6 +254,35 @@ int main(int argc, const char *argv[]) {
   // we assume the constraint will force EOS at the end of the input
   assert(mask_res.is_stop);
 
-  printf("OK!\n");
+  llg_free_constraint(c);
+  printf("%s: OK!\n", label);
+}
+
+int main(int argc, const char *argv[]) { // runs one schema/sample pair against a v1-built and then a v2-built tokenizer
+  if (argc < 3) {
+    printf("Usage: %s <schema.ll.json> <sample.json> [tokenizer.json]\n",
+           argv[0]);
+    return 1;
+  }
+
+  auto schema_json = read_file(argv[1]);
+  auto sample_json = read_file(argv[2]);
+
+  // Test with v1 API (LlgTokenizerInit + llg_new_tokenizer); uses tokenizer.json when given, else the byte tokenizer
+  {
+    LlgTokenizer *tokenizer = argc > 3
+                                  ? create_hf_tokenizer(read_file(argv[3]), 2) // 2 is passed as tok_eos
+                                  : create_byte_tokenizer();
+    run_constraint_test(tokenizer, schema_json, sample_json, "v1");
+    llg_free_tokenizer(tokenizer); // each scope frees the tokenizer it created
+  }
+
+  // Test with v2 API (LlgTokenizerInitV2 + llg_new_tokenizer_v2); always the byte tokenizer, argv[3] is not used here
+  {
+    LlgTokenizer *tokenizer = create_byte_tokenizer_v2();
+    run_constraint_test(tokenizer, schema_json, sample_json, "v2");
+    llg_free_tokenizer(tokenizer);
+  }
+
+  return 0;
+}
diff --git a/parser/llguidance.h b/parser/llguidance.h
@@ -186,6 +186,11 @@ typedef size_t (*LlgTokenizeFn)(const void *user_data,
                                 uint32_t *output_tokens,
                                 size_t output_tokens_len);
 
+/**
+ * This struct must be zero-initialized (e.g., `= {}` in C/C++) before setting fields.
+ * New fields may be appended in future versions, and zero-initialization ensures
+ * they receive safe default values.
+ */
 typedef struct LlgTokenizerInit {
   /**
    * The number of tokens in the vocabulary
@@ -241,6 +246,87 @@ typedef struct LlgTokenizerInit {
   const char *const *slices;
 } LlgTokenizerInit;
 
+/**
+ * V2 of the tokenizer initialization struct.
+ * Extends LlgTokenizerInit with support for multiple EOS tokens.
+ * Use with `llg_new_tokenizer_v2()`.
+ *
+ * Initialize with: `LlgTokenizerInitV2 init = {}; init.struct_size = sizeof(init);`
+ * The library only reads `struct_size` bytes from the pointer, so callers
+ * compiled against an older header (with a smaller struct) will work with
+ * newer library versions — any new fields default to zero.
+ */
+typedef struct LlgTokenizerInitV2 {
+  /**
+   * Must be set to `sizeof(LlgTokenizerInitV2)`.
+   * The library uses this to determine how many bytes to read, enabling
+   * forward compatibility when new fields are appended in future versions.
+   */
+  size_t struct_size;
+  /**
+   * The number of tokens in the vocabulary
+   */
+  uint32_t vocab_size;
+  /**
+   * The token ID for the end of sentence token
+   * For chat mode, set it to end-of-turn token.
+   */
+  LlgToken tok_eos;
+  /**
+   * An array of the lengths of the token strings (vocab_size elements)
+   */
+  const uint32_t *token_lens;
+  /**
+   * A pointer to the token strings
+   * The length of this is the sum of all token_lens
+   */
+  const uint8_t *token_bytes;
+  /**
+   * Instead of passing token_lens and token_bytes, this can be set to
+   * the contents of HF tokenizer.json file.
+   */
+  const char *tokenizer_json;
+  /**
+   * Set to true to enable hack that works around the tokenize_fn only
+   * accepting valid UTF-8 strings and possibly adding `<BOS>` etc.
+   * TODO: the `<BOS>` bit not implemented yet
+   */
+  bool tokenize_assumes_string;
+  /**
+   * Tokenization function, see LlgTokenizeFn docs.
+   * It should only tokenize the bytes and not add
+   * any `<BOS>` etc. It should also work on any byte sequence, including
+   * invalid UTF-8. If this is not the case, set tokenize_assumes_string to true.
+   * Either way, this function has to be thread-safe!
+   */
+  LlgTokenizeFn tokenize_fn;
+  /**
+   * Set to true to not use tokenize_fn and instead tokenize greedily,
+   * which is often incorrect and may reduce accuracy.
+   */
+  bool use_approximate_greedy_tokenize_fn;
+  /**
+   * User data to pass to the tokenize_fn
+   */
+  const void *tokenize_user_data;
+  /**
+   * Tokenizer partitions for the slicer optimization.
+   * This is an array of pointers to strings, terminated with NULL (argv style).
+   * Pass NULL to use defaults. Pass empty array to disable.
+   */
+  const char *const *slices;
+  /**
+   * Additional EOS token IDs beyond `tok_eos`.
+   * Points to an array of `tok_eos_extra_count` elements.
+   * When NULL (the default for zero-initialized structs), only `tok_eos` is used.
+   */
+  const LlgToken *tok_eos_extra;
+  /**
+   * Number of elements in the `tok_eos_extra` array.
+   */
+  uint32_t tok_eos_extra_count;
+} LlgTokenizerInitV2;
+
 
 
 #ifdef __cplusplus
@@ -347,6 +433,25 @@ struct LlgTokenizer *llg_new_tokenizer(const struct LlgTokenizerInit *tok_init,
                                        char *error_string,
                                        size_t error_string_len);
 
+/**
+ * Create a new tokenizer from an LlgTokenizerInitV2 struct (the v2 API, which supports multiple EOS tokens).
+ * NOTE(review): the bundled C sample treats a NULL return as failure and prints `error_string` — confirm this contract.
+ *
+ * The `tok_init` pointer must be valid and `tok_init->struct_size` must be set
+ * to `sizeof(LlgTokenizerInitV2)` as known by the caller. The library will
+ * only read `struct_size` bytes, so callers compiled against an older (smaller)
+ * version of the struct will work with newer library versions — new fields
+ * default to zero.
+ *
+ * `tok_init` must point to at least `tok_init->struct_size` bytes of
+ * initialized memory, and `struct_size` must be at least
+ * `offsetof(LlgTokenizerInitV2, token_lens)` (i.e., include struct_size,
+ * vocab_size, and the complete tok_eos field).
+ */
+struct LlgTokenizer *llg_new_tokenizer_v2(const struct LlgTokenizerInitV2 *tok_init,
+                                          char *error_string,
+                                          size_t error_string_len);
+
 /**
  * Clone a tokenizer.
  * This increments a reference count and does a small allocation.