diff --git a/libs/platform/ebpf_epoch.c b/libs/platform/ebpf_epoch.c
index a541014b1d..9a83f97e3e 100644
--- a/libs/platform/ebpf_epoch.c
+++ b/libs/platform/ebpf_epoch.c
@@ -10,56 +10,95 @@
 // Each block of code that accesses epoch freed memory wraps access in calls to
 // ebpf_epoch_enter/ebpf_epoch_exit.
 //
-// Epoch tracking is handled differently for pre-emptible vs non-pre-emptible
+// Epoch tracking is handled differently for preemptible vs non-preemptible
 // invocations.
 //
-// Non-pre-emptible invocations are:
-// 1) Tracked by the CPU they are running on as they don't switch CPUs.
-// 2) Accessed without synchronization.
-// 3) Set to the current epoch on entry.
+// Behavior for non-preemptible invocations:
+// 1) During ebpf_epoch_enter and ebpf_epoch_exit the value of _ebpf_current_epoch is captured in the
+// _ebpf_epoch_cpu_table[cpu_id].non_preemptible_epoch field.
+// 2) This field is read/written without explicit synchronization and can be old.
+// 3) _ebpf_epoch_update_cpu_entry periodically updates this on idle CPUs.
 //
-// Pre-emptible invocations are:
-// 1) Tracked by thread ID.
-// 2) Accessed under a lock.
-// 3) Set to the current epoch on entry.
-// 4) Set to epoch 0 on exit.
+// Behavior for preemptible invocations:
+// 1) During ebpf_epoch_enter the value of _ebpf_current_epoch is captured and stored in the
+// _ebpf_epoch_cpu_table[cpu_id].thread_table[thread_id].entry_epoch field.
+// 2) During ebpf_epoch_exit the value of _ebpf_current_epoch is captured and stored in the
+// _ebpf_epoch_cpu_table[cpu_id].thread_table[thread_id].exit_epoch field.
+// 3) The _ebpf_epoch_cpu_table[cpu_id].thread_table is protected by _ebpf_epoch_cpu_table[cpu_id].thread_table_lock.
+// 4) If and only if entry_epoch > exit_epoch then the thread is actively executing between calls to ebpf_epoch_enter
+// and ebpf_epoch_exit.
 //
-// Memory can be freed only if there is no code using that epoch.
-// The CPU epoch table and thread table are scanned to find the lowest epoch in use.
-// The release epoch is then lowest epoch - 1 (if not 0).
+// Common behavior:
+// 1) Calls to ebpf_epoch_free mark the memory with _ebpf_current_epoch, atomically increment it and insert the memory
+// into the _ebpf_epoch_cpu_table[cpu_id].free_list while holding the _ebpf_epoch_cpu_table[cpu_id].free_list_lock.
+// 2) During ebpf_epoch_exit all entries in _ebpf_epoch_cpu_table[cpu_id].free_list where freed_epoch <=
+// _ebpf_release_epoch are freed using ebpf_free.
+// 3) During ebpf_epoch_flush the minimum epoch is computed across the values in
+// _ebpf_epoch_cpu_table[*].non_preemptible_epoch and _ebpf_epoch_cpu_table[*].thread_table[*].entry_epoch (for active
+// threads) and then written to _ebpf_release_epoch.
+// 4) ebpf_epoch_flush is called periodically by _ebpf_flush_timer.
+// 5) ebpf_epoch_flush also queues a non-preemptible work-item (_ebpf_epoch_update_cpu_entry) on each CPU where
+// _ebpf_epoch_cpu_table[cpu_id].non_preemptible_epoch != _ebpf_current_epoch.
+// 6) _ebpf_epoch_update_cpu_entry calls ebpf_epoch_enter/ebpf_epoch_exit.
 //
 // Note:
-// CPU table entries aren't cleared on exit as we can't rely on
-// memory ordering.
+// CPU table entries aren't cleared on exit as we can't rely on memory ordering.
 // I.e., the thread doing the cleanup may have a stale view of the CPU table.
 // As long as the entries in the CPU table increase, this gives correct behavior.
 //
+// Interval at which the newest inactive epoch is recomputed.
 #define EBPF_EPOCH_FLUSH_DELAY_IN_MICROSECONDS 1000
 
-// TODO: This lock may become a contention point.
-// Investigate partitioning the table.
-// https://github.com/microsoft/ebpf-for-windows/issues/417
-static ebpf_lock_t _ebpf_epoch_thread_table_lock = {0};
-
-// Table to track what epoch each thread is on.
-static _Requires_lock_held_(&_ebpf_epoch_thread_table_lock) ebpf_hash_table_t* _ebpf_epoch_thread_table = NULL;
+typedef struct _ebpf_epoch_thread_entry
+{
+    int64_t entry_epoch;
+    int64_t exit_epoch;
+} ebpf_epoch_thread_entry_t;
 
-// Table to track what epoch each CPU is on.
+// Table to track per-CPU state.
 typedef struct _ebpf_epoch_cpu_entry
 {
-    int64_t epoch;
+    // Discussion: https://github.com/microsoft/ebpf-for-windows/discussions/442
+    // Should this be split into an entry/exit epoch + lock?
+    int64_t non_preemptible_epoch;
     ebpf_non_preemptible_work_item_t* non_preemptible_work_item;
+    // Discussion: https://github.com/microsoft/ebpf-for-windows/discussions/438
+    // Should this code switch to using an InterlockedSList?
     ebpf_lock_t free_list_lock;
-    ebpf_list_entry_t free_list;
+    _Requires_lock_held_(free_list_lock) ebpf_list_entry_t free_list;
+    // Discussion: https://github.com/microsoft/ebpf-for-windows/discussions/440
+    // Should this code switch to a lock-free hash table?
+    ebpf_lock_t thread_table_lock;
+    _Requires_lock_held_(thread_table_lock) ebpf_hash_table_t* thread_table;
 } ebpf_epoch_cpu_entry_t;
 
 static _Writable_elements_(_ebpf_epoch_cpu_count) ebpf_epoch_cpu_entry_t* _ebpf_epoch_cpu_table = NULL;
 static uint32_t _ebpf_epoch_cpu_count = 0;
 
+/**
+ * @brief _ebpf_current_epoch indicates the newest active epoch. All memory free
+ * operations were performed in an epoch prior to this value.
+ *
+ */
 static volatile int64_t _ebpf_current_epoch = 1;
+
+/**
+ * @brief _ebpf_release_epoch indicates the newest inactive epoch. Any memory
+ * freed in this epoch or an earlier epoch can be safely deleted.
+ *
+ */
+static volatile int64_t _ebpf_release_epoch = 0;
+
+/**
+ * @brief Flag to indicate that the eBPF epoch tracker is shutting down.
+ *
+ */
 static bool _ebpf_epoch_rundown = false;
+
+/**
+ * @brief Timer used to update _ebpf_release_epoch.
+ *
+ */
 static ebpf_timer_work_item_t* _ebpf_flush_timer = NULL;
 static volatile int32_t _ebpf_flush_timer_set = 0;
 
@@ -93,19 +132,20 @@ typedef struct _ebpf_epoch_work_item
 
 static bool _ebpf_epoch_initiated = false;
 
-// Release memory that was freed during this epoch or a prior epoch.
 static void
-_ebpf_epoch_release_free_list(uint32_t cpu_id, int64_t released_epoch);
+_ebpf_epoch_release_free_list(ebpf_epoch_cpu_entry_t* cpu_entry, int64_t released_epoch);
 
-// Get the highest epoch that is no longer in use.
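Editorial illustration (not part of this change): a minimal sketch of how a caller is expected to use the epoch protocol described in the file comment above, assuming only the public APIs declared in ebpf_epoch.h (ebpf_epoch_enter, ebpf_epoch_exit, ebpf_epoch_free). The names example_object_t, _example_shared, example_reader and example_writer are hypothetical, and the single-pointer publication is simplified.

#include "ebpf_epoch.h"

// Hypothetical object protected by epoch-based reclamation.
typedef struct _example_object
{
    int value;
} example_object_t;

static example_object_t* _example_shared = NULL;

void
example_reader()
{
    // Enter an epoch: records entry_epoch (preemptible) or the per-CPU epoch (non-preemptible).
    if (ebpf_epoch_enter() != EBPF_SUCCESS) {
        return;
    }
    example_object_t* object = _example_shared;
    if (object != NULL) {
        // Safe to dereference: the object cannot be reclaimed until every epoch at or before
        // its freed_epoch becomes inactive, i.e. until _ebpf_release_epoch catches up.
        (void)object->value;
    }
    // Exit the epoch: records exit_epoch and drains expired entries from this CPU's free list.
    ebpf_epoch_exit();
}

void
example_writer(example_object_t* replacement)
{
    example_object_t* old = _example_shared;
    _example_shared = replacement; // Simplified; a real writer would publish with an interlocked exchange.
    if (old != NULL) {
        // Tags the allocation with the current epoch and defers the actual ebpf_free
        // until that epoch has retired.
        ebpf_epoch_free(old);
    }
}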
 static ebpf_result_t
 _ebpf_epoch_get_release_epoch(_Out_ int64_t* released_epoch);
 
 static void
-_ebpf_epoch_update_cpu_entry(void* context, void* parameter_1);
+_ebpf_epoch_update_cpu_entry(_In_ void* context, _In_ void* parameter_1);
 
 static void
-_ebpf_flush_worker(void* context);
+_ebpf_flush_worker(_In_ void* context);
+
+ebpf_result_t
+_ebpf_epoch_update_thread_state(uint32_t cpu_id, uintptr_t thread_id, int64_t current_epoch, bool enter);
 
 ebpf_result_t
 ebpf_epoch_initiate()
@@ -121,8 +161,6 @@ ebpf_epoch_initiate()
     _ebpf_current_epoch = 1;
     _ebpf_epoch_cpu_count = cpu_count;
 
-    ebpf_lock_create(&_ebpf_epoch_thread_table_lock);
-
     _ebpf_epoch_cpu_table = ebpf_allocate(_ebpf_epoch_cpu_count * sizeof(ebpf_epoch_cpu_entry_t));
     if (!_ebpf_epoch_cpu_table) {
         return_value = EBPF_NO_MEMORY;
@@ -130,7 +168,7 @@ ebpf_epoch_initiate()
     }
 
     for (cpu_id = 0; cpu_id < _ebpf_epoch_cpu_count; cpu_id++) {
-        _ebpf_epoch_cpu_table[cpu_id].epoch = _ebpf_current_epoch;
+        _ebpf_epoch_cpu_table[cpu_id].non_preemptible_epoch = _ebpf_current_epoch;
         ebpf_list_initialize(&_ebpf_epoch_cpu_table[cpu_id].free_list);
         ebpf_lock_create(&_ebpf_epoch_cpu_table[cpu_id].free_list_lock);
 
@@ -145,12 +183,19 @@ ebpf_epoch_initiate()
             }
             _ebpf_epoch_cpu_table[cpu_id].non_preemptible_work_item = work_item_context;
         }
-    }
-    return_value = ebpf_hash_table_create(
-        &_ebpf_epoch_thread_table, ebpf_allocate, ebpf_free, sizeof(uint64_t), sizeof(int64_t), cpu_count, NULL);
-    if (return_value != EBPF_SUCCESS) {
-        goto Error;
+        ebpf_lock_create(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock);
+        return_value = ebpf_hash_table_create(
+            &_ebpf_epoch_cpu_table[cpu_id].thread_table,
+            ebpf_allocate,
+            ebpf_free,
+            sizeof(uintptr_t),
+            sizeof(ebpf_epoch_thread_entry_t),
+            _ebpf_epoch_cpu_count,
+            NULL);
+        if (return_value != EBPF_SUCCESS) {
+            goto Error;
+        }
     }
 
     return_value = ebpf_allocate_timer_work_item(&_ebpf_flush_timer, _ebpf_flush_worker, NULL);
@@ -179,18 +224,20 @@ ebpf_epoch_terminate()
             _ebpf_epoch_cpu_table[cpu_id].non_preemptible_work_item = NULL;
         }
     }
-    _ebpf_epoch_cpu_count = 0;
 
     ebpf_free_timer_work_item(_ebpf_flush_timer);
-    ebpf_hash_table_destroy(_ebpf_epoch_thread_table);
-    ebpf_lock_destroy(&_ebpf_epoch_thread_table_lock);
 
     _ebpf_epoch_rundown = true;
 
     for (cpu_id = 0; cpu_id < _ebpf_epoch_cpu_count; cpu_id++) {
-        _ebpf_epoch_release_free_list(cpu_id, MAXINT64);
+        _ebpf_epoch_release_free_list(&_ebpf_epoch_cpu_table[cpu_id], MAXINT64);
         ebpf_assert(ebpf_list_is_empty(&_ebpf_epoch_cpu_table[cpu_id].free_list));
         ebpf_lock_destroy(&_ebpf_epoch_cpu_table[cpu_id].free_list_lock);
+        ebpf_lock_destroy(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock);
+        ebpf_hash_table_destroy(_ebpf_epoch_cpu_table[cpu_id].thread_table);
+        _ebpf_epoch_cpu_table[cpu_id].thread_table = NULL;
     }
+    _ebpf_epoch_cpu_count = 0;
+
     ebpf_free(_ebpf_epoch_cpu_table);
     _ebpf_epoch_initiated = false;
 }
@@ -205,21 +252,9 @@ ebpf_epoch_enter()
     }
 
     if (ebpf_is_preemptible()) {
-        ebpf_result_t return_value;
-        ebpf_lock_state_t lock_state;
-        uint64_t current_thread_id = ebpf_get_current_thread_id();
-        int64_t current_epoch = _ebpf_current_epoch;
-        lock_state = ebpf_lock_lock(&_ebpf_epoch_thread_table_lock);
-        return_value = ebpf_hash_table_update(
-            _ebpf_epoch_thread_table,
-            (const uint8_t*)&current_thread_id,
-            (const uint8_t*)&current_epoch,
-            NULL,
-            EBPF_HASH_TABLE_OPERATION_ANY);
-        ebpf_lock_unlock(&_ebpf_epoch_thread_table_lock, lock_state);
-        return return_value;
+        return _ebpf_epoch_update_thread_state(current_cpu, ebpf_get_current_thread_id(), _ebpf_current_epoch, true);
     } else
     {
-        _ebpf_epoch_cpu_table[current_cpu].epoch = _ebpf_current_epoch;
+        _ebpf_epoch_cpu_table[current_cpu].non_preemptible_epoch = _ebpf_current_epoch;
         return EBPF_SUCCESS;
     }
 }
@@ -233,27 +268,19 @@ ebpf_epoch_exit()
     }
 
     if (ebpf_is_preemptible()) {
-        ebpf_lock_state_t lock_state;
-        ebpf_result_t result;
-        uint64_t current_thread_id = ebpf_get_current_thread_id();
-        int64_t current_epoch = 0;
-        lock_state = ebpf_lock_lock(&_ebpf_epoch_thread_table_lock);
-        result = ebpf_hash_table_update(
-            _ebpf_epoch_thread_table,
-            (const uint8_t*)&current_thread_id,
-            (const uint8_t*)&current_epoch,
-            NULL,
-            EBPF_HASH_TABLE_OPERATION_REPLACE);
-        ebpf_assert(result == EBPF_SUCCESS);
-        ebpf_lock_unlock(&_ebpf_epoch_thread_table_lock, lock_state);
+        _ebpf_epoch_update_thread_state(current_cpu, ebpf_get_current_thread_id(), _ebpf_current_epoch, false);
     } else {
-
-        _ebpf_epoch_cpu_table[current_cpu].epoch = _ebpf_current_epoch;
+        _ebpf_epoch_cpu_table[current_cpu].non_preemptible_epoch = _ebpf_current_epoch;
     }
+
     if (!ebpf_list_is_empty(&_ebpf_epoch_cpu_table[current_cpu].free_list) &&
         (ebpf_interlocked_compare_exchange_int32(&_ebpf_flush_timer_set, 1, 0) == 0)) {
         ebpf_schedule_timer_work_item(_ebpf_flush_timer, EBPF_EPOCH_FLUSH_DELAY_IN_MICROSECONDS);
     }
+
+    if (!ebpf_list_is_empty(&_ebpf_epoch_cpu_table[current_cpu].free_list)) {
+        _ebpf_epoch_release_free_list(&_ebpf_epoch_cpu_table[current_cpu], _ebpf_release_epoch);
+    }
 }
 
 void
@@ -263,6 +290,11 @@ ebpf_epoch_flush()
     int64_t released_epoch;
     uint32_t cpu_id;
 
+    return_value = _ebpf_epoch_get_release_epoch(&released_epoch);
+    if (return_value == EBPF_SUCCESS) {
+        _ebpf_release_epoch = released_epoch;
+    }
+
     if (ebpf_is_non_preemptible_work_item_supported()) {
         // Schedule a non-preemptible work item to bring the CPU up to the current
         // epoch.
@@ -270,17 +302,10 @@
         for (cpu_id = 0; cpu_id < _ebpf_epoch_cpu_count; cpu_id++) {
             // Note: Either the per-cpu epoch or the global epoch could be out of date.
             // That is acceptable as it may schedule an extra work item.
-            if (_ebpf_epoch_cpu_table[cpu_id].epoch != _ebpf_current_epoch)
+            if (_ebpf_epoch_cpu_table[cpu_id].non_preemptible_epoch != _ebpf_current_epoch)
                 ebpf_queue_non_preemptible_work_item(_ebpf_epoch_cpu_table[cpu_id].non_preemptible_work_item, NULL);
         }
     }
-
-    return_value = _ebpf_epoch_get_release_epoch(&released_epoch);
-    if (return_value == EBPF_SUCCESS) {
-        for (cpu_id = 0; cpu_id < _ebpf_epoch_cpu_count; cpu_id++) {
-            _ebpf_epoch_release_free_list(cpu_id, released_epoch);
-        }
-    }
 }
 
 void*
@@ -296,7 +321,7 @@ ebpf_epoch_allocate(size_t size)
 }
 
 void
-ebpf_epoch_free(void* memory)
+ebpf_epoch_free(_In_ void* memory)
 {
     ebpf_epoch_allocation_header_t* header = (ebpf_epoch_allocation_header_t*)memory;
     ebpf_lock_state_t lock_state;
@@ -327,8 +352,67 @@ ebpf_epoch_free(void* memory)
     ebpf_lock_unlock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock, lock_state);
 }
 
+ebpf_epoch_work_item_t*
+ebpf_epoch_allocate_work_item(_In_ void* callback_context, _In_ void (*callback)(void* context))
+{
+    ebpf_epoch_work_item_t* work_item = ebpf_allocate(sizeof(ebpf_epoch_work_item_t));
+    if (!work_item) {
+        return NULL;
+    }
+
+    work_item->callback = callback;
+    work_item->callback_context = callback_context;
+    work_item->header.entry_type = EBPF_EPOCH_ALLOCATION_WORK_ITEM;
+
+    return work_item;
+}
+
+void
+ebpf_epoch_schedule_work_item(_In_ ebpf_epoch_work_item_t* work_item)
+{
+    ebpf_lock_state_t lock_state;
+    uint32_t current_cpu;
+    current_cpu = ebpf_get_current_cpu();
+    if (current_cpu >= _ebpf_epoch_cpu_count) {
+        return;
+    }
+
+    if (_ebpf_epoch_rundown) {
+        work_item->callback(work_item->callback_context);
+        return;
+    }
+
+    // Items are inserted into the free list in increasing epoch order.
+    lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock);
+    work_item->header.freed_epoch = ebpf_interlocked_increment_int64(&_ebpf_current_epoch) - 1;
+    ebpf_list_insert_tail(&_ebpf_epoch_cpu_table[current_cpu].free_list, &work_item->header.list_entry);
+    ebpf_lock_unlock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock, lock_state);
+}
+
+void
+ebpf_epoch_free_work_item(_In_ ebpf_epoch_work_item_t* work_item)
+{
+    ebpf_lock_state_t lock_state;
+    uint32_t current_cpu;
+    current_cpu = ebpf_get_current_cpu();
+    if (current_cpu >= _ebpf_epoch_cpu_count) {
+        return;
+    }
+
+    lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock);
+    ebpf_list_remove_entry(&work_item->header.list_entry);
+    ebpf_lock_unlock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock, lock_state);
+    ebpf_free(work_item);
+}
+
+/**
+ * @brief Remove all entries from the per-CPU free list that have an epoch that is at or before released_epoch.
+ *
+ * @param[in] cpu_entry The per-CPU entry whose free list is searched.
+ * @param[in] released_epoch The newest epoch that can be released.
+ */
 static void
-_ebpf_epoch_release_free_list(uint32_t cpu_id, int64_t released_epoch)
+_ebpf_epoch_release_free_list(ebpf_epoch_cpu_entry_t* cpu_entry, int64_t released_epoch)
 {
     ebpf_lock_state_t lock_state;
     ebpf_list_entry_t* entry;
@@ -338,9 +422,9 @@ _ebpf_epoch_release_free_list(uint32_t cpu_id, int64_t released_epoch)
     ebpf_list_initialize(&free_list);
 
     // Move all expired items to the free list.
-    lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[cpu_id].free_list_lock);
-    while (!ebpf_list_is_empty(&_ebpf_epoch_cpu_table[cpu_id].free_list)) {
-        entry = _ebpf_epoch_cpu_table[cpu_id].free_list.Flink;
+    lock_state = ebpf_lock_lock(&cpu_entry->free_list_lock);
+    while (!ebpf_list_is_empty(&cpu_entry->free_list)) {
+        entry = cpu_entry->free_list.Flink;
         header = CONTAINING_RECORD(entry, ebpf_epoch_allocation_header_t, list_entry);
         if (header->freed_epoch <= released_epoch) {
             ebpf_list_remove_entry(entry);
@@ -349,7 +433,7 @@
             break;
         }
     }
-    ebpf_lock_unlock(&_ebpf_epoch_cpu_table[cpu_id].free_list_lock, lock_state);
+    ebpf_lock_unlock(&cpu_entry->free_list_lock, lock_state);
 
     // Free all the expired items outside of the lock.
     while (!ebpf_list_is_empty(&free_list)) {
@@ -367,119 +451,211 @@
             }
         }
     }
+    ebpf_assert(ebpf_list_is_empty(&cpu_entry->free_list) || !_ebpf_epoch_rundown);
 }
 
+/**
+ * @brief Determine the newest inactive epoch and return it.
+ *
+ * @param[out] release_epoch The newest inactive epoch.
+ * @retval EBPF_SUCCESS Found the newest inactive epoch.
+ * @retval EBPF_NO_MEMORY Insufficient memory to complete this operation.
+ */
 static ebpf_result_t
 _ebpf_epoch_get_release_epoch(_Out_ int64_t* release_epoch)
 {
-    int64_t lowest_epoch = INT64_MAX;
-    int64_t* thread_epoch;
+    // Grab an authoritative version of _ebpf_current_epoch.
+    // Note: If there are no active threads or non-preemptible work items then we need to assign
+    // an epoch that is guaranteed to be older than any thread that starts after this point.
+    // Grabbing the current epoch guarantees that.
+    int64_t lowest_epoch = ebpf_interlocked_increment_int64(&_ebpf_current_epoch);
     uint32_t cpu_id;
-    uint64_t thread_id = 0;
     ebpf_lock_state_t lock_state;
     ebpf_result_t return_value;
+    ebpf_hash_table_t* per_thread_epoch_table = NULL;
+
+    return_value = ebpf_hash_table_create(
+        &per_thread_epoch_table,
+        ebpf_allocate,
+        ebpf_free,
+        sizeof(uintptr_t),
+        sizeof(ebpf_epoch_thread_entry_t),
+        _ebpf_epoch_cpu_count,
+        NULL);
+    if (return_value != EBPF_SUCCESS) {
+        goto Exit;
+    }
 
+    // Gather the lowest epoch from non-preemptible work items that may have run.
+    // If the platform supports non-preemptible work items, check the per-CPU epochs.
     if (ebpf_is_non_preemptible_work_item_supported()) {
         for (cpu_id = 0; cpu_id < _ebpf_epoch_cpu_count; cpu_id++) {
-            if (_ebpf_epoch_cpu_table[cpu_id].epoch < lowest_epoch)
-                lowest_epoch = _ebpf_epoch_cpu_table[cpu_id].epoch;
+            lowest_epoch = min(lowest_epoch, _ebpf_epoch_cpu_table[cpu_id].non_preemptible_epoch);
         }
     }
 
-    lock_state = ebpf_lock_lock(&_ebpf_epoch_thread_table_lock);
-    return_value = ebpf_hash_table_next_key(_ebpf_epoch_thread_table, NULL, (uint8_t*)&thread_id);
-    if (return_value == EBPF_SUCCESS)
-        for (;;) {
-            return_value =
-                ebpf_hash_table_find(_ebpf_epoch_thread_table, (uint8_t*)&thread_id, (uint8_t**)&thread_epoch);
-            if (return_value != EBPF_SUCCESS)
-                break;
-            if (*thread_epoch != 0 && *thread_epoch < lowest_epoch)
-                lowest_epoch = *thread_epoch;
+    // Gather the highest entry/exit epoch each thread has seen across all CPUs.
+    for (cpu_id = 0; cpu_id < _ebpf_epoch_cpu_count; cpu_id++) {
+        ebpf_epoch_thread_entry_t* thread_entry = NULL;
+        uintptr_t thread_id = 0;
+        // Check each per-CPU thread state.
+        lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock);
+        while (return_value == EBPF_SUCCESS) {
+            ebpf_epoch_thread_entry_t* new_thread_entry = NULL;
+            ebpf_result_t local_result;
+            // Get the next per-thread entry from this CPU.
+            return_value = ebpf_hash_table_next_key_and_value(
+                _ebpf_epoch_cpu_table[cpu_id].thread_table,
+                thread_id == 0 ? NULL : (uint8_t*)&thread_id,
+                (uint8_t*)&thread_id,
+                (uint8_t**)&thread_entry);
-            return_value =
-                ebpf_hash_table_next_key(_ebpf_epoch_thread_table, (uint8_t*)&thread_id, (uint8_t*)&thread_id);
-            if (return_value != EBPF_SUCCESS)
+            if (return_value != EBPF_SUCCESS) {
                 break;
+            }
+
+            // Check if this thread is already present in the aggregated per-thread table.
+            local_result =
+                ebpf_hash_table_find(per_thread_epoch_table, (uint8_t*)&thread_id, (uint8_t**)&new_thread_entry);
+            if (local_result == EBPF_KEY_NOT_FOUND) {
+                // Not found, insert a copy of the per-CPU entry.
+                return_value = ebpf_hash_table_update(
+                    per_thread_epoch_table,
+                    (uint8_t*)&thread_id,
+                    (uint8_t*)thread_entry,
+                    NULL,
+                    EBPF_HASH_TABLE_OPERATION_INSERT);
+            } else if (local_result == EBPF_SUCCESS) {
+                // Found, merge the aggregated and per-CPU entries.
+                new_thread_entry->entry_epoch = max(new_thread_entry->entry_epoch, thread_entry->entry_epoch);
+                new_thread_entry->exit_epoch = max(new_thread_entry->exit_epoch, thread_entry->exit_epoch);
+            }
+        }
+        ebpf_lock_unlock(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock, lock_state);
+        if (return_value != EBPF_NO_MORE_KEYS) {
+            goto Exit;
         }
-    ebpf_lock_unlock(&_ebpf_epoch_thread_table_lock, lock_state);
+        return_value = EBPF_SUCCESS;
+    }
+    // Gather the lowest epoch from threads that are actively running.
+    // A thread is active if and only if entry_epoch > exit_epoch.
+    uintptr_t thread_id = 0;
+    while (return_value == EBPF_SUCCESS) {
+        ebpf_epoch_thread_entry_t* thread_entry = NULL;
+        return_value = ebpf_hash_table_next_key_and_value(
+            per_thread_epoch_table,
+            thread_id == 0 ? NULL : (uint8_t*)&thread_id,
+            (uint8_t*)&thread_id,
+            (uint8_t**)&thread_entry);
+
+        if (return_value == EBPF_SUCCESS) {
+            // Only consider the thread if it is active.
+            if (thread_entry->entry_epoch > thread_entry->exit_epoch) {
+                lowest_epoch = min(lowest_epoch, thread_entry->entry_epoch);
+            }
+        }
+    }
     if (return_value != EBPF_NO_MORE_KEYS) {
-        return return_value;
+        goto Exit;
     }
+    return_value = EBPF_SUCCESS;
+
+Exit:
+    if (per_thread_epoch_table) {
+        ebpf_hash_table_destroy(per_thread_epoch_table);
+    }
+
+    *release_epoch = lowest_epoch - 1;
-    return EBPF_SUCCESS;
+    return return_value;
 }
 
+/**
+ * @brief Helper function to bring this CPU up to the current epoch and flush its free list.
+ *
+ * @param[in] context Pointer to the ebpf_epoch_cpu_entry_t to update.
+ * @param[in] parameter_1 Not used.
+ */
 static void
-_ebpf_epoch_update_cpu_entry(void* context, void* parameter_1)
+_ebpf_epoch_update_cpu_entry(_In_ void* context, _In_ void* parameter_1)
 {
     ebpf_epoch_cpu_entry_t* cpu_entry = (ebpf_epoch_cpu_entry_t*)context;
     UNREFERENCED_PARAMETER(parameter_1);
 
-    cpu_entry->epoch = _ebpf_current_epoch;
+    cpu_entry->non_preemptible_epoch = _ebpf_current_epoch;
+    if (!ebpf_list_is_empty(&cpu_entry->free_list)) {
+        _ebpf_epoch_release_free_list(cpu_entry, _ebpf_release_epoch);
+    }
 }
 
+/**
+ * @brief Routine executed on a timer to compute the newest inactive epoch.
+ *
+ * @param[in] context Unused.
+ */
 static void
-_ebpf_flush_worker(void* context)
+_ebpf_flush_worker(_In_ void* context)
 {
     UNREFERENCED_PARAMETER(context);
 
     ebpf_epoch_flush();
-    _ebpf_flush_timer_set = 0;
+    ebpf_interlocked_compare_exchange_int32(&_ebpf_flush_timer_set, 0, 1);
 }
 
-ebpf_epoch_work_item_t*
-ebpf_epoch_allocate_work_item(void* callback_context, void (*callback)(void* context))
+ebpf_result_t
+_ebpf_epoch_update_thread_state(uint32_t cpu_id, uintptr_t thread_id, int64_t current_epoch, bool enter)
 {
-    ebpf_epoch_work_item_t* work_item = ebpf_allocate(sizeof(ebpf_epoch_work_item_t));
-    if (!work_item) {
-        return NULL;
+    ebpf_result_t return_value;
+    ebpf_lock_state_t lock_state;
+    ebpf_epoch_thread_entry_t* thread_state = NULL;
+    ebpf_epoch_thread_entry_t local_thread_state = {enter ? current_epoch : 0, !enter ? current_epoch : 0};
+    lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock);
+    return_value = ebpf_hash_table_find(
+        _ebpf_epoch_cpu_table[cpu_id].thread_table, (uint8_t*)&thread_id, (uint8_t**)&thread_state);
+    if (return_value == EBPF_SUCCESS) {
+        if (enter) {
+            thread_state->entry_epoch = current_epoch;
+        } else {
+            thread_state->exit_epoch = current_epoch;
+        }
+        return_value = EBPF_SUCCESS;
+    } else if (return_value == EBPF_KEY_NOT_FOUND) {
+        return_value = ebpf_hash_table_update(
+            _ebpf_epoch_cpu_table[cpu_id].thread_table,
+            (const uint8_t*)&thread_id,
+            (const uint8_t*)&local_thread_state,
+            NULL,
+            EBPF_HASH_TABLE_OPERATION_INSERT);
     }
+    ebpf_lock_unlock(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock, lock_state);
 
-    work_item->callback = callback;
-    work_item->callback_context = callback_context;
-    work_item->header.entry_type = EBPF_EPOCH_ALLOCATION_WORK_ITEM;
-
-    return work_item;
-}
-
-void
-ebpf_epoch_schedule_work_item(ebpf_epoch_work_item_t* work_item)
-{
-    ebpf_lock_state_t lock_state;
-    uint32_t current_cpu;
-    current_cpu = ebpf_get_current_cpu();
-    if (current_cpu >= _ebpf_epoch_cpu_count) {
-        return;
+    if (return_value == EBPF_SUCCESS) {
+        goto Exit;
     }
 
-    if (_ebpf_epoch_rundown) {
-        work_item->callback(work_item->callback_context);
-        return;
+    if (enter) {
+        goto Exit;
     }
 
-    // Items are inserted into the free list in increasing epoch order.
-    lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock);
-    work_item->header.freed_epoch = ebpf_interlocked_increment_int64(&_ebpf_current_epoch) - 1;
-    ebpf_list_insert_tail(&_ebpf_epoch_cpu_table[current_cpu].free_list, &work_item->header.list_entry);
-    ebpf_lock_unlock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock, lock_state);
-}
+    // This can only fail on out of memory.
+    ebpf_assert(return_value == EBPF_NO_MEMORY);
 
-void
-ebpf_epoch_free_work_item(ebpf_epoch_work_item_t* work_item)
-{
-    ebpf_lock_state_t lock_state;
-    uint32_t current_cpu;
-    current_cpu = ebpf_get_current_cpu();
-    if (current_cpu >= _ebpf_epoch_cpu_count) {
-        return;
+    // Failed to insert on exit.
+    // There must be an existing thread entry for this thread on another CPU.
+    for (cpu_id = 0; cpu_id < _ebpf_epoch_cpu_count; cpu_id++) {
+        lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock);
+        return_value = ebpf_hash_table_find(
+            _ebpf_epoch_cpu_table[cpu_id].thread_table, (uint8_t*)&thread_id, (uint8_t**)&thread_state);
+        if (return_value == EBPF_SUCCESS) {
+            thread_state->exit_epoch = current_epoch;
+        }
+        ebpf_lock_unlock(&_ebpf_epoch_cpu_table[cpu_id].thread_table_lock, lock_state);
+        if (thread_state) {
+            break;
+        }
     }
+    // There must be at least 1 thread_state created on entry.
+    ebpf_assert(thread_state);
 
-    lock_state = ebpf_lock_lock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock);
-    ebpf_list_remove_entry(&work_item->header.list_entry);
-    ebpf_lock_unlock(&_ebpf_epoch_cpu_table[current_cpu].free_list_lock, lock_state);
-    ebpf_free(work_item);
+
+Exit:
+    return return_value;
 }
diff --git a/libs/platform/ebpf_epoch.h b/libs/platform/ebpf_epoch.h
index c6e84dffc1..1ccb91ad0c 100644
--- a/libs/platform/ebpf_epoch.h
+++ b/libs/platform/ebpf_epoch.h
@@ -57,7 +57,7 @@ extern "C"
      * @param[in] memory Allocation to be freed once epoch ends.
      */
     void
-    ebpf_epoch_free(void* memory);
+    ebpf_epoch_free(_In_ void* memory);
 
     /**
      * @Brief Release any memory that is associated with expired epochs.
@@ -74,7 +74,7 @@ extern "C"
      * @return Pointer to work item that can be scheduled.
      */
     ebpf_epoch_work_item_t*
-    ebpf_epoch_allocate_work_item(void* callback_context, void (*callback)(void* context));
+    ebpf_epoch_allocate_work_item(_In_ void* callback_context, _In_ void (*callback)(void* context));
 
     /**
      * @brief Schedule a previously allocated work-item to run when the current
@@ -83,7 +83,7 @@ extern "C"
      * @param[in] work_item Pointer to work item to run on epoch end.
      */
     void
-    ebpf_epoch_schedule_work_item(ebpf_epoch_work_item_t* work_item);
+    ebpf_epoch_schedule_work_item(_In_ ebpf_epoch_work_item_t* work_item);
 
     /**
      * @brief Free an epoch work item.
@@ -91,7 +91,7 @@ extern "C"
      * @param[in] work_item Pointer to work item to free.
      */
     void
-    ebpf_epoch_free_work_item(ebpf_epoch_work_item_t* work_item);
+    ebpf_epoch_free_work_item(_In_ ebpf_epoch_work_item_t* work_item);
 
 #ifdef __cplusplus
 }
diff --git a/libs/platform/unit/platform_unit_test.cpp b/libs/platform/unit/platform_unit_test.cpp
index 572c94e9e7..44f1e47529 100644
--- a/libs/platform/unit/platform_unit_test.cpp
+++ b/libs/platform/unit/platform_unit_test.cpp
@@ -138,15 +138,29 @@ TEST_CASE("hash_table_stress_test", "[platform]")
     ebpf_hash_table_t* table = nullptr;
     const size_t iterations = 1000;
     uint32_t worker_threads = ebpf_get_cpu_count();
+    uint32_t key_count = 4;
+    uint32_t load_factor = 4;
+    int32_t cpu_id = 0;
     REQUIRE(
         ebpf_hash_table_create(
-            &table, ebpf_epoch_allocate, ebpf_epoch_free, sizeof(uint32_t), sizeof(uint64_t), worker_threads, NULL) ==
-        EBPF_SUCCESS);
-    auto worker = [table, iterations]() {
+            &table,
+            ebpf_epoch_allocate,
+            ebpf_epoch_free,
+            sizeof(uint32_t),
+            sizeof(uint64_t),
+            static_cast<size_t>(worker_threads) * static_cast<size_t>(key_count),
+            NULL) == EBPF_SUCCESS);
+    auto worker = [table, iterations, key_count, load_factor, &cpu_id]() {
         uint32_t next_key = 0;
         uint64_t value = 11;
        uint64_t** returned_value = nullptr;
-        std::vector<uint32_t> keys(32);
+        std::vector<uint32_t> keys(static_cast<size_t>(key_count) * static_cast<size_t>(load_factor));
+
+        uint32_t local_cpu_id = ebpf_interlocked_increment_int32(&cpu_id) - 1;
+        uintptr_t thread_mask = local_cpu_id;
+        thread_mask = static_cast<uintptr_t>(1) << thread_mask;
+        SetThreadAffinityMask(GetCurrentThread(), thread_mask);
+
         for (auto& key : keys) {
             key = ebpf_random_uint32();
         }
diff --git a/libs/platform/user/framework.h b/libs/platform/user/framework.h
index 5b24ebb330..dddd345704 100644
--- a/libs/platform/user/framework.h
+++ b/libs/platform/user/framework.h
@@ -55,6 +55,8 @@ extern "C"
         entry->Blink = previous_entry;
         previous_entry->Flink = entry;
         list_head->Blink = entry;
+        ebpf_assert(list_head->Blink->Flink == list_head);
+        ebpf_assert(list_head->Flink->Blink == list_head);
         return;
     }
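Editorial illustration (not part of the diff): a sketch of the epoch work-item API whose prototypes gain SAL annotations above. The pattern defers a cleanup callback until the epoch in which it was scheduled has retired. example_resource_t, _example_cleanup, example_resource_initialize and example_resource_retire are hypothetical names; per ebpf_epoch.h, the owner also remains responsible for eventually releasing the work item itself with ebpf_epoch_free_work_item.

#include "ebpf_epoch.h"

// Hypothetical resource whose backing state must outlive in-flight epoch-protected readers.
typedef struct _example_resource
{
    ebpf_epoch_work_item_t* cleanup_work_item;
    void* state;
} example_resource_t;

static void
_example_cleanup(void* context)
{
    // Invoked only after the epoch in which the work item was scheduled has become
    // inactive (or immediately at epoch rundown), so no reader can still see state.
    example_resource_t* resource = (example_resource_t*)context;
    ebpf_free(resource->state);
    resource->state = NULL;
}

ebpf_result_t
example_resource_initialize(example_resource_t* resource)
{
    // Allocate the work item up front so that retiring the resource cannot fail later.
    resource->cleanup_work_item = ebpf_epoch_allocate_work_item(resource, _example_cleanup);
    return resource->cleanup_work_item ? EBPF_SUCCESS : EBPF_NO_MEMORY;
}

void
example_resource_retire(example_resource_t* resource)
{
    // Tags the work item with the current epoch and queues it on this CPU's free list;
    // the callback fires once all epoch-protected readers of the resource have drained.
    ebpf_epoch_schedule_work_item(resource->cleanup_work_item);
}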