@@ -1,4 +1,5 @@
 #include <stdarg.h>
+#include <algorithm>
 #include <atomic>
 #include <chrono>
 #include <fstream>
@@ -1995,7 +1996,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread |
     std::atomic<int64_t> copy_to_backend_time_ms(0);
     std::atomic<int64_t> convert_time_ms(0);
 
-    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : (int)std::thread::hardware_concurrency();
+    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : get_num_physical_cores();
+    LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
 
     int64_t start_time = ggml_time_ms();
     std::vector<TensorStorage> processed_tensor_storages;
@@ -2045,13 +2047,25 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread |
             w.join();
         }
 
-        std::unordered_map<std::string, IndexedStorage> latest_map;
+        std::vector<IndexedStorage> deduplicated;
+        deduplicated.reserve(all_results.size());
+        std::unordered_map<std::string, size_t> name_to_pos;
         for (auto& entry : all_results) {
-            latest_map[entry.ts.name] = entry;
+            auto it = name_to_pos.find(entry.ts.name);
+            if (it == name_to_pos.end()) {
+                name_to_pos.emplace(entry.ts.name, deduplicated.size());
+                deduplicated.push_back(entry);
+            } else if (deduplicated[it->second].index < entry.index) {
+                deduplicated[it->second] = entry;
+            }
         }
 
-        processed_tensor_storages.reserve(latest_map.size());
-        for (auto& [name, entry] : latest_map) {
+        std::sort(deduplicated.begin(), deduplicated.end(), [](const IndexedStorage& a, const IndexedStorage& b) {
+            return a.index < b.index;
+        });
+
+        processed_tensor_storages.reserve(deduplicated.size());
+        for (auto& entry : deduplicated) {
             processed_tensor_storages.push_back(entry.ts);
         }
     }
@@ -2447,6 +2461,8 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage |
             // Pass, do not convert. For MMDiT
         } else if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
             // Pass, do not convert. For Unet
+        } else if (contains(name, "embedding")) {
+            // Pass, do not convert embeddings
         } else {
             return true;
         }
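
A note on the deduplication hunk: the replaced `latest_map` version collected entries in a `std::unordered_map`, whose iteration order is unspecified, so `processed_tensor_storages` came out in bucket order. The new pass is order-stable: the first occurrence of a tensor name claims a slot, a later duplicate with a higher `index` overwrites that slot in place, and a final `std::sort` restores index order (hence the new `<algorithm>` include). A minimal standalone sketch of the same pattern — the `Entry` struct and the sample data are illustrative stand-ins, not code from this repository:

```cpp
#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

// Illustrative stand-in for the loader's IndexedStorage entry.
struct Entry {
    std::string name;  // tensor name, the deduplication key
    int index;         // position in the original tensor list
};

// Collapse duplicate names, keeping the entry with the highest index,
// then restore a deterministic order by sorting on that index.
std::vector<Entry> dedup_latest(const std::vector<Entry>& all) {
    std::vector<Entry> out;
    out.reserve(all.size());
    std::unordered_map<std::string, size_t> name_to_pos;
    for (const auto& e : all) {
        auto it = name_to_pos.find(e.name);
        if (it == name_to_pos.end()) {
            name_to_pos.emplace(e.name, out.size());
            out.push_back(e);
        } else if (out[it->second].index < e.index) {
            out[it->second] = e;  // later duplicate wins
        }
    }
    std::sort(out.begin(), out.end(),
              [](const Entry& a, const Entry& b) { return a.index < b.index; });
    return out;
}

int main() {
    // "alpha" appears twice; the index-2 version must survive.
    std::vector<Entry> all = {{"alpha", 0}, {"beta", 1}, {"alpha", 2}};
    for (const auto& e : dedup_latest(all)) {
        std::printf("%s (index %d)\n", e.name.c_str(), e.index);
    }
    // prints: beta (index 1) then alpha (index 2)
    return 0;
}
```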
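
On the thread-count hunk: `std::thread::hardware_concurrency()` reports logical CPUs, counts SMT siblings, and is allowed to return 0 when the count cannot be determined, while the `get_num_physical_cores()` helper the diff switches to favors physical cores, a better fit for memory-bandwidth-bound copy/convert work. Anyone keeping the standard call should at least guard the zero return; a rough sketch of that guard (the function below is illustrative, not the project's helper):

```cpp
#include <thread>

// Illustrative guard around std::thread::hardware_concurrency(),
// which may legally return 0 when the core count is unknown.
int resolve_thread_count(int requested) {
    if (requested > 0) {
        return requested;  // an explicit request wins, mirroring n_threads_p > 0
    }
    unsigned int hw = std::thread::hardware_concurrency();
    return hw > 0u ? (int)hw : 1;  // never fall back to 0 workers
}
```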