
Commit a8d3aa0

Merge branch 'master' into qwen_image

2 parents 5af0bb0 + 1ba30ce

File tree: 13 files changed, +77 −48 lines

README.md

Lines changed: 4 additions & 3 deletions

````diff
@@ -125,13 +125,14 @@ cmake --build . --config Release
 
 ##### Using HipBLAS
 This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure to have the ROCm toolkit installed.
+To build for a GPU architecture other than the one installed in your system, set `$GFX_NAME` manually to the desired architecture (replace the first command below). This is also necessary if your GPU is not officially supported by ROCm; for example, you have to set `$GFX_NAME` to `gfx1030` for consumer RDNA2 cards.
 
 Windows users: refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
 
 ```
-export GFX_NAME=$(rocminfo | grep -m 1 -E "gfx[^0]{1}" | sed -e 's/ *Name: *//' | awk '{$1=$1; print}' || echo "rocminfo missing")
-echo $GFX_NAME
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
+if command -v rocminfo; then export GFX_NAME=$(rocminfo | awk '/ *Name: +gfx[1-9]/ {print $2; exit}'); else echo "rocminfo missing!"; fi
+if [ -z "${GFX_NAME}" ]; then echo "Error: Couldn't detect GPU!"; else echo "Building for GPU: ${GFX_NAME}"; fi
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DAMDGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
 cmake --build . --config Release
 ```
````

clip.hpp

Lines changed: 5 additions & 4 deletions

```diff
@@ -553,12 +553,13 @@ class CLIPEmbeddings : public GGMLBlock {
     void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         enum ggml_type token_wtype = GGML_TYPE_F32;
         if (!force_clip_f32) {
-            auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
-            if (tensor_type != tensor_types.end())
+            auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
+            std::set<ggml_type> allow_types = {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
+            if (tensor_type != tensor_types.end() && allow_types.find(tensor_type->second) != allow_types.end()) {
                 token_wtype = tensor_type->second;
+            }
         }
-        enum ggml_type position_wtype = GGML_TYPE_F32;
-
+        enum ggml_type position_wtype = GGML_TYPE_F32;
         params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
         params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
     }
```
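The selection logic now reduces to: keep the on-disk type of `token_embedding.weight` only when it is in a small allow-list, otherwise fall back to F32. A minimal self-contained sketch of that rule — `pick_token_wtype` and the plain `std::map` are illustrative stand-ins, not names from this commit:

```cpp
// Sketch of the allow-list fallback above; assumes ggml.h is on the include path.
#include <map>
#include <set>
#include <string>
#include "ggml.h"

static ggml_type pick_token_wtype(const std::map<std::string, ggml_type>& tensor_types,
                                  const std::string& prefix) {
    static const std::set<ggml_type> allow_types = {
        GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1,
        GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
    auto it = tensor_types.find(prefix + "token_embedding.weight");
    if (it != tensor_types.end() && allow_types.count(it->second)) {
        return it->second;  // keep the on-disk type when it is in the allow-list
    }
    return GGML_TYPE_F32;   // anything else (K-quants, IQ types, ...) falls back to F32
}
```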

conditioner.hpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -146,7 +146,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
            }
            return true;
        };
-       model_loader.load_tensors(on_load);
+       model_loader.load_tensors(on_load, 1);
        readed_embeddings.push_back(embd_name);
        if (embd) {
            int64_t hidden_size = text_model->model.hidden_size;
```

control.hpp

Lines changed: 2 additions & 2 deletions

```diff
@@ -445,7 +445,7 @@ struct ControlNet : public GGMLRunner {
         guided_hint_cached = true;
     }
 
-    bool load_from_file(const std::string& file_path) {
+    bool load_from_file(const std::string& file_path, int n_threads) {
         LOG_INFO("loading control net from '%s'", file_path.c_str());
         alloc_params_buffer();
         std::map<std::string, ggml_tensor*> tensors;
@@ -458,7 +458,7 @@ struct ControlNet : public GGMLRunner {
             return false;
         }
 
-        bool success = model_loader.load_tensors(tensors, ignore_tensors);
+        bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads);
 
         if (!success) {
             LOG_ERROR("load control net tensors from model loader failed");
```

docs/lora.md

Lines changed: 27 additions & 17 deletions

```diff
@@ -20,20 +20,30 @@ Here's a simple example:
 
 NOTE: The other backends may have different support.
 
-| Quant / Type | CUDA |
-|--------------|------|
-| F32          | ✔️   |
-| F16          | ✔️   |
-| BF16         | ✔️   |
-| I32          | ✔️   |
-| Q4_0         | ✔️   |
-| Q4_1         | ✔️   |
-| Q5_0         | ✔️   |
-| Q5_1         | ✔️   |
-| Q8_0         | ✔️   |
-| Q2_K         |      |
-| Q3_K         |      |
-| Q4_K         |      |
-| Q5_K         |      |
-| Q6_K         |      |
-| Q8_K         |      |
+| Quant / Type | CUDA | Vulkan |
+|--------------|------|--------|
+| F32          | ✔️   | ✔️     |
+| F16          | ✔️   | ✔️     |
+| BF16         | ✔️   | ✔️     |
+| I32          | ✔️   |        |
+| Q4_0         | ✔️   | ✔️     |
+| Q4_1         | ✔️   | ✔️     |
+| Q5_0         | ✔️   | ✔️     |
+| Q5_1         | ✔️   | ✔️     |
+| Q8_0         | ✔️   | ✔️     |
+| Q2_K         |      |        |
+| Q3_K         |      |        |
+| Q4_K         |      |        |
+| Q5_K         |      |        |
+| Q6_K         |      |        |
+| Q8_K         |      |        |
+| IQ1_S        |      | ✔️     |
+| IQ1_M        |      | ✔️     |
+| IQ2_XXS      |      | ✔️     |
+| IQ2_XS       |      | ✔️     |
+| IQ2_S        |      | ✔️     |
+| IQ3_XXS      |      | ✔️     |
+| IQ3_S        |      | ✔️     |
+| IQ4_XS       |      | ✔️     |
+| IQ4_NL       |      | ✔️     |
+| MXFP4        |      | ✔️     |
```

esrgan.hpp

Lines changed: 2 additions & 2 deletions

```diff
@@ -164,7 +164,7 @@ struct ESRGAN : public GGMLRunner {
         return "esrgan";
     }
 
-    bool load_from_file(const std::string& file_path) {
+    bool load_from_file(const std::string& file_path, int n_threads) {
         LOG_INFO("loading esrgan from '%s'", file_path.c_str());
 
         alloc_params_buffer();
@@ -177,7 +177,7 @@ struct ESRGAN : public GGMLRunner {
             return false;
         }
 
-        bool success = model_loader.load_tensors(esrgan_tensors);
+        bool success = model_loader.load_tensors(esrgan_tensors, {}, n_threads);
 
         if (!success) {
             LOG_ERROR("load esrgan tensors from model loader failed");
```

ggml

Submodule ggml updated 142 files

lora.hpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -116,7 +116,7 @@ struct LoraModel : public GGMLRunner {
         return "lora";
     }
 
-    bool load_from_file(bool filter_tensor = false, int n_threads = 0) {
+    bool load_from_file(bool filter_tensor, int n_threads) {
         LOG_INFO("loading LoRA from '%s'", file_path.c_str());
 
         if (load_failed) {
```

model.cpp

Lines changed: 21 additions & 5 deletions

```diff
@@ -1,4 +1,5 @@
 #include <stdarg.h>
+#include <algorithm>
 #include <atomic>
 #include <chrono>
 #include <fstream>
```
```diff
@@ -1995,7 +1996,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
     std::atomic<int64_t> copy_to_backend_time_ms(0);
     std::atomic<int64_t> convert_time_ms(0);
 
-    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : (int)std::thread::hardware_concurrency();
+    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : get_num_physical_cores();
+    LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
 
     int64_t start_time = ggml_time_ms();
     std::vector<TensorStorage> processed_tensor_storages;
```
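The default thread count moves from logical cores (`std::thread::hardware_concurrency()`) to physical cores when the caller passes no explicit value. A sketch of the selection, with `get_num_physical_cores()` stubbed by a common heuristic — the project's actual helper is not reproduced here:

```cpp
// Sketch of the thread-count selection above. The stub only approximates a
// physical-core count from std::thread::hardware_concurrency(); the real
// get_num_physical_cores() in the repository may differ.
#include <thread>

static int get_num_physical_cores_stub() {
    unsigned n = std::thread::hardware_concurrency();  // logical cores, may be 0
    return n > 0 ? (int)(n > 4 ? n / 2 : n) : 4;       // rough physical-core guess
}

static int resolve_load_threads(int n_threads_p) {
    // an explicit positive request wins; otherwise use the detected core count
    return n_threads_p > 0 ? n_threads_p : get_num_physical_cores_stub();
}
```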
```diff
@@ -2045,13 +2047,25 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
         w.join();
     }
 
-    std::unordered_map<std::string, IndexedStorage> latest_map;
+    std::vector<IndexedStorage> deduplicated;
+    deduplicated.reserve(all_results.size());
+    std::unordered_map<std::string, size_t> name_to_pos;
     for (auto& entry : all_results) {
-        latest_map[entry.ts.name] = entry;
+        auto it = name_to_pos.find(entry.ts.name);
+        if (it == name_to_pos.end()) {
+            name_to_pos.emplace(entry.ts.name, deduplicated.size());
+            deduplicated.push_back(entry);
+        } else if (deduplicated[it->second].index < entry.index) {
+            deduplicated[it->second] = entry;
+        }
     }
 
-    processed_tensor_storages.reserve(latest_map.size());
-    for (auto& [name, entry] : latest_map) {
+    std::sort(deduplicated.begin(), deduplicated.end(), [](const IndexedStorage& a, const IndexedStorage& b) {
+        return a.index < b.index;
+    });
+
+    processed_tensor_storages.reserve(deduplicated.size());
+    for (auto& entry : deduplicated) {
         processed_tensor_storages.push_back(entry.ts);
     }
 }
```
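The old `unordered_map` kept only the last duplicate of each tensor name but discarded file order. The replacement keeps the highest-index occurrence per name and then sorts by index so tensors are processed in file order. A self-contained sketch of the same strategy, where `Entry` is a simplified stand-in for `IndexedStorage`:

```cpp
// Self-contained sketch of the deduplication strategy above.
#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>

struct Entry {
    std::string name;
    size_t index;  // position of the tensor in the source file
};

std::vector<Entry> dedup_keep_latest(const std::vector<Entry>& all_results) {
    std::vector<Entry> deduplicated;
    deduplicated.reserve(all_results.size());
    std::unordered_map<std::string, size_t> name_to_pos;
    for (const auto& entry : all_results) {
        auto it = name_to_pos.find(entry.name);
        if (it == name_to_pos.end()) {
            // first occurrence of this name: append and remember its slot
            name_to_pos.emplace(entry.name, deduplicated.size());
            deduplicated.push_back(entry);
        } else if (deduplicated[it->second].index < entry.index) {
            // duplicate name: the occurrence later in the file wins
            deduplicated[it->second] = entry;
        }
    }
    // restore file order, since replacing an earlier slot with a later
    // duplicate can leave indices out of sequence
    std::sort(deduplicated.begin(), deduplicated.end(),
              [](const Entry& a, const Entry& b) { return a.index < b.index; });
    return deduplicated;
}
```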
```diff
@@ -2447,6 +2461,8 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
         // Pass, do not convert. For MMDiT
     } else if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
         // Pass, do not convert. For Unet
+    } else if (contains(name, "embedding")) {
+        // Pass, do not convert embedding
     } else {
         return true;
     }
```

pmid.hpp

Lines changed: 3 additions & 3 deletions

```diff
@@ -591,7 +591,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
         return "id_embeds";
     }
 
-    bool load_from_file(bool filter_tensor = false) {
+    bool load_from_file(bool filter_tensor, int n_threads) {
         LOG_INFO("loading PhotoMaker ID Embeds from '%s'", file_path.c_str());
 
         if (load_failed) {
@@ -623,11 +623,11 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
             return true;
         };
 
-        model_loader->load_tensors(on_new_tensor_cb);
+        model_loader->load_tensors(on_new_tensor_cb, n_threads);
         alloc_params_buffer();
 
         dry_run = false;
-        model_loader->load_tensors(on_new_tensor_cb);
+        model_loader->load_tensors(on_new_tensor_cb, n_threads);
 
         LOG_DEBUG("finished loading PhotoMaker ID Embeds");
         return true;
```
