Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions tools/mtmd/debug/mtmd-debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ static void show_additional_info(int /*argc*/, char ** argv) {
" -p \"encode\" (debugging encode pass, default case):\n"
" --image can be:\n"
" \"white\", \"black\", \"gray\": filled 1.0f, 0.0f and 0.5f respectively\n"
" \"red\", \"green\", \"blue\": filled with respective colors\n"
" \"cb\": checkerboard pattern, alternate 1.0f and 0.0f\n"
" \"rainbow\": raspberry-pi-like rainbow pattern\n"
" --audio can be:\n"
" \"one\", \"zero\", \"half\": filled 1.0f, 0.0f and 0.5f respectively\n"
" \"1010\": checkerboard pattern, alternate 1.0f and 0.0f\n"
Expand Down Expand Up @@ -144,6 +146,65 @@ int main(int argc, char ** argv) {
image[y][x * 3 + 2] = v;
}
}
} else if (input == "red") {
for (int i = 0; i < inp_size; ++i) {
auto row = std::vector<float>(inp_size * 3, 0.0f);
for (int j = 0; j < inp_size; ++j) {
row[j * 3 + 0] = 1.0f; // R channel
}
image.push_back(row);
}
} else if (input == "green") {
for (int i = 0; i < inp_size; ++i) {
auto row = std::vector<float>(inp_size * 3, 0.0f);
for (int j = 0; j < inp_size; ++j) {
row[j * 3 + 1] = 1.0f; // G channel
}
image.push_back(row);
}
} else if (input == "blue") {
for (int i = 0; i < inp_size; ++i) {
auto row = std::vector<float>(inp_size * 3, 0.0f);
for (int j = 0; j < inp_size; ++j) {
row[j * 3 + 2] = 1.0f; // B channel
}
image.push_back(row);
}
} else if (input == "rainbow") {
for (int i = 0; i < inp_size; ++i) {
image.push_back(std::vector<float>(inp_size * 3, 0.0f));
}
float cx = inp_size / 2.0f;
float cy = inp_size / 2.0f;
float max_dist = std::sqrt(cx * cx + cy * cy);
for (int y = 0; y < inp_size; ++y) {
for (int x = 0; x < inp_size; ++x) {
float dx = x - cx;
float dy = y - cy;
float hue = std::atan2(dy, dx) / (2.0f * 3.14159265f);
if (hue < 0) hue += 1.0f;
float sat = std::sqrt(dx * dx + dy * dy) / max_dist;
if (sat > 1.0f) sat = 1.0f;
float h6 = hue * 6.0f;
int i6 = (int)h6;
float f = h6 - i6;
float p = 1.0f - sat;
float q = 1.0f - sat * f;
float t = 1.0f - sat * (1.0f - f);
float r, g, b;
switch (i6 % 6) {
case 0: r=1; g=t; b=p; break;
case 1: r=q; g=1; b=p; break;
case 2: r=p; g=1; b=t; break;
case 3: r=p; g=q; b=1; break;
case 4: r=t; g=p; b=1; break;
default: r=1; g=p; b=q; break;
}
image[y][x * 3 + 0] = r;
image[y][x * 3 + 1] = g;
image[y][x * 3 + 2] = b;
}
}
} else if (input == "one") {
samples = std::vector<float>(inp_size, 1.0f);
} else if (input == "zero") {
Expand Down
37 changes: 37 additions & 0 deletions tools/mtmd/debug/mtmd-debug.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,43 @@ def test_vision():
test_vision()
```

Example of debugging a rainbow image:

```py
import torch
import math

def make_rainbow(img_size):
    """Build a synthetic rainbow test image.

    Hue follows the angle of each pixel around the image centre, saturation
    grows linearly with the distance from the centre (clamped to 1.0) and the
    value component is fixed at 1.0, so colours fade to white towards the
    centre and become fully saturated towards the corners.

    Args:
        img_size: side length of the square image, in pixels.

    Returns:
        A tensor of shape (1, 3, img_size, img_size) in the default torch
        dtype, channels ordered R, G, B, with every value in [0, 1].
    """
    cx, cy = img_size / 2.0, img_size / 2.0
    max_dist = math.sqrt(cx * cx + cy * cy)

    # Collect pixels in a flat Python list (row-major, RGB interleaved) and
    # build the tensor once at the end: writing single tensor elements from a
    # Python loop is very slow for large images such as 896x896.
    pixels = []
    for y in range(img_size):
        for x in range(img_size):
            dx, dy = x - cx, y - cy

            # atan2 yields [-pi, pi]; map it onto a hue in [0, 1).
            hue = math.atan2(dy, dx) / (2 * math.pi)
            if hue < 0:
                hue += 1

            sat = min(math.sqrt(dx * dx + dy * dy) / max_dist, 1.0)

            # HSV -> RGB with V = 1: pick the hue sextant, interpolate inside it.
            h6 = hue * 6
            i6 = int(h6)
            f = h6 - i6
            p = 1 - sat
            q = 1 - sat * f
            t = 1 - sat * (1 - f)
            sextants = [(1, t, p), (q, 1, p), (p, 1, t), (p, q, 1), (t, p, 1), (1, p, q)]
            # "% 6" folds a hue that rounded up to exactly 1.0 back to sextant 0.
            pixels.extend(sextants[i6 % 6])

    # (H, W, 3) -> (1, 3, H, W), matching the layout of per-channel writes.
    img = torch.tensor(pixels, dtype=torch.get_default_dtype())
    img = img.reshape(img_size, img_size, 3)
    return img.permute(2, 0, 1).unsqueeze(0).contiguous()

# Feed a synthetic rainbow image through the model's image-feature path and
# dump the resulting hidden state. Gradient tracking is switched off because
# only the forward output is inspected here.
img_size = 896
with torch.no_grad():
    pixel_values = make_rainbow(img_size)
    outputs = model.model.get_image_features(pixel_values=pixel_values)
print("last_hidden_state:", outputs.last_hidden_state)
```

## Debugging preprocess pass

(TODO)
Loading