-
Notifications
You must be signed in to change notification settings - Fork 0
/
aesthetic_optimizer.py
336 lines (294 loc) · 11.2 KB
/
aesthetic_optimizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
import argparse
import base64
import io
import json
import math
import random
from pathlib import Path
import numpy as np
import requests
from PIL import Image, ImageDraw, PngImagePlugin
from transformers import logging
from aesthetic_predictor.aesthetic_predictor import AestheticPredictor
# Prefer the standard-library TOML parser; when that module is missing,
# fall back to the third-party "tomli" package under the same name.
try:
    import tomllib
except ModuleNotFoundError:
    import tomli as tomllib
# `logging` here is the one imported from `transformers` above, not the
# standard-library module; this sets its verbosity to the error level.
logging.set_verbosity_error()
def sigmoid(x):
    """Return the logistic sigmoid of ``x`` in a numerically stable form.

    Relies on the identity ``sigmoid(x) == exp(-log(1 + exp(-x)))``, where
    ``np.logaddexp(0, -x)`` evaluates the ``log(1 + exp(-x))`` term.

    Args:
        x: A scalar or NumPy array (anything that supports unary minus and
            that NumPy ufuncs accept).

    Returns:
        The element-wise sigmoid as a NumPy scalar or array.
    """
    log_one_plus_exp_neg_x = np.logaddexp(0, -x)
    return np.exp(-log_one_plus_exp_neg_x)
def get_best_image(resp_json, models, model_weights):
    """Select the highest-scoring image from one API response.

    Args:
        resp_json: Decoded response body. ``resp_json["images"]`` holds
            base64-encoded images and ``resp_json["info"]`` is a JSON string
            providing ``"infotexts"``, ``"job_timestamp"`` and ``"all_seeds"``.
        models: Score predictors. The first one computes the embeddings via
            ``get_embeds``; every model then scores them via
            ``predict(embeds=...)``.
        model_weights: One exponent per model. Each model's score is passed
            through ``sigmoid`` and raised to its weight; the per-image
            product of those terms is the combined score.

    Returns:
        A dict with keys ``"image"`` (decoded image), ``"image_b64"`` (the
        original base64 payload), ``"score"``, ``"metadata"`` (PNG info with
        the image's infotext stored under ``"parameters"``),
        ``"job_timestamp"`` and ``"seed"``.
    """
    info = json.loads(resp_json["info"])
    infotexts = info["infotexts"]

    decoded_images = []
    for encoded in resp_json["images"]:
        raw_bytes = base64.b64decode(encoded)
        decoded_images.append(Image.open(io.BytesIO(raw_bytes)))

    # Embeddings are computed once and shared by every scoring model.
    embeds = models[0].get_embeds(decoded_images)
    per_model_scores = [
        sigmoid(np.array(model.predict(embeds=embeds))) for model in models
    ]

    # Transpose to per-image tuples and fold them into one weighted product.
    combined_scores = []
    for per_image in zip(*per_model_scores):
        combined_scores.append(np.prod(np.array(per_image) ** model_weights))

    top_score = max(combined_scores)
    winner = combined_scores.index(top_score)

    png_info = PngImagePlugin.PngInfo()
    png_info.add_text("parameters", infotexts[winner])

    return {
        "image": decoded_images[winner],
        "image_b64": resp_json["images"][winner],
        "score": combined_scores[winner],
        "metadata": png_info,
        "job_timestamp": info["job_timestamp"],
        "seed": info["all_seeds"][winner],
    }
def encode_pil_to_base64(pil_image):
    """Encode an image as a PNG ``data:`` URI.

    Args:
        pil_image: Object whose ``save(fp, "PNG")`` method writes the image
            bytes to a binary file object.

    Returns:
        The string ``"data:image/png;base64,"`` followed by the base64
        encoding of the bytes written by ``save``.
    """
    buffer = io.BytesIO()
    try:
        pil_image.save(buffer, "PNG")
        png_bytes = buffer.getvalue()
    finally:
        buffer.close()
    encoded = base64.b64encode(png_bytes).decode("utf-8")
    return "data:image/png;base64," + encoded
def format_decimal(num):
    """Return ``num`` formatted as a string with two digits after the point."""
    return format(num, ".2f")
def generate_patches(image, patch_size, overlap=None):
    """Tile ``image`` with overlapping square patches.

    Patch origins start at ``(0, 0)`` and advance by ``patch_size - overlap``
    pixels along each axis. When an image dimension is not a multiple of
    ``patch_size`` the last row/column of patches may extend past the image
    edge.

    Args:
        image: Object exposing ``size`` as ``(width, height)``.
        patch_size: Side length of every patch in pixels; must be positive.
        overlap: Pixels shared by neighbouring patches. Defaults to half the
            patch size rounded up, capped at ``patch_size - 1`` so that the
            stride is never zero (this only matters for ``patch_size == 1``).

    Returns:
        A list of ``(left, top, right, bottom)`` tuples in row-major order.

    Raises:
        ValueError: If ``patch_size`` is not positive, or if ``overlap`` is
            so large that the stride ``patch_size - overlap`` is not positive.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be positive, got {patch_size}")
    if overlap is None:
        # ceil(1 / 2) == 1 would give a zero stride for 1-pixel patches.
        overlap = min(math.ceil(patch_size / 2), patch_size - 1)
    stride = patch_size - overlap
    if stride <= 0:
        # A zero stride makes range() raise an unhelpful error and a negative
        # one silently yields no patches at all; fail loudly instead.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than patch_size ({patch_size})"
        )

    width, height = image.size
    x_stop = math.ceil(width / patch_size) * patch_size - patch_size + 1
    y_stop = math.ceil(height / patch_size) * patch_size - patch_size + 1
    return [
        (x, y, x + patch_size, y + patch_size)
        for y in range(0, y_stop, stride)
        for x in range(0, x_stop, stride)
    ]
def create_mask(image, patch_set):
    """Build a single-channel mask that is white inside the given patches.

    Args:
        image: Source image; only its ``size`` is used, to dimension the mask.
        patch_set: Iterable of boxes, each passed unchanged to
            ``ImageDraw.rectangle``.

    Returns:
        A new mode ``"L"`` image initialised to 0 with every box filled
        with 255.
    """
    canvas = Image.new("L", image.size, 0)
    painter = ImageDraw.Draw(canvas)
    for box in patch_set:
        painter.rectangle(box, fill=255)
    return canvas
def txt2img_stage(url, config, models, model_weights, outdir):
    """Run the txt2img seed search and return the best-scoring result.

    Repeatedly posts ``config["parameters"]`` to ``{url}/sdapi/v1/txt2img``
    and scores each batch with ``get_best_image``. A miss counter grows by 1
    for every non-2xx response and by the batch size for every batch that
    does not beat the current best. The counter is not reset when a new best
    is found; the search ends once it reaches
    ``config["seed_search_patience"]`` (default 64).

    Every new best image is written to ``outdir`` as
    ``"{job_timestamp}-{seed}.png"`` together with its PNG metadata.

    Args:
        url: Base URL of the WebUI API.
        config: Run configuration mapping.
        models: Score predictors forwarded to ``get_best_image``.
        model_weights: Per-model weights forwarded to ``get_best_image``.
        outdir: Output directory; must support the ``/`` path operator.

    Returns:
        The best result dict from ``get_best_image``, or ``None`` when no
        request ever succeeded.
    """
    patience = config.get("seed_search_patience", 64)
    champion = None
    misses = 0
    print("Starting txt2img seed search")
    while misses < patience:
        response = requests.post(
            url=f"{url}/sdapi/v1/txt2img", json=config["parameters"]
        )
        if response.status_code // 100 != 2:
            print("Request failed:", response.status_code)
            misses += 1
            continue

        payload = response.json()
        candidate = get_best_image(payload, models, model_weights)
        if champion is None or not candidate["score"] <= champion["score"]:
            champion = candidate
            # save new best
            target = outdir / f"{champion['job_timestamp']}-{champion['seed']}.png"
            champion["image"].save(target, pnginfo=champion["metadata"])
            print("New best:", champion["score"])
        else:
            misses += len(payload["images"])
    return champion
def img2img_stage(url, config, models, model_weights, outdir, best):
    """Refine ``best`` with img2img at progressively lower denoising strength.

    Each iteration posts the current best image to ``{url}/sdapi/v1/img2img``
    and scores the returned batch with ``get_best_image``. A patience counter
    grows by 1 for every non-2xx response and by the batch size for every
    batch that does not beat the current best; it is reset to 0 on every
    improvement. When the counter reaches ``config["img2img_patience"]``
    (default 16) the denoising strength drops by 0.1 and the counter
    restarts. The stage ends once the strength falls below 0.1.

    Every new best image is written to ``outdir`` as
    ``"{job_timestamp}-{seed}.png"`` together with its PNG metadata.

    Args:
        url: Base URL of the WebUI API.
        config: Run configuration mapping; ``config["parameters"]`` is copied
            (shallowly) and extended with the img2img fields.
        models: Score predictors forwarded to ``get_best_image``.
        model_weights: Per-model weights forwarded to ``get_best_image``.
        outdir: Output directory; must support the ``/`` path operator.
        best: Starting result dict; needs at least ``"image_b64"`` and
            ``"score"``.

    Returns:
        The best result dict found (``best`` itself if nothing improved).
    """
    p = 0
    params = config["parameters"].copy()
    denoising_strength = config.get("initial_denoising_strength", 0.75)
    # TODO: make these configurable
    denoise_step = 0.1
    denoise_min = 0.1
    print(
        f"Starting img2img random search with denoising strength {denoising_strength}"
    )
    while denoising_strength >= denoise_min:
        if p >= config.get("img2img_patience", 16):
            # Round to the two decimals that format_decimal() sends anyway.
            # Plain repeated subtraction drifts (0.3 - 0.1 - 0.1 is
            # 0.09999999999999998), which would end the loop one level early.
            denoising_strength = round(denoising_strength - denoise_step, 2)
            p = 0
            print(f"Reduced denoising strength to {denoising_strength}")
            continue

        params["init_images"] = [best["image_b64"]]
        params["denoising_strength"] = format_decimal(denoising_strength)
        response = requests.post(url=f"{url}/sdapi/v1/img2img", json=params)
        if response.status_code // 100 != 2:
            print("Request failed:", response.status_code)
            p += 1
            continue

        r = response.json()
        batch_best = get_best_image(r, models, model_weights)
        if batch_best["score"] <= best["score"]:
            p += len(r["images"])
            continue

        p = 0
        best = batch_best
        # save new best
        filename = f"{best['job_timestamp']}-{best['seed']}.png"
        best["image"].save(outdir / filename, pnginfo=best["metadata"])
        print("New best:", best["score"])
    return best
def inpainting_stage(url, config, models, model_weights, outdir, best):
    """Refine ``best`` by inpainting one randomly chosen patch at a time.

    The image is tiled with overlapping square patches at two scales (1/3 and
    1/5 of its shorter side, rounded up). Each request masks a single patch
    and posts it to ``{url}/sdapi/v1/img2img``. Patches are sampled with a
    weight equal to their running mean of
    ``(batch score / current best score) ** 2``, seeded with one prior
    observation of 1.

    A patience counter grows by 1 for every non-2xx response and by the batch
    size for every batch that does not beat the current best; it is reset on
    improvement. When it reaches half the number of patches the denoising
    strength drops by 0.1. The stage ends once the strength falls below 0.1.

    Every new best image is written to ``outdir`` as
    ``"{job_timestamp}-{seed}.png"`` together with its PNG metadata.

    Args:
        url: Base URL of the WebUI API.
        config: Run configuration mapping; ``config["parameters"]`` is copied
            (shallowly) and extended with the inpainting fields. The starting
            strength comes from ``"initial_inpaint_denoising_strength"``
            (default 0.5).
        models: Score predictors forwarded to ``get_best_image``.
        model_weights: Per-model weights forwarded to ``get_best_image``.
        outdir: Output directory; must support the ``/`` path operator.
        best: Starting result dict; needs ``"image"``, ``"image_b64"`` and
            ``"score"``.

    Returns:
        The best result dict found (``best`` itself if nothing improved).
    """
    # generate patches with different sizes
    patch_ratios = (3, 5)
    shorter_side = min(best["image"].size)
    patch_sizes = [math.ceil(shorter_side / ratio) for ratio in patch_ratios]
    patches = []
    for size in patch_sizes:
        patches.extend(generate_patches(best["image"], size))

    # weight patches by success rate
    patch_stats = [[1, 1] for _ in patches]

    params = config["parameters"].copy()
    # The blur radius is derived from the last patch size for every request.
    # This is the value the code previously picked up implicitly from the
    # leaked loop variable; it is now spelled out.
    params["mask_blur"] = int(patch_sizes[-1] * 0.2)
    params["inpainting_fill"] = 1  # original
    params["inpaint_full_res"] = False  # whole image, not just masked region

    denoising_strength = config.get("initial_inpaint_denoising_strength", 0.5)
    denoise_min = 0.1
    denoise_step = 0.1
    p = 0
    print(f"Inpainting with denoising strength {denoising_strength}")
    print(f"Number of tiles: {len(patches)}")
    while denoising_strength >= denoise_min:
        if p >= len(patches) / 2:  # TODO: threshold is arbitrary
            # Round to the two decimals that format_decimal() sends anyway.
            # Plain repeated subtraction drifts (0.3 - 0.1 - 0.1 is
            # 0.09999999999999998), which would end the loop one level early.
            denoising_strength = round(denoising_strength - denoise_step, 2)
            print(f"Reduced denoising strength to {denoising_strength}")
            p = 0
            continue

        params["init_images"] = [best["image_b64"]]
        params["denoising_strength"] = format_decimal(denoising_strength)

        num_patches = 1
        patch_weights = [s[0] / s[1] for s in patch_stats]
        patch_idxs = random.choices(
            range(len(patches)), weights=patch_weights, k=num_patches
        )
        patch_set = [patches[idx] for idx in patch_idxs]
        mask = create_mask(best["image"], patch_set)
        params["mask"] = encode_pil_to_base64(mask)

        response = requests.post(url=f"{url}/sdapi/v1/img2img", json=params)
        if response.status_code // 100 != 2:
            print("Request failed:", response.status_code)
            p += 1
            continue

        r = response.json()
        batch_best = get_best_image(r, models, model_weights)

        # update patch stats
        for idx in patch_idxs:
            # squared ratio to current best: punish lower scores more
            patch_stats[idx][0] += (batch_best["score"] / best["score"]) ** 2
            patch_stats[idx][1] += 1

        if batch_best["score"] <= best["score"]:
            p += len(r["images"])
            continue

        p = 0
        best = batch_best
        # save new best
        filename = f"{best['job_timestamp']}-{best['seed']}.png"
        best["image"].save(outdir / filename, pnginfo=best["metadata"])
        print("New best:", best["score"])
    return best
def main(url, config, outdir, init_image=None, img2img=True, inpaint=True):
    """Run the optimisation pipeline and return the best result.

    Loads the score models listed in ``config["score_models"]``, creates
    ``outdir`` if needed and posts ``config["options"]`` to
    ``{url}/sdapi/v1/options``. The starting image is either ``init_image``
    (scored locally) or the outcome of the txt2img seed search. The optional
    img2img and inpainting stages then refine it in that order.

    Args:
        url: Base URL of the WebUI API.
        config: Run configuration mapping; must contain ``"score_models"``
            and ``"options"`` (plus whatever the enabled stages read).
        outdir: Directory for saved images; ``~`` is expanded.
        init_image: Optional starting image. When given, the txt2img stage is
            skipped.
        img2img: Whether to run the img2img refinement stage.
        inpaint: Whether to run the inpainting refinement stage.

    Returns:
        The best result dict, or ``None`` when the txt2img stage found
        nothing and no refinement stage was requested.

    Raises:
        RuntimeError: If the txt2img stage produced no image but a refinement
            stage was requested.
    """
    # TODO: epsilon threshold for resetting patience counters
    # TODO: random chance of accepting worse image ~10%?
    model_paths = [model["path"] for model in config["score_models"]]
    model_weights = [model.get("weight", 1.0) for model in config["score_models"]]
    models = [AestheticPredictor(model_path=path) for path in model_paths]

    # create output directory if it doesn't exist
    outdir = Path(outdir).expanduser()
    outdir.mkdir(parents=True, exist_ok=True)

    response = requests.post(url=f"{url}/sdapi/v1/options", json=config["options"])
    if response.status_code // 100 != 2:
        # Keep going (as before), but do not hide that the requested options
        # may not be in effect for the rest of the run.
        print("Failed to apply options:", response.status_code)

    best = None
    if init_image is not None:
        embeds = models[0].get_embeds([init_image])
        scores = [sigmoid(np.array(model.predict(embeds=embeds))) for model in models]
        scores = [np.prod(np.array(s) ** model_weights) for s in zip(*scores)]
        best = {
            "image": init_image,
            "image_b64": encode_pil_to_base64(init_image),
            "score": scores[0],
        }
        print("Init image score:", best["score"])
    # txt2img seed search
    else:
        best = txt2img_stage(url, config, models, model_weights, outdir)

    # The seed search returns None when every request failed; the refinement
    # stages index into `best`, so stop with a clear message instead.
    if best is None and (img2img or inpaint):
        raise RuntimeError(
            "txt2img stage did not produce an image; cannot run refinement stages"
        )

    # img2img refinement
    if img2img:
        best = img2img_stage(url, config, models, model_weights, outdir, best)

    # inpainting refinement
    if inpaint:
        best = inpainting_stage(url, config, models, model_weights, outdir, best)

    return best
if __name__ == "__main__":
    # parse command line arguments
    parser = argparse.ArgumentParser(
        description=(
            "Iteratively improve Stable Diffusion outputs using aesthetic scores via"
            " AUTOMATIC1111's webui API."
        )
    )
    parser.add_argument(
        "--config",
        type=str,
        help="TOML file containing generation parameters",
        default="config.toml",
    )
    parser.add_argument(
        "--outdir", type=str, help="directory to save images", default="outputs"
    )
    parser.add_argument(
        "--url", type=str, help="URL for A1111 WebUI", default="http://localhost:7860"
    )
    parser.add_argument(
        "--init-image",
        type=str,
        help="use starting image. conflicts with --txt2img",
    )
    parser.add_argument(
        "--txt2img",
        action=argparse.BooleanOptionalAction,
        help="enable txt2img stage. conflicts with --init-image",
    )
    parser.add_argument(
        "--img2img",
        action=argparse.BooleanOptionalAction,
        help="enable img2img stage",
    )
    parser.add_argument(
        "--inpaint",
        action=argparse.BooleanOptionalAction,
        help="enable inpainting stage",
    )
    args = parser.parse_args()

    if args.txt2img and args.init_image is not None:
        raise ValueError("Conflicting arguments: --init-image and --txt2img")

    # Each stage flag is True/False when given explicitly, None when omitted.
    flags = [args.txt2img, args.img2img, args.inpaint]
    # if a stage is explicitly enabled, set unspecified stages to disabled
    if any(flags):
        flags = [False if flag is None else flag for flag in flags]
    # otherwise enable them
    else:
        flags = [True if flag is None else flag for flag in flags]
    txt2img, img2img, inpaint = flags

    # disable txt2img if init_image is provided
    if args.init_image is not None:
        txt2img = False

    # validate flags
    if not txt2img and args.init_image is None:
        raise ValueError("Missing argument: expected --txt2img or --init-image")

    config_name = args.config.lower()
    if config_name.endswith(".toml"):
        with open(args.config, "rb") as f:
            config = tomllib.load(f)
    elif config_name.endswith(".json"):
        # deprecation notice
        print(
            "DEPRECATION NOTICE: JSON config support will be removed in a future"
            " version in favor of TOML files."
        )
        with open(args.config, "rt", encoding="utf-8") as f:
            config = json.load(f)
    else:
        # Without this branch `config` would be undefined and the main() call
        # below would fail with a NameError.
        raise ValueError(
            f"Unsupported config file type: {args.config} (expected .toml or .json)"
        )

    init_image = None
    if args.init_image is not None:
        init_image = Image.open(Path(args.init_image).expanduser())

    main(args.url, config, args.outdir, init_image, img2img, inpaint)