Skip to content

Commit ccb2794

Browse files
authored
fix: Increase yaml Buffer to Fix Video Scraper (#3392)
* increase yaml library buffer to fix video scraper
* fmt
1 parent 822975f commit ccb2794

File tree

1 file changed

+21
-5
lines changed

1 file changed

+21
-5
lines changed

tool/ood-gen/lib/youtube.ml

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -187,17 +187,33 @@ let scrape yaml_file =
187187
in
188188
match fetched with
189189
| Ok fetched ->
190-
let yaml =
190+
let all_videos =
191191
VideoSet.union fetched scraped
192192
|> VideoSet.to_seq |> List.of_seq
193193
|> List.sort (fun a b -> compare b.Vid.published a.Vid.published)
194-
|> Vid.video_list_to_yaml
195194
in
195+
let yaml = Vid.video_list_to_yaml all_videos in
196+
(* The yaml library uses a fixed-size output buffer. The default is 262140
197+
bytes, which was exceeded when we had 203 videos (~262KB output). This
198+
caused the document_end operation to fail with "doc_end failed" error.
199+
200+
Current stats: 203 videos ≈ 260KB, average ~1.3KB per video. We use a
201+
2MB buffer to accommodate growth to ~1500 videos before hitting limits.
202+
If the list grows beyond that, this will fail with a clear error
203+
message. *)
204+
let buffer_size = 2 * 1024 * 1024 in
205+
(* 2MB *)
196206
let output =
197-
Yaml.pp Format.str_formatter yaml;
198-
Format.flush_str_formatter ()
207+
match Yaml.to_string ~len:buffer_size yaml with
208+
| Ok s -> s
209+
| Error (`Msg err) ->
210+
failwith
211+
(Printf.sprintf
212+
"YAML serialization failed (tried %d videos, buffer size %d \
213+
bytes): %s"
214+
(List.length all_videos) buffer_size err)
199215
in
200216
let oc = open_out yaml_file in
201-
Printf.fprintf oc "%s" output;
217+
output_string oc output;
202218
close_out oc
203219
| Error (`Msg msg) -> failwith msg

0 commit comments

Comments
 (0)