diff --git a/.run-travis-tests.sh b/.run-travis-tests.sh index fdfb35eb..0cf036fa 100755 --- a/.run-travis-tests.sh +++ b/.run-travis-tests.sh @@ -21,6 +21,8 @@ sudo chown $(whoami) /btrfs [ -d ~/.opam/4.11.1 ] || opam init --compiler=4.11.1 opam install --deps-only -t . opam exec -- make +opam exec -- dune exec -- obuilder healthcheck --store=btrfs:/btrfs +opam exec -- dune exec -- obuilder healthcheck --store=zfs:zfs opam exec -- dune exec -- ./stress/stress.exe btrfs:/btrfs opam exec -- dune exec -- ./stress/stress.exe zfs:zfs diff --git a/README.md b/README.md index 903799e4..89bbdc61 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,12 @@ As present, the initial base image is fetched from Docker Hub using `docker pull OBuilder is designed to be used as a component of a build scheduler such as [OCluster][]. However, there is also a command-line interface for testing. +To check that the system is working correctly, you can run a healthcheck. +This checks that Docker is running and then does a simple test build (pulling the `busybox` image if not already present): + + $ obuilder healthcheck --store=zfs:tank + Healthcheck passed + To build `example.spec` (which builds OBuilder itself) using the ZFS pool `tank` to cache the build results: $ obuilder build -f example.spec . --store=zfs:tank diff --git a/lib/build.ml b/lib/build.ml index c5bbdf1b..bbcd0c66 100644 --- a/lib/build.ml +++ b/lib/build.ml @@ -6,6 +6,14 @@ let ( >>!= ) = Lwt_result.bind let hostname = "builder" +let healthcheck_base = "busybox" +let healthcheck_ops = + let open Obuilder_spec in + [ + shell ["/bin/sh"; "-c"]; + run "echo healthcheck" + ] + module Scope = Map.Make(String) module Context = struct @@ -227,13 +235,13 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) = struct ) >>= fun cid -> Lwt.finalize (fun () -> fn cid) - (fun () -> Os.exec ["docker"; "rm"; "--"; cid]) + (fun () -> Os.exec ~stdout:`Dev_null ["docker"; "rm"; "--"; cid]) let get_base t ~log base = log `Heading (Fmt.strf "(from %a)" Sexplib.Sexp.pp_hum (Atom base)); let id = Sha256.to_hex (Sha256.string base) in Store.build t.store ~id ~log (fun ~cancelled:_ ~log tmp -> - Log.info (fun f -> f "Base image not present; importing %S...@." base); + Log.info (fun f -> f "Base image not present; importing %S..." base); let rootfs = tmp / "rootfs" in Os.sudo ["mkdir"; "--mode=755"; "--"; rootfs] >>= fun () -> (* Lwt_process.exec ("", [| "docker"; "pull"; "--"; base |]) >>= fun _ -> *) @@ -269,7 +277,9 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) = struct let context = { context with env = context.env @ env } in run_steps t ~context ~base:id ops - let build = build ~scope:[] + let build t context spec = + let r = build ~scope:[] t context spec in + (r : (string, [ `Cancelled | `Msg of string ]) Lwt_result.t :> (string, [> `Cancelled | `Msg of string ]) Lwt_result.t) let delete ?log t id = Store.delete ?log t.store id @@ -277,6 +287,44 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) = struct let prune ?log t ~before limit = Store.prune ?log t.store ~before limit + let log_to buffer tag x = + match tag with + | `Heading | `Note -> Buffer.add_string buffer (x ^ "\n") + | `Output -> Buffer.add_string buffer x + + let healthcheck ?(timeout=10.0) t = + Os.with_pipe_from_child (fun ~r ~w -> + let pp f = Fmt.string f "docker version" in + let result = Os.exec_result ~pp ~stdout:`Dev_null ~stderr:(`FD_move_safely w) ["docker"; "version"] in + let r = Lwt_io.(of_fd ~mode:input) r ~close:Lwt.return in + Lwt_io.read r >>= fun err -> + result >>= function + | Ok () -> Lwt_result.return () + | Error (`Msg m) -> Lwt_result.fail (`Msg (Fmt.str "%s@.%s" m (String.trim err))) + ) >>!= fun () -> + let buffer = Buffer.create 1024 in + let log = log_to buffer in + (* Get the base image first, before starting the timer. *) + let switch = Lwt_switch.create () in + let context = Context.v ~switch ~log ~src_dir:"/tmp" () in + get_base t ~log healthcheck_base >>= function + | Error (`Msg _) as x -> Lwt.return x + | Error `Cancelled -> failwith "Cancelled getting base image (shouldn't happen!)" + | Ok (id, env) -> + let context = { context with env } in + (* Start the timer *) + Lwt.async (fun () -> + Lwt_unix.sleep timeout >>= fun () -> + Lwt_switch.turn_off switch + ); + run_steps t ~context ~base:id healthcheck_ops >>= function + | Ok id -> Store.delete t.store id >|= Result.ok + | Error (`Msg msg) as x -> + let log = String.trim (Buffer.contents buffer) in + if log = "" then Lwt.return x + else Lwt.return (Fmt.error_msg "%s@.%s" msg log) + | Error `Cancelled -> Lwt.return (Fmt.error_msg "Timeout running healthcheck") + let v ~store ~sandbox = let store = Store.wrap store in { store; sandbox } diff --git a/lib/obuilder.ml b/lib/obuilder.ml index 7c8c4ede..f3b17f6f 100644 --- a/lib/obuilder.ml +++ b/lib/obuilder.ml @@ -15,6 +15,8 @@ module Os = Os module Runc_sandbox = Runc_sandbox module Store_spec = Store_spec +let log_src = Log.src + (**/**) (* For unit-tests *) diff --git a/lib/os.ml b/lib/os.ml index bfd82d72..54053488 100644 --- a/lib/os.ml +++ b/lib/os.ml @@ -61,9 +61,10 @@ let lwt_process_exec = ref default_exec let exec_result ?cwd ?stdin ?stdout ?stderr ~pp argv = Logs.info (fun f -> f "Exec %a" pp_cmd argv); - !lwt_process_exec ?cwd ?stdin ?stdout ?stderr ~pp ("", Array.of_list argv) >>!= function - | 0 -> Lwt_result.return () - | n -> Lwt.return @@ Fmt.error_msg "%t failed with exit status %d" pp n + !lwt_process_exec ?cwd ?stdin ?stdout ?stderr ~pp ("", Array.of_list argv) >>= function + | Ok 0 -> Lwt_result.return () + | Ok n -> Lwt.return @@ Fmt.error_msg "%t failed with exit status %d" pp n + | Error e -> Lwt_result.fail (e : [`Msg of string] :> [> `Msg of string]) let exec ?cwd ?stdin ?stdout ?stderr argv = Logs.info (fun f -> f "Exec %a" pp_cmd argv); diff --git a/lib/s.ml b/lib/s.ml index de8c353f..94ee61a9 100644 --- a/lib/s.ml +++ b/lib/s.ml @@ -88,7 +88,7 @@ module type BUILDER = sig t -> context -> Obuilder_spec.t -> - (id, [`Cancelled | `Msg of string]) Lwt_result.t + (id, [> `Cancelled | `Msg of string]) Lwt_result.t val delete : ?log:(id -> unit) -> t -> id -> unit Lwt.t (** [delete ?log t id] removes [id] from the store, along with all of its dependencies. @@ -102,4 +102,9 @@ module type BUILDER = sig all of which were last used before [before]. Returns the number of items removed. @param log Called just before deleting each item, so it can be displayed. *) + + val healthcheck : ?timeout:float -> t -> (unit, [> `Msg of string]) Lwt_result.t + (** [healthcheck t] performs a check that [t] is working correctly. + @param timeout Cancel and report failure after this many seconds. + This excludes the time to fetch the base image. *) end diff --git a/main.ml b/main.ml index 1d6b056b..a4da5f0f 100644 --- a/main.ml +++ b/main.ml @@ -49,6 +49,19 @@ let build fast_sync store spec src_dir = exit 1 end +let healthcheck fast_sync verbose store = + if verbose then + Logs.Src.set_level Obuilder.log_src (Some Logs.Info); + Lwt_main.run begin + create_builder ~fast_sync store >>= fun (Builder ((module Builder), builder)) -> + Builder.healthcheck builder >|= function + | Error (`Msg m) -> + Fmt.epr "Healthcheck failed: %s@." m; + exit 1 + | Ok () -> + Fmt.pr "Healthcheck passed@." + end + let delete store id = Lwt_main.run begin create_builder store >>= fun (Builder ((module Builder), builder)) -> @@ -127,7 +140,19 @@ let dockerfile = Term.(const dockerfile $ buildkit $ spec_file), Term.info "dockerfile" ~doc -let cmds = [build; delete; dockerfile] +let verbose = + Arg.value @@ + Arg.flag @@ + Arg.info + ~doc:"Enable verbose logging" + ["verbose"] + +let healthcheck = + let doc = "Perform a self-test" in + Term.(const healthcheck $ fast_sync $ verbose $ store), + Term.info "healthcheck" ~doc + +let cmds = [build; delete; dockerfile; healthcheck] let default_cmd = let doc = "a command-line interface for OBuilder" in