From e0841307d4da809754d995a4524b39b87040f2d0 Mon Sep 17 00:00:00 2001 From: Antonio Sarosi Date: Mon, 11 Nov 2024 19:12:25 +0000 Subject: [PATCH] Add basic grammar for `a` vs `an` articles (#1158) > [!IMPORTANT] > Add function to determine 'a' or 'an' for indefinite articles and integrate it into output format rendering logic. > > - **Behavior**: > - Add `indefinite_article_a_or_an()` function in `types.rs` to determine 'a' or 'an' based on the first letter of a word. > - Update `OutputFormatContent::prefix()` to use `indefinite_article_a_or_an()` for primitive types. > - **Documentation**: > - Update `output-format.mdx` to reflect changes in default prefixes for primitive types. > - **Tests**: > - Add tests `render_int()` and `render_float()` in `types.rs` to verify correct article usage. > > This description was created by [Ellipsis](https://www.ellipsis.dev?ref=BoundaryML%2Fbaml&utm_source=github&utm_medium=referral) for 7c264361d2e7564f1bde87a4ffe4146fe8951695. It will automatically update as commits are pushed. --- .../jinja-runtime/src/output_format/types.rs | 29 ++++++++++++++++++- .../baml/prompt-syntax/output-format.mdx | 3 +- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/engine/baml-lib/jinja-runtime/src/output_format/types.rs b/engine/baml-lib/jinja-runtime/src/output_format/types.rs index d1fe52dcc..23f1d50ae 100644 --- a/engine/baml-lib/jinja-runtime/src/output_format/types.rs +++ b/engine/baml-lib/jinja-runtime/src/output_format/types.rs @@ -285,6 +285,16 @@ impl<'s> std::fmt::Display for MapRender<'s> { } } +/// Basic grammar for "a" VS "an" indefinite articles. +/// +/// It does NOT cover all rules & exceptions. +fn indefinite_article_a_or_an(word: &str) -> &str { + match word.chars().next() { + Some(c) if matches!(c.to_ascii_lowercase(), 'a' | 'e' | 'i' | 'o' | 'u') => "an", + _ => "a", + } +} + struct RenderState { hoisted_enums: IndexSet, } @@ -302,7 +312,10 @@ impl OutputFormatContent { ) -> Option { match ft { FieldType::Primitive(TypeValue::String) => None, - FieldType::Primitive(_) => Some(String::from("Answer as a: ")), + FieldType::Primitive(p) => Some(format!( + "Answer as {article} ", + article = indefinite_article_a_or_an(&p.to_string()) + )), FieldType::Literal(_) => Some(String::from("Answer using this specific value:\n")), FieldType::Enum(_) => Some(String::from("Answer with any of the categories:\n")), FieldType::Class(cls) => { @@ -659,6 +672,20 @@ mod tests { assert_eq!(rendered, None); } + #[test] + fn render_int() { + let content = OutputFormatContent::target(FieldType::int()).build(); + let rendered = content.render(RenderOptions::default()).unwrap(); + assert_eq!(rendered, Some("Answer as an int".into())); + } + + #[test] + fn render_float() { + let content = OutputFormatContent::target(FieldType::float()).build(); + let rendered = content.render(RenderOptions::default()).unwrap(); + assert_eq!(rendered, Some("Answer as a float".into())); + } + #[test] fn render_array() { let content = OutputFormatContent::new_array(); diff --git a/fern/03-reference/baml/prompt-syntax/output-format.mdx b/fern/03-reference/baml/prompt-syntax/output-format.mdx index 26b70cfcb..a632b43ba 100644 --- a/fern/03-reference/baml/prompt-syntax/output-format.mdx +++ b/fern/03-reference/baml/prompt-syntax/output-format.mdx @@ -71,7 +71,8 @@ BAML's default prefix varies based on the function's return type. | Fuction return type | Default Prefix | | --- | --- | | Primitive (String) | | -| Primitive (Other) | `Answer as a: ` | +| Primitive (Int) | `Answer as an ` | +| Primitive (Other) | `Answer as a ` | | Enum | `Answer with any of the categories:\n` | | Class | `Answer in JSON using this schema:\n` | | List | `Answer with a JSON Array using this schema:\n` |