@@ -157,6 +157,19 @@ impl MarkedEventReceiver for YamlLoader {
157
157
}
158
158
}
159
159
160
+ #[ derive( Debug ) ]
161
+ pub enum LoadError {
162
+ IO ( std:: io:: Error ) ,
163
+ Scan ( ScanError ) ,
164
+ Decode ( std:: borrow:: Cow < ' static , str > ) ,
165
+ }
166
+
167
+ impl From < std:: io:: Error > for LoadError {
168
+ fn from ( error : std:: io:: Error ) -> Self {
169
+ LoadError :: IO ( error)
170
+ }
171
+ }
172
+
160
173
impl YamlLoader {
161
174
fn insert_new_node ( & mut self , node : ( Yaml , usize ) ) {
162
175
// valid anchor id starts from 1
@@ -197,6 +210,42 @@ impl YamlLoader {
197
210
parser. load ( & mut loader, true ) ?;
198
211
Ok ( loader. docs )
199
212
}
213
+
214
+ pub fn load_from_bytes ( mut source : impl std:: io:: Read ) -> Result < Vec < Yaml > , LoadError > {
215
+ let mut buffer = Vec :: new ( ) ;
216
+ source. read_to_end ( & mut buffer) ?;
217
+
218
+ // Decodes the input buffer using either UTF-8, UTF-16LE or UTF-16BE depending on the BOM codepoint.
219
+ // If the buffer doesn't start with a BOM codepoint, it will use a fallback encoding obtained by
220
+ // detect_utf16_endianness.
221
+ let ( res, _) = encoding:: types:: decode (
222
+ & buffer,
223
+ encoding:: DecoderTrap :: Strict ,
224
+ detect_utf16_endianness ( & buffer) ,
225
+ ) ;
226
+ let s = res. map_err ( LoadError :: Decode ) ?;
227
+ YamlLoader :: load_from_str ( & s) . map_err ( LoadError :: Scan )
228
+ }
229
+ }
230
+
231
+ /// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and utf-16BE, when the
232
+ /// bytestream starts with BOM codepoint.
233
+ /// However, it doesn't even attempt to guess the UTF-16 endianness of the input bytestream since
234
+ /// in the general case the bytestream could start with a codepoint that uses both bytes.
235
+ ///
236
+ /// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character.
237
+ /// This allows the encoding to be deduced by the pattern of null (#x00) characters.
238
+ //
239
+ /// See spec at https://yaml.org/spec/1.2/spec.html#id2771184
240
+ fn detect_utf16_endianness ( b : & [ u8 ] ) -> encoding:: types:: EncodingRef {
241
+ if b. len ( ) > 1 && ( b[ 0 ] != b[ 1 ] ) {
242
+ if b[ 0 ] == 0 {
243
+ return encoding:: all:: UTF_16BE ;
244
+ } else if b[ 1 ] == 0 {
245
+ return encoding:: all:: UTF_16LE ;
246
+ }
247
+ }
248
+ encoding:: all:: UTF_8
200
249
}
201
250
202
251
macro_rules! define_as (
@@ -736,4 +785,67 @@ subcommands3:
736
785
let s = "[" . repeat ( 10_000 ) + & "]" . repeat ( 10_000 ) ;
737
786
assert ! ( YamlLoader :: load_from_str( & s) . is_err( ) ) ;
738
787
}
788
+
789
/// A UTF-8 document that starts with a BOM loads through `load_from_bytes`.
#[test]
fn test_read_bom() {
    // UTF-8 BOM (EF BB BF) followed by the document; newlines are spelled
    // out so the fixture does not depend on source-file whitespace.
    let input: &[u8] = b"\xef\xbb\xbf---\na: 1\nb: 2.2\nc: [1, 2]\n";

    let docs = YamlLoader::load_from_bytes(input).unwrap();
    let first = &docs[0];

    assert_eq!(first["a"].as_i64().unwrap(), 1i64);
    assert_eq!(first["b"].as_f64().unwrap(), 2.2f64);
    assert_eq!(first["c"][1].as_i64().unwrap(), 2i64);
    assert!(first["d"][0].is_badvalue());
}
803
+
804
/// A UTF-16LE document that starts with a BOM loads through `load_from_bytes`.
#[test]
fn test_read_utf16le() {
    let yaml = "---\na: 1\nb: 2.2\nc: [1, 2]\n";

    // UTF-16LE BOM (FF FE), then every code unit with the low byte first.
    // Generating the bytes keeps the fixture readable and avoids a
    // hand-typed NUL-interleaved literal.
    let mut bytes: Vec<u8> = vec![0xff, 0xfe];
    for unit in yaml.encode_utf16() {
        bytes.push((unit & 0xff) as u8); // low byte (masked, cast is lossless)
        bytes.push((unit >> 8) as u8); // high byte
    }

    let out = YamlLoader::load_from_bytes(bytes.as_slice()).unwrap();
    let doc = &out[0];
    assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
    assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
    assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
    assert!(doc["d"][0].is_badvalue());
}
819
+
820
/// A UTF-16BE document that starts with a BOM loads through `load_from_bytes`.
#[test]
fn test_read_utf16be() {
    let yaml = "---\na: 1\nb: 2.2\nc: [1, 2]\n";

    // UTF-16BE BOM (FE FF), then every code unit with the high byte first.
    // Generating the bytes keeps the fixture readable and avoids a
    // hand-typed NUL-interleaved literal.
    let mut bytes: Vec<u8> = vec![0xfe, 0xff];
    for unit in yaml.encode_utf16() {
        bytes.push((unit >> 8) as u8); // high byte
        bytes.push((unit & 0xff) as u8); // low byte (masked, cast is lossless)
    }

    let out = YamlLoader::load_from_bytes(bytes.as_slice()).unwrap();
    let doc = &out[0];
    assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
    assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
    assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
    assert!(doc["d"][0].is_badvalue());
}
835
+
836
/// A UTF-16LE document without a BOM still loads: the loader has to infer the
/// encoding from the null-byte pattern of the first character.
#[test]
fn test_read_utf16le_nobom() {
    let yaml = "---\na: 1\nb: 2.2\nc: [1, 2]\n";

    // No BOM: just the UTF-16LE code units, low byte first. Generating the
    // bytes keeps the fixture readable and avoids a hand-typed
    // NUL-interleaved literal.
    let mut bytes: Vec<u8> = Vec::with_capacity(yaml.len() * 2);
    for unit in yaml.encode_utf16() {
        bytes.push((unit & 0xff) as u8); // low byte (masked, cast is lossless)
        bytes.push((unit >> 8) as u8); // high byte
    }

    let out = YamlLoader::load_from_bytes(bytes.as_slice()).unwrap();
    let doc = &out[0];
    assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
    assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
    assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
    assert!(doc["d"][0].is_badvalue());
}
739
851
}
0 commit comments