@@ -15,16 +15,32 @@ use crate::buffer::TextBuffer;
1515use crate :: unicode:: Utf8Chars ;
1616use crate :: { apperr, arena_format, sys} ;
1717
/// A single text encoding that can be offered to the user.
///
/// The two built-in entries ("UTF-8" and "UTF-8 BOM") use the same string
/// for both fields. For converters enumerated from ICU, `label` is the MIME
/// name when ICU reports one and falls back to the ICU name otherwise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Encoding {
    /// Name to present for this encoding (MIME name where available).
    pub label: &'static str,
    /// Name of the converter as enumerated from ICU
    /// (presumably the string to hand back to ICU when opening it).
    pub canonical: &'static str,
}

/// The full set of known encodings, as handed out by `get_available_encodings`.
///
/// Both slices point into the same leaked allocation: `preferred` is the
/// leading part of `all`, so every preferred entry also appears in `all`.
#[derive(Debug)]
pub struct Encodings {
    /// The built-in UTF-8 variants followed by every converter that has a MIME name.
    pub preferred: &'static [Encoding],
    /// `preferred`, followed by the converters that have no MIME name.
    pub all: &'static [Encoding],
}

// Lazily filled cache behind `get_available_encodings`.
// An empty `all` slice doubles as the "not yet initialized" marker.
static mut ENCODINGS: Encodings = Encodings { preferred: &[], all: &[] };
1930
2031/// Returns a list of encodings ICU supports.
21- pub fn get_available_encodings ( ) -> & ' static [ & ' static str ] {
32+ pub fn get_available_encodings ( ) -> & ' static Encodings {
2233 // OnceCell for people that want to put it into a static.
2334 #[ allow( static_mut_refs) ]
2435 unsafe {
25- if ENCODINGS . is_empty ( ) {
26- ENCODINGS . push ( "UTF-8" ) ;
27- ENCODINGS . push ( "UTF-8 BOM" ) ;
36+ if ENCODINGS . all . is_empty ( ) {
37+ let scratch = scratch_arena ( None ) ;
38+ let mut preferred = Vec :: new_in ( & * scratch) ;
39+ let mut alternative = Vec :: new_in ( & * scratch) ;
40+
41+ // These encodings are always available.
42+ preferred. push ( Encoding { label : "UTF-8" , canonical : "UTF-8" } ) ;
43+ preferred. push ( Encoding { label : "UTF-8 BOM" , canonical : "UTF-8 BOM" } ) ;
2844
2945 if let Ok ( f) = init_if_needed ( ) {
3046 let mut n = 0 ;
@@ -34,17 +50,43 @@ pub fn get_available_encodings() -> &'static [&'static str] {
3450 break ;
3551 }
3652
53+ n += 1 ;
54+
3755 let name = CStr :: from_ptr ( name) . to_str ( ) . unwrap_unchecked ( ) ;
38- // We have already pushed UTF-8 above.
39- // There is no need to filter UTF-8 BOM here, since ICU does not distinguish it from UTF-8.
40- if name != "UTF-8" {
41- ENCODINGS . push ( name) ;
56+ // We have already pushed UTF-8 above and can skip it.
57+ // There is no need to filter UTF-8 BOM here,
58+ // since ICU does not distinguish it from UTF-8.
59+ if name. is_empty ( ) || name == "UTF-8" {
60+ continue ;
4261 }
4362
44- n += 1 ;
63+ let mut status = icu_ffi:: U_ZERO_ERROR ;
64+ let mime = ( f. ucnv_getStandardName ) (
65+ name. as_ptr ( ) ,
66+ c"MIME" . as_ptr ( ) as * const _ ,
67+ & mut status,
68+ ) ;
69+ if !mime. is_null ( ) && status. is_success ( ) {
70+ let mime = CStr :: from_ptr ( mime) . to_str ( ) . unwrap_unchecked ( ) ;
71+ preferred. push ( Encoding { label : mime, canonical : name } ) ;
72+ } else {
73+ alternative. push ( Encoding { label : name, canonical : name } ) ;
74+ }
4575 }
4676 }
77+
78+ let preferred_len = preferred. len ( ) ;
79+
80+ // Combine the preferred and alternative encodings into a single list.
81+ let mut all = Vec :: with_capacity ( preferred. len ( ) + alternative. len ( ) ) ;
82+ all. extend ( preferred) ;
83+ all. extend ( alternative) ;
84+
85+ let all = all. leak ( ) ;
86+ ENCODINGS . preferred = & all[ ..preferred_len] ;
87+ ENCODINGS . all = & all[ ..] ;
4788 }
89+
4890 & ENCODINGS
4991 }
5092}
@@ -827,23 +869,35 @@ pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> {
827869 result
828870}
829871
872+ // NOTE:
873+ // To keep this neat, fields are ordered by prefix (= `ucol_` before `uregex_`),
874+ // followed by functions in this order:
875+ // * Static methods (e.g. `ucnv_getAvailableName`)
876+ // * Constructors (e.g. `ucnv_open`)
877+ // * Destructors (e.g. `ucnv_close`)
878+ // * Methods, grouped by relationship
879+ // (e.g. `uregex_start64` and `uregex_end64` are near each other)
880+ //
830881// WARNING:
831882// The order of the fields MUST match the order of strings in the following two arrays.
832883#[ allow( non_snake_case) ]
833884#[ repr( C ) ]
834885struct LibraryFunctions {
835886 // LIBICUUC_PROC_NAMES
836887 u_errorName : icu_ffi:: u_errorName ,
888+ ucasemap_open : icu_ffi:: ucasemap_open ,
889+ ucasemap_utf8FoldCase : icu_ffi:: ucasemap_utf8FoldCase ,
837890 ucnv_getAvailableName : icu_ffi:: ucnv_getAvailableName ,
891+ ucnv_getStandardName : icu_ffi:: ucnv_getStandardName ,
838892 ucnv_open : icu_ffi:: ucnv_open ,
839893 ucnv_close : icu_ffi:: ucnv_close ,
840894 ucnv_convertEx : icu_ffi:: ucnv_convertEx ,
841- ucasemap_open : icu_ffi:: ucasemap_open ,
842- ucasemap_utf8FoldCase : icu_ffi:: ucasemap_utf8FoldCase ,
843895 utext_setup : icu_ffi:: utext_setup ,
844896 utext_close : icu_ffi:: utext_close ,
845897
846898 // LIBICUI18N_PROC_NAMES
899+ ucol_open : icu_ffi:: ucol_open ,
900+ ucol_strcollUTF8 : icu_ffi:: ucol_strcollUTF8 ,
847901 uregex_open : icu_ffi:: uregex_open ,
848902 uregex_close : icu_ffi:: uregex_close ,
849903 uregex_setTimeLimit : icu_ffi:: uregex_setTimeLimit ,
@@ -852,25 +906,26 @@ struct LibraryFunctions {
852906 uregex_findNext : icu_ffi:: uregex_findNext ,
853907 uregex_start64 : icu_ffi:: uregex_start64 ,
854908 uregex_end64 : icu_ffi:: uregex_end64 ,
855- ucol_open : icu_ffi:: ucol_open ,
856- ucol_strcollUTF8 : icu_ffi:: ucol_strcollUTF8 ,
857909}
858910
// Symbols found in libicuuc.so on UNIX, icuuc.dll/icu.dll on Windows.
//
// WARNING:
// The entries MUST stay in the same order as the corresponding fields of
// `LibraryFunctions`, and the array length must be updated together with them.
const LIBICUUC_PROC_NAMES: [&CStr; 10] = [
    c"u_errorName",
    c"ucasemap_open",
    c"ucasemap_utf8FoldCase",
    c"ucnv_getAvailableName",
    c"ucnv_getStandardName",
    c"ucnv_open",
    c"ucnv_close",
    c"ucnv_convertEx",
    c"utext_setup",
    c"utext_close",
];
871924
925+ // Found in libicui18n.so on UNIX, icuin.dll/icu.dll on Windows.
872926const LIBICUI18N_PROC_NAMES : [ & CStr ; 10 ] = [
873- // Found in libicui18n.so on UNIX, icuin.dll/icu.dll on Windows.
927+ c"ucol_open" ,
928+ c"ucol_strcollUTF8" ,
874929 c"uregex_open" ,
875930 c"uregex_close" ,
876931 c"uregex_setTimeLimit" ,
@@ -879,8 +934,6 @@ const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [
879934 c"uregex_findNext" ,
880935 c"uregex_start64" ,
881936 c"uregex_end64" ,
882- c"ucol_open" ,
883- c"ucol_strcollUTF8" ,
884937] ;
885938
886939enum LibraryFunctionsState {
@@ -1020,7 +1073,13 @@ mod icu_ffi {
10201073
/// Opaque stand-in for ICU's converter object.
/// It is only ever used behind a raw pointer and never constructed from Rust.
pub struct UConverter;

/// Signature of ICU's `ucnv_getAvailableName`, which yields the name of the
/// `n`-th available converter.
///
/// The string belongs to ICU and must not be written to, hence `*const`.
/// NOTE(review): ICU documents a null return once `n` is out of range; confirm
/// against the ICU API reference.
pub type ucnv_getAvailableName = unsafe extern "C" fn(n: i32) -> *const c_char;
1077+
1078+ pub type ucnv_getStandardName = unsafe extern "C" fn (
1079+ name : * const u8 ,
1080+ standard : * const u8 ,
1081+ status : & mut UErrorCode ,
1082+ ) -> * const c_char ;
10241083
10251084 pub type ucnv_open =
10261085 unsafe extern "C" fn ( converter_name : * const u8 , status : & mut UErrorCode ) -> * mut UConverter ;
0 commit comments