1010
1111#include < util/std_expr.h>
1212#include < util/prefix.h>
13+ #include < util/arith_tools.h>
14+ #include < util/unicode.h>
15+
16+ #include < linking/zero_initializer.h>
1317
1418#include " java_bytecode_typecheck.h"
1519#include " java_pointer_casts.h"
20+ #include " java_types.h"
1621
1722/* ******************************************************************\
1823
@@ -114,6 +119,27 @@ static std::string escape_non_alnum(const std::string &toescape)
114119
115120/* ******************************************************************\
116121
122+ Function: utf16_to_array
123+
124+ Inputs: `in`: wide string to convert
125+
126+ Outputs: Returns a Java char array containing the same wchars.
127+
128+ Purpose: Convert UCS-2 or UTF-16 to an array expression.
129+
130+ \*******************************************************************/
131+
132+ static array_exprt utf16_to_array (const std::wstring &in)
133+ {
134+ const auto jchar=java_char_type ();
135+ array_exprt ret (array_typet (jchar, infinity_exprt (java_int_type ())));
136+ for (const auto c : in)
137+ ret.copy_to_operands (from_integer (c, jchar));
138+ return ret;
139+ }
140+
141+ /* ******************************************************************\
142+
117143Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal
118144
119145 Inputs:
@@ -136,28 +162,106 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
136162 auto findit=symbol_table.symbols .find (escaped_symbol_name);
137163 if (findit!=symbol_table.symbols .end ())
138164 {
139- expr=findit->second .symbol_expr ();
165+ expr=address_of_exprt ( findit->second .symbol_expr () );
140166 return ;
141167 }
142168
143169 // Create a new symbol:
144170 symbolt new_symbol;
145171 new_symbol.name =escaped_symbol_name;
146- new_symbol.type =pointer_typet ( string_type) ;
172+ new_symbol.type =string_type;
147173 new_symbol.base_name =" Literal" ;
148174 new_symbol.pretty_name =value;
149175 new_symbol.mode =ID_java;
150176 new_symbol.is_type =false ;
151177 new_symbol.is_lvalue =true ;
152178 new_symbol.is_static_lifetime =true ; // These are basically const global data.
153179
180+ // Regardless of string refinement setting, at least initialize
181+ // the literal with @clsid = String and @lock = false:
182+ symbol_typet jlo_symbol (" java::java.lang.Object" );
183+ const auto &jlo_struct=to_struct_type (ns.follow (jlo_symbol));
184+ struct_exprt jlo_init (jlo_symbol);
185+ const auto &jls_struct=to_struct_type (ns.follow (string_type));
186+
187+ jlo_init.copy_to_operands (
188+ constant_exprt (
189+ " java::java.lang.String" ,
190+ jlo_struct.components ()[0 ].type ()));
191+ jlo_init.copy_to_operands (
192+ from_integer (
193+ 0 ,
194+ jlo_struct.components ()[1 ].type ()));
195+
196+ // If string refinement *is* around, populate the actual
197+ // contents as well:
198+ if (string_refinement_enabled)
199+ {
200+ struct_exprt literal_init (new_symbol.type );
201+ literal_init.move_to_operands (jlo_init);
202+
203+ // Initialize the string with a constant utf-16 array:
204+ symbolt array_symbol;
205+ array_symbol.name =escaped_symbol_name+" _constarray" ;
206+ array_symbol.type =array_typet (
207+ java_char_type (), infinity_exprt (java_int_type ()));
208+ array_symbol.base_name =" Literal_constarray" ;
209+ array_symbol.pretty_name =value;
210+ array_symbol.mode =ID_java;
211+ array_symbol.is_type =false ;
212+ array_symbol.is_lvalue =true ;
213+ // These are basically const global data:
214+ array_symbol.is_static_lifetime =true ;
215+ array_symbol.is_state_var =true ;
216+ auto literal_array=utf16_to_array (
217+ utf8_to_utf16_little_endian (id2string (value)));
218+ array_symbol.value =literal_array;
219+
220+ if (symbol_table.add (array_symbol))
221+ throw " failed to add constarray symbol to symbol table" ;
222+
223+ literal_init.copy_to_operands (
224+ from_integer (literal_array.operands ().size (),
225+ jls_struct.components ()[1 ].type ()));
226+ literal_init.copy_to_operands (
227+ address_of_exprt (array_symbol.symbol_expr ()));
228+
229+ new_symbol.value =literal_init;
230+ }
231+ else if (jls_struct.components ().size ()>=1 &&
232+ jls_struct.components ()[0 ].get_name ()==" @java.lang.Object" )
233+ {
234+ // Case where something defined java.lang.String, so it has
235+ // a proper base class (always java.lang.Object in practical
236+ // JDKs seen so far)
237+ struct_exprt literal_init (new_symbol.type );
238+ literal_init.move_to_operands (jlo_init);
239+ for (const auto &comp : jls_struct.components ())
240+ {
241+ if (comp.get_name ()==" @java.lang.Object" )
242+ continue ;
243+ // Other members of JDK's java.lang.String we don't understand
244+ // without string-refinement. Just zero-init them; consider using
245+ // test-gen-like nondet object trees instead.
246+ literal_init.copy_to_operands (
247+ zero_initializer (comp.type (), expr.source_location (), ns));
248+ }
249+ new_symbol.value =literal_init;
250+ }
251+ else if (jls_struct.get_bool (ID_incomplete_class))
252+ {
253+ // Case where java.lang.String was stubbed, and so directly defines
254+ // @class_identifier and @lock:
255+ new_symbol.value =jlo_init;
256+ }
257+
154258 if (symbol_table.add (new_symbol))
155259 {
156260 error () << " failed to add string literal symbol to symbol table" << eom;
157261 throw 0 ;
158262 }
159263
160- expr=new_symbol.symbol_expr ();
264+ expr=address_of_exprt ( new_symbol.symbol_expr () );
161265}
162266
163267/* ******************************************************************\
0 commit comments