2727#include < solvers/sat/satcheck.h>
2828#include < solvers/refinement/string_constraint_instantiation.h>
2929#include < java_bytecode/java_types.h>
30+ #include < unordered_set>
3031
3132static exprt substitute_array_with_expr (const exprt &expr, const exprt &index);
3233
@@ -452,7 +453,8 @@ static union_find_replacet generate_symbol_resolution_from_equations(
452453 // function applications can be ignored because they will be replaced
453454 // in the convert_function_application step of dec_solve
454455 }
455- else if (has_char_pointer_subtype (lhs.type ()))
456+ else if (lhs.type ().id () != ID_pointer &&
457+ has_char_pointer_subtype (lhs.type ()))
456458 {
457459 if (rhs.type ().id () == ID_struct)
458460 {
@@ -479,12 +481,125 @@ static union_find_replacet generate_symbol_resolution_from_equations(
479481 return solver;
480482}
481483
482- // / Symbol resolution for expressions of type string typet
484+ // / Maps equation to expressions contained in them and conversely expressions to
485+ // / equations that contain them. This can be used on a subset of expressions
486+ // / which interests us, in particular strings. Equations are identified by an
487+ // / index of type `std::size_t` for more efficient insertion and lookup.
488+ class equation_symbol_mappingt
489+ {
490+ public:
491+ // Record index of the equations that contain a given expression
492+ std::map<exprt, std::vector<std::size_t >> equations_containing;
493+ // Record expressions that are contained in the equation with the given index
494+ std::unordered_map<std::size_t , std::vector<exprt>> strings_in_equation;
495+
496+ void add (const std::size_t i, const exprt &expr)
497+ {
498+ equations_containing[expr].push_back (i);
499+ strings_in_equation[i].push_back (expr);
500+ }
501+
502+ std::vector<exprt> find_expressions (const std::size_t i)
503+ {
504+ return strings_in_equation[i];
505+ }
506+
507+ std::vector<std::size_t > find_equations (const exprt &expr)
508+ {
509+ return equations_containing[expr];
510+ }
511+ };
512+
513+ // / This is meant to be used on the lhs of an equation with string subtype.
514+ // / \param lhs: expression which is either of string type, or a symbol
515+ // / representing a struct with some string members
516+ // / \return if lhs is a string return this string, if it is a struct return the
517+ // / members of the expression that have string type.
518+ static std::vector<exprt> extract_strings_from_lhs (const exprt &lhs)
519+ {
520+ std::vector<exprt> result;
521+ if (lhs.type () == string_typet ())
522+ result.push_back (lhs);
523+ else if (lhs.type ().id () == ID_struct)
524+ {
525+ const struct_typet &struct_type = to_struct_type (lhs.type ());
526+ for (const auto &comp : struct_type.components ())
527+ {
528+ const std::vector<exprt> strings_in_comp = extract_strings_from_lhs (
529+ member_exprt (lhs, comp.get_name (), comp.type ()));
530+ result.insert (
531+ result.end (), strings_in_comp.begin (), strings_in_comp.end ());
532+ }
533+ }
534+ return result;
535+ }
536+
537+ // / \param expr: an expression
538+ // / \return all subexpressions of type string which are not if_exprt expressions
539+ // / this includes expressions of the form `e.x` if e is a symbol subexpression
540+ // / with a field `x` of type string
541+ static std::vector<exprt> extract_strings (const exprt &expr)
542+ {
543+ std::vector<exprt> result;
544+ for (auto it = expr.depth_begin (); it != expr.depth_end ();)
545+ {
546+ if (it->type () == string_typet () && it->id () != ID_if)
547+ {
548+ result.push_back (*it);
549+ it.next_sibling_or_parent ();
550+ }
551+ else if (it->id () == ID_symbol)
552+ {
553+ for (const exprt &e : extract_strings_from_lhs (*it))
554+ result.push_back (e);
555+ it.next_sibling_or_parent ();
556+ }
557+ else
558+ ++it;
559+ }
560+ return result;
561+ }
562+
563+ // / Given an equation on strings, mark these strings as belonging to the same
564+ // / set in the `symbol_resolve` structure. The lhs and rhs of the equation,
565+ // / should have string type or be struct with string members.
566+ // / \param eq: equation to add
567+ // / \param symbol_resolve: structure to which the equation will be added
568+ // / \param ns: namespace
569+ static void add_string_equation_to_symbol_resolution (
570+ const equal_exprt &eq,
571+ union_find_replacet &symbol_resolve,
572+ const namespacet &ns)
573+ {
574+ if (eq.rhs ().type () == string_typet ())
575+ {
576+ symbol_resolve.make_union (eq.lhs (), eq.rhs ());
577+ }
578+ else if (has_string_subtype (eq.lhs ().type ()))
579+ {
580+ if (eq.rhs ().type ().id () == ID_struct)
581+ {
582+ const struct_typet &struct_type = to_struct_type (eq.rhs ().type ());
583+ for (const auto &comp : struct_type.components ())
584+ {
585+ const member_exprt lhs_data (eq.lhs (), comp.get_name (), comp.type ());
586+ const exprt rhs_data = simplify_expr (
587+ member_exprt (eq.rhs (), comp.get_name (), comp.type ()), ns);
588+ add_string_equation_to_symbol_resolution (
589+ equal_exprt (lhs_data, rhs_data), symbol_resolve, ns);
590+ }
591+ }
592+ }
593+ }
594+
595+ // / Symbol resolution for expressions of type string typet.
596+ // / We record equality between these expressions in the output if one of the
597+ // / function calls depends on them.
483598// / \param equations: list of equations
484599// / \param ns: namespace
485600// / \param stream: output stream
486601// / \return union_find_replacet structure containing the correspondences.
487- static union_find_replacet string_identifiers_resolution_from_equations (
602+ union_find_replacet string_identifiers_resolution_from_equations (
488603 std::vector<equal_exprt> &equations,
489604 const namespacet &ns,
490605 messaget::mstreamt &stream)
@@ -494,35 +609,63 @@ static union_find_replacet string_identifiers_resolution_from_equations(
494609 " WARNING string_refinement.cpp "
495610 " string_identifiers_resolution_from_equations:" ;
496611
497- union_find_replacet result;
498- for (const equal_exprt &eq : equations)
612+ equation_symbol_mappingt equation_map;
613+
614+ // Indexes of equations that need to be added to the result
615+ std::unordered_set<size_t > required_equations;
616+ std::stack<size_t > equations_to_treat;
617+
618+ for (std::size_t i = 0 ; i < equations.size (); ++i)
499619 {
500- if (eq.rhs ().type () == string_typet ())
501- result.make_union (eq.lhs (), eq.rhs ());
502- else if (has_string_subtype (eq.lhs ().type ()))
620+ const equal_exprt &eq = equations[i];
621+ if (eq.rhs ().id () == ID_function_application)
622+ {
623+ if (required_equations.insert (i).second )
624+ equations_to_treat.push (i);
625+
626+ std::vector<exprt> rhs_strings = extract_strings (eq.rhs ());
627+ for (const auto expr : rhs_strings)
628+ equation_map.add (i, expr);
629+ }
630+ else if (eq.lhs ().type ().id () != ID_pointer &&
631+ has_string_subtype (eq.lhs ().type ()))
503632 {
504- if (eq.rhs ().type ().id () == ID_struct)
633+ std::vector<exprt> lhs_strings = extract_strings_from_lhs (eq.lhs ());
634+
635+ for (const auto expr : lhs_strings)
636+ equation_map.add (i, expr);
637+
638+ if (lhs_strings.empty ())
505639 {
506- const struct_typet &struct_type = to_struct_type (eq.rhs ().type ());
507- for (const auto &comp : struct_type.components ())
508- {
509- if (comp.type () == string_typet ())
510- {
511- const member_exprt lhs_data (eq.lhs (), comp.get_name (), comp.type ());
512- const exprt rhs_data = simplify_expr (
513- member_exprt (eq.rhs (), comp.get_name (), comp.type ()), ns);
514- result.make_union (lhs_data, rhs_data);
515- }
516- }
640+ stream << log_message << " non struct with string subtype "
641+ << from_expr (ns, " " , eq.lhs ()) << " \n * of type "
642+ << from_type (ns, " " , eq.lhs ().type ()) << eom;
517643 }
518- else
644+
645+ for (const exprt &expr : extract_strings (eq.rhs ()))
646+ equation_map.add (i, expr);
647+ }
648+ }
649+
650+ // transitively add all equations which depend on the equations to treat
651+ while (!equations_to_treat.empty ())
652+ {
653+ const std::size_t i = equations_to_treat.top ();
654+ equations_to_treat.pop ();
655+ for (const exprt &string : equation_map.find_expressions (i))
656+ {
657+ for (const std::size_t j : equation_map.find_equations (string))
519658 {
520- stream << log_message << " non struct with string subexpr "
521- << from_expr (ns, " " , eq.rhs ()) << " \n * of type "
522- << from_type (ns, " " , eq.rhs ().type ()) << eom;
659+ if (required_equations.insert (j).second )
660+ equations_to_treat.push (j);
523661 }
524662 }
525663 }
664+
665+ union_find_replacet result;
666+ for (const std::size_t i : required_equations)
667+ add_string_equation_to_symbol_resolution (equations[i], result, ns);
668+
526669 return result;
527670}
528671
0 commit comments