From 1066d2cad16ba797a9ca698c3cf3e7e3afa1ebd8 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 Nov 2021 11:36:44 +0000 Subject: [PATCH 01/13] Extend unit test of bits2expr/expr2bits with struct type Structs with sub-byte bit fields demonstrate that big-endian ordering also re-orders the bits within a byte. An executable variant of this unit test was validated on mips (using qemu-mips), demonstrating the same bit ordering. No bug fixes/code changes required, this test just confirms that our endianness interpretation matches actual hardware. --- unit/util/simplify_expr.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/unit/util/simplify_expr.cpp b/unit/util/simplify_expr.cpp index a7f1e19a62b..0c830c84f69 100644 --- a/unit/util/simplify_expr.cpp +++ b/unit/util/simplify_expr.cpp @@ -108,6 +108,30 @@ TEST_CASE("expr2bits and bits2expr respect bit order", "[core][util]") const auto should_be_deadbeef2 = bits2expr(*be, unsignedbv_typet(32), false, ns); REQUIRE(deadbeef == *should_be_deadbeef2); + + c_bit_field_typet four_bits{unsignedbv_typet{8}, 4}; + struct_typet st{{{"s", unsignedbv_typet{16}}, + {"bf1", four_bits}, + {"bf2", four_bits}, + {"b", unsignedbv_typet{8}}}}; + + const auto fill_struct_le = bits2expr(*le, st, true, ns); + REQUIRE(fill_struct_le.has_value()); + REQUIRE( + to_struct_expr(*fill_struct_le).operands()[1] == + from_integer(0xd, four_bits)); + REQUIRE( + to_struct_expr(*fill_struct_le).operands()[2] == + from_integer(0xa, four_bits)); + + const auto fill_struct_be = bits2expr(*be, st, false, ns); + REQUIRE(fill_struct_be.has_value()); + REQUIRE( + to_struct_expr(*fill_struct_be).operands()[1] == + from_integer(0xb, four_bits)); + REQUIRE( + to_struct_expr(*fill_struct_be).operands()[2] == + from_integer(0xe, four_bits)); } TEST_CASE("Simplify extractbit", "[core][util]") From 5a572418ee3b895fe84115ae2ba66b73fb3536b6 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 Nov 2021 11:41:16 +0000 Subject: [PATCH 02/13] bv_typet is also sensitive to endianness The bits within an object of bv_typet are ordered depending on endianness. This is consistent with typecasts to/from bv_typet of some other bitvector type, which do not alter the sequence of bits. --- src/solvers/lowering/byte_operators.cpp | 27 +++++++++---------------- src/util/endianness_map.cpp | 10 ++++----- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp index cfcfb91277a..fad23110da8 100644 --- a/src/solvers/lowering/byte_operators.cpp +++ b/src/solvers/lowering/byte_operators.cpp @@ -50,10 +50,7 @@ static boundst map_bounds( // big-endian bounds need swapping if(result.ub < result.lb) - { - result.lb = endianness_map.number_of_bits() - result.lb - 1; - result.ub = endianness_map.number_of_bits() - result.ub - 1; - } + std::swap(result.lb, result.ub); } return result; @@ -1295,7 +1292,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns) concatenation_exprt concatenation( std::move(op), bitvector_typet(subtype->id(), width_bytes * 8)); - endianness_mapt map(src.type(), little_endian, ns); + endianness_mapt map(concatenation.type(), little_endian, ns); return bv_to_expr(concatenation, src.type(), map, ns); } } @@ -2226,22 +2223,16 @@ static exprt lower_byte_update( // original_bits |= newvalue bitor_exprt bitor_expr{bitand_expr, value_shifted}; - if(!is_little_endian && bit_width > type_bits) - { - return simplify_expr( - typecast_exprt::conditional_cast( - extractbits_exprt{bitor_expr, - bit_width - 1, - bit_width - type_bits, - bv_typet{type_bits}}, - src.type()), - ns); - } - else if(bit_width > type_bits) + if(bit_width > type_bits) { + endianness_mapt endianness_map( + bitor_expr.type(), src.id() == ID_byte_update_little_endian, ns); + const auto bounds = map_bounds(endianness_map, 0, type_bits - 1); + return simplify_expr( typecast_exprt::conditional_cast( - extractbits_exprt{bitor_expr, type_bits - 1, 0, bv_typet{type_bits}}, + extractbits_exprt{ + bitor_expr, bounds.ub, bounds.lb, bv_typet{type_bits}}, src.type()), ns); } diff --git a/src/util/endianness_map.cpp b/src/util/endianness_map.cpp index d861d9aa33d..36f26ae4819 100644 --- a/src/util/endianness_map.cpp +++ b/src/util/endianness_map.cpp @@ -53,12 +53,10 @@ void endianness_mapt::build_big_endian(const typet &src) { if(src.id() == ID_c_enum_tag) build_big_endian(ns.follow_tag(to_c_enum_tag_type(src))); - else if(src.id()==ID_unsignedbv || - src.id()==ID_signedbv || - src.id()==ID_fixedbv || - src.id()==ID_floatbv || - src.id()==ID_c_enum || - src.id()==ID_c_bit_field) + else if( + src.id() == ID_unsignedbv || src.id() == ID_signedbv || + src.id() == ID_fixedbv || src.id() == ID_floatbv || src.id() == ID_c_enum || + src.id() == ID_c_bit_field || src.id() == ID_bv) { // these do get re-ordered! auto bits = pointer_offset_bits(src, ns); // error is -1 From 62a4f1377702c59ebeea2d599fa0adb3909c3247 Mon Sep 17 00:00:00 2001 From: Daniel Kroening Date: Wed, 1 Sep 2021 13:27:26 +0100 Subject: [PATCH 03/13] enable nondeterministic pointers This commit enables the use of nondeterministic pointers, to allow declarative modeling of states that include pointers. --- regression/cbmc/Pointer14/main.c | 4 +--- .../String_Abstraction14/pass-in-implicit.c | 2 +- .../cbmc/String_Abstraction20/structs2.c | 2 +- .../double_deref_with_pointer_arithmetic.desc | 2 +- ..._with_pointer_arithmetic_single_alias.desc | 2 +- .../cbmc/nondet-pointer/nondet-pointer1.c | 13 +++++++++++++ .../cbmc/nondet-pointer/nondet-pointer1.desc | 8 ++++++++ .../cbmc/nondet-pointer/nondet-pointer2.c | 14 ++++++++++++++ .../cbmc/nondet-pointer/nondet-pointer2.desc | 8 ++++++++ .../cbmc/nondet-pointer/nondet-pointer3.c | 18 ++++++++++++++++++ .../cbmc/nondet-pointer/nondet-pointer3.desc | 8 ++++++++ .../cbmc/nondet-pointer/nondet-pointer4.c | 18 ++++++++++++++++++ .../cbmc/nondet-pointer/nondet-pointer4.desc | 8 ++++++++ .../cbmc/nondet-pointer/nondet-pointer5.c | 19 +++++++++++++++++++ .../cbmc/nondet-pointer/nondet-pointer5.desc | 8 ++++++++ src/pointer-analysis/value_set.cpp | 18 +++++++++++++++++- 16 files changed, 144 insertions(+), 8 deletions(-) create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer1.c create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer1.desc create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer2.c create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer2.desc create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer3.c create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer3.desc create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer4.c create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer4.desc create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer5.c create mode 100644 regression/cbmc/nondet-pointer/nondet-pointer5.desc diff --git a/regression/cbmc/Pointer14/main.c b/regression/cbmc/Pointer14/main.c index fdcba62bc34..5f6543f7bef 100644 --- a/regression/cbmc/Pointer14/main.c +++ b/regression/cbmc/Pointer14/main.c @@ -1,6 +1,4 @@ -typedef unsigned int size_t; - -void *malloc(size_t size); +void *malloc(__CPROVER_size_t); enum blockstate { diff --git a/regression/cbmc/String_Abstraction14/pass-in-implicit.c b/regression/cbmc/String_Abstraction14/pass-in-implicit.c index 547422d0649..3c2c5feb988 100644 --- a/regression/cbmc/String_Abstraction14/pass-in-implicit.c +++ b/regression/cbmc/String_Abstraction14/pass-in-implicit.c @@ -1,4 +1,4 @@ -void *malloc(unsigned); +void *malloc(__CPROVER_size_t); void use_str(char *s) { diff --git a/regression/cbmc/String_Abstraction20/structs2.c b/regression/cbmc/String_Abstraction20/structs2.c index dd295ed05fd..e190cd0efc2 100644 --- a/regression/cbmc/String_Abstraction20/structs2.c +++ b/regression/cbmc/String_Abstraction20/structs2.c @@ -1,4 +1,4 @@ -void *malloc(unsigned); +void *malloc(__CPROVER_size_t); typedef struct str_struct { diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc index df655168340..8a037e5ffda 100644 --- a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc @@ -2,7 +2,7 @@ CORE double_deref_with_pointer_arithmetic.c --show-vcc ^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = \{ symex_dynamic::dynamic_object1#3\[\[0\]\], symex_dynamic::dynamic_object1#3\[\[1\]\] \}\[cast\(mod #source_location=""\(main::argc!0@1#1, 2\), signedbv\[64\]\)\] -^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[23]\) \? main::argc!0@1#1 = 2 : \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[23]\) \? main::argc!0@1#1 = 1 : symex::invalid_object!0#0 = main::argc!0@1#1\)\) +^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[23]\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1 ^EXIT=0$ ^SIGNAL=0$ -- diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc index cf6fda8319a..39dcd3f91f1 100644 --- a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc @@ -1,7 +1,7 @@ CORE double_deref_with_pointer_arithmetic_single_alias.c --show-vcc -\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object2\) \? main::argc!0@1#1 = 1 : symex::invalid_object!0#0 = main::argc!0@1#1\) +\{1\} main::argc!0@1#1 = 1 ^EXIT=0$ ^SIGNAL=0$ -- diff --git a/regression/cbmc/nondet-pointer/nondet-pointer1.c b/regression/cbmc/nondet-pointer/nondet-pointer1.c new file mode 100644 index 00000000000..d4378f7ac14 --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer1.c @@ -0,0 +1,13 @@ +int *nondet_pointer(); + +int main() +{ + int x = 123; + int *p = &x; + int *q = nondet_pointer(); + + if(p == q) + __CPROVER_assert(*q == 123, "value of *q"); + + return 0; +} diff --git a/regression/cbmc/nondet-pointer/nondet-pointer1.desc b/regression/cbmc/nondet-pointer/nondet-pointer1.desc new file mode 100644 index 00000000000..d4834e848eb --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer1.desc @@ -0,0 +1,8 @@ +CORE +nondet-pointer1.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/nondet-pointer/nondet-pointer2.c b/regression/cbmc/nondet-pointer/nondet-pointer2.c new file mode 100644 index 00000000000..d56fb806444 --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer2.c @@ -0,0 +1,14 @@ +int *nondet_pointer(); + +int main() +{ + int x = 123, y = 456; + int *px = &x; + int *py = &y; + int *q = nondet_pointer(); + + if(q == px || q == py) + __CPROVER_assert(*q == 123 || *q == 456, "value of *q"); + + return 0; +} diff --git a/regression/cbmc/nondet-pointer/nondet-pointer2.desc b/regression/cbmc/nondet-pointer/nondet-pointer2.desc new file mode 100644 index 00000000000..d4834e848eb --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer2.desc @@ -0,0 +1,8 @@ +CORE +nondet-pointer1.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/nondet-pointer/nondet-pointer3.c b/regression/cbmc/nondet-pointer/nondet-pointer3.c new file mode 100644 index 00000000000..d2bf7113643 --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer3.c @@ -0,0 +1,18 @@ +int *nondet_pointer(); + +int main() +{ + int some_array[10]; + int *p = some_array; + int *q = nondet_pointer(); + int index; + + __CPROVER_assume(index >= 0 && index < 10); + __CPROVER_assume(q == p); + + q[index] = 123; + + __CPROVER_assert(some_array[index] == 123, "value of array[index]"); + + return 0; +} diff --git a/regression/cbmc/nondet-pointer/nondet-pointer3.desc b/regression/cbmc/nondet-pointer/nondet-pointer3.desc new file mode 100644 index 00000000000..d4834e848eb --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer3.desc @@ -0,0 +1,8 @@ +CORE +nondet-pointer1.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/nondet-pointer/nondet-pointer4.c b/regression/cbmc/nondet-pointer/nondet-pointer4.c new file mode 100644 index 00000000000..347f9e037c7 --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer4.c @@ -0,0 +1,18 @@ +int *nondet_pointer(); + +int main() +{ + int some_array[10]; + int *p = some_array; + int *q = nondet_pointer(); + int index; + + __CPROVER_assume(index >= 0 && index < 10); + __CPROVER_assume(q == p + index); + + *q = 123; + + __CPROVER_assert(some_array[index] == 123, "value of array[index]"); + + return 0; +} diff --git a/regression/cbmc/nondet-pointer/nondet-pointer4.desc b/regression/cbmc/nondet-pointer/nondet-pointer4.desc new file mode 100644 index 00000000000..d4834e848eb --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer4.desc @@ -0,0 +1,8 @@ +CORE +nondet-pointer1.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/nondet-pointer/nondet-pointer5.c b/regression/cbmc/nondet-pointer/nondet-pointer5.c new file mode 100644 index 00000000000..fdd1e153055 --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer5.c @@ -0,0 +1,19 @@ +int *nondet_pointer(); + +int main() +{ + int some_array[10]; + int *p = some_array; + int *q = nondet_pointer(); + int index; + + __CPROVER_assume(index >= 0 && index < 10); + __CPROVER_assume(__CPROVER_same_object(p, q)); + __CPROVER_assume(__CPROVER_POINTER_OFFSET(q) == sizeof(int) * index); + + *q = 123; + + __CPROVER_assert(some_array[index] == 123, "value of array[index]"); + + return 0; +} diff --git a/regression/cbmc/nondet-pointer/nondet-pointer5.desc b/regression/cbmc/nondet-pointer/nondet-pointer5.desc new file mode 100644 index 00000000000..d4834e848eb --- /dev/null +++ b/regression/cbmc/nondet-pointer/nondet-pointer5.desc @@ -0,0 +1,8 @@ +CORE +nondet-pointer1.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 2deee5085ae..25234f31ba0 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -505,6 +505,23 @@ void value_sett::get_value_set_rec( else insert(dest, exprt(ID_unknown, original_type)); } + else if(expr.id() == ID_nondet_symbol) + { + if(expr.type().id() == ID_pointer) + { + // we'll take the union of all objects we see, with unspecified offsets + values.iterate([this, &dest](const irep_idt &key, const entryt &value) { + for(const auto &object : value.object_map.read()) + insert(dest, object.first, offsett()); + }); + + // we'll add null, in case it's not there yet + insert( + dest, + exprt(ID_null_object, to_pointer_type(expr_type).subtype()), + offsett()); + } + } else if(expr.id()==ID_if) { get_value_set_rec( @@ -984,7 +1001,6 @@ void value_sett::get_value_set_rec( } else { - // for example: expr.id() == ID_nondet_symbol insert(dest, exprt(ID_unknown, original_type)); } From abb0f348137ca45a94c71640849c59d309de3306 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 5 Nov 2021 19:33:18 +0000 Subject: [PATCH 04/13] Support extractbits with out-of-bounds limits Byte extract already supports access beyond the bounds of the object being extracted from. Any bits outside bounds are free variables. extractbits now equally supports this case (via free variables). --- src/solvers/flattening/boolbv_extractbits.cpp | 47 +++++++++++++------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/src/solvers/flattening/boolbv_extractbits.cpp b/src/solvers/flattening/boolbv_extractbits.cpp index c8de549a2df..52cfde94d88 100644 --- a/src/solvers/flattening/boolbv_extractbits.cpp +++ b/src/solvers/flattening/boolbv_extractbits.cpp @@ -31,18 +31,6 @@ bvt boolbvt::convert_extractbits(const extractbits_exprt &expr) auto upper_as_int = maybe_upper_as_int.value(); auto lower_as_int = maybe_lower_as_int.value(); - DATA_INVARIANT_WITH_DIAGNOSTICS( - upper_as_int >= 0 && upper_as_int < src_bv.size(), - "upper end of extracted bits must be within the bitvector", - expr.find_source_location(), - irep_pretty_diagnosticst{expr}); - - DATA_INVARIANT_WITH_DIAGNOSTICS( - lower_as_int >= 0 && lower_as_int < src_bv.size(), - "lower end of extracted bits must be within the bitvector", - expr.find_source_location(), - irep_pretty_diagnosticst{expr}); - DATA_INVARIANT( lower_as_int <= upper_as_int, "upper bound must be greater or equal to lower bound"); @@ -56,9 +44,40 @@ bvt boolbvt::convert_extractbits(const extractbits_exprt &expr) expr.find_source_location(), irep_pretty_diagnosticst{expr}); - const std::size_t offset = numeric_cast_v(lower_as_int); + bvt result_bv; - bvt result_bv(src_bv.begin() + offset, src_bv.begin() + offset + bv_width); + // add out-of-bounds bits (if any) at the lower end + if(lower_as_int < 0) + { + if(upper_as_int < 0) + { + lower_as_int -= upper_as_int + 1; + upper_as_int = 0; + } + + const std::size_t lower_out_of_bounds = + numeric_cast_v(-lower_as_int); + result_bv = prop.new_variables(lower_out_of_bounds); + lower_as_int = 0; + } + + const std::size_t offset = numeric_cast_v(lower_as_int); + const std::size_t upper_size_t = numeric_cast_v(upper_as_int); + + result_bv.reserve(bv_width); + result_bv.insert( + result_bv.end(), + src_bv.begin() + std::min(offset, src_bv.size()), + src_bv.begin() + std::min(src_bv.size(), upper_size_t + 1)); + + // add out-of-bounds bits (if any) at the upper end + if(upper_size_t >= src_bv.size()) + { + bvt upper_oob_bits = + prop.new_variables(upper_size_t - std::max(offset, src_bv.size()) + 1); + result_bv.insert( + result_bv.end(), upper_oob_bits.begin(), upper_oob_bits.end()); + } return result_bv; } From 8b5e9387cb57b6feee18ab9d4ec2cb2610bbea5d Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 5 Nov 2021 20:54:58 +0000 Subject: [PATCH 05/13] goto-symex: expand unknown points-to values to all objects Non-deterministic pointers should consider all possible objects. goto-symex is aware of all current objects, and can thus expand points-to values of "unknown" to all current objects (leaving the pointer offset unconstrained). --- src/goto-symex/symex_dereference_state.cpp | 37 +++++++++++++++++++- src/pointer-analysis/value_set.cpp | 39 ++++++++++++++-------- src/pointer-analysis/value_set.h | 7 ++++ 3 files changed, 68 insertions(+), 15 deletions(-) diff --git a/src/goto-symex/symex_dereference_state.cpp b/src/goto-symex/symex_dereference_state.cpp index 363b61eabc8..b171b5cc6e8 100644 --- a/src/goto-symex/symex_dereference_state.cpp +++ b/src/goto-symex/symex_dereference_state.cpp @@ -11,6 +11,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "symex_dereference_state.h" +#include #include /// Get or create a failed symbol for the given pointer-typed expression. These @@ -80,5 +81,39 @@ symex_dereference_statet::get_or_create_failed_symbol(const exprt &expr) std::vector symex_dereference_statet::get_value_set(const exprt &expr) const { - return state.value_set.get_value_set(expr, ns); + auto result = state.value_set.get_value_set(expr, ns); + + bool has_unknown = result.empty(); + for(const auto &v : result) + { + if(v.id() == ID_unknown) + { + has_unknown = true; + break; + } + } + + if(has_unknown) + { +#if 0 + // The value set only knows about objects the address of which has been + // taken, and may therefore still present an underapproximation for + // non-deterministic pointers. + result = state.value_set.get_any_object(expr, ns); +#else + state.get_level2().current_names.iterate( + [this, &result]( + const irep_idt &key, const std::pair &entry) { + const irep_idt &obj_identifier = entry.first.get_object_name(); + if(obj_identifier != state.guard_identifier()) + result.emplace_back( + object_descriptor_exprt{ns.lookup(obj_identifier).symbol_expr()}); + }); + + result.emplace_back(object_descriptor_exprt{ + exprt(ID_null_object, to_pointer_type(expr.type()).subtype())}); +#endif + } + + return result; } diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 25234f31ba0..0d1ceba27db 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -371,6 +371,30 @@ value_sett::object_mapt value_sett::get_value_set( return dest; } +std::vector +value_sett::get_any_object(exprt expr, const namespacet &ns) const +{ + if(expr.type().id() != ID_pointer) + return {}; + + object_mapt object_map; + + // we'll take the union of all objects we see, with unspecified offsets + values.iterate([this, &object_map](const irep_idt &key, const entryt &value) { + for(const auto &object : value.object_map.read()) + insert(object_map, object.first, offsett()); + }); + + // we'll add null, in case it's not there yet + insert( + object_map, + exprt(ID_null_object, to_pointer_type(expr.type()).subtype()), + offsett()); + + return make_range(object_map.read()) + .map([&](const object_map_dt::value_type &pair) { return to_expr(pair); }); +} + /// Check if 'suffix' starts with 'field'. /// Suffix is delimited by periods and '[]' (array access) tokens, so we're /// looking for ".field($|[]|.)" @@ -507,20 +531,7 @@ void value_sett::get_value_set_rec( } else if(expr.id() == ID_nondet_symbol) { - if(expr.type().id() == ID_pointer) - { - // we'll take the union of all objects we see, with unspecified offsets - values.iterate([this, &dest](const irep_idt &key, const entryt &value) { - for(const auto &object : value.object_map.read()) - insert(dest, object.first, offsett()); - }); - - // we'll add null, in case it's not there yet - insert( - dest, - exprt(ID_null_object, to_pointer_type(expr_type).subtype()), - offsett()); - } + insert(dest, exprt(ID_unknown, original_type)); } else if(expr.id()==ID_if) { diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index ac493488d39..334929e367a 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -251,6 +251,13 @@ class value_sett /// are object_descriptor_exprt or have id ID_invalid or ID_unknown. std::vector get_value_set(exprt expr, const namespacet &ns) const; + /// Collects all objects known to the value set. + /// \param expr: query expression + /// \param ns: global namespace + /// \return list of objects that any pointer could point to. These expressions + /// are object_descriptor_exprt or have id ID_invalid or ID_unknown. + std::vector get_any_object(exprt expr, const namespacet &ns) const; + void clear() { values.clear(); From 6cc4f98af0bbd93b7d585ee023c9ed7f58d96c63 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 5 Nov 2021 19:48:28 +0000 Subject: [PATCH 06/13] goto-symex: initialisation of pointers is no longer necessary goto-symex now treats empty value sets as dereferences to all current objects. Therefore, there is no need to initialise the value set for pointer-typed objects upon declaration. --- regression/cbmc/nondet-pointer/nondet-pointer1.c | 4 ++++ src/goto-symex/goto_symex_state.cpp | 15 --------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/regression/cbmc/nondet-pointer/nondet-pointer1.c b/regression/cbmc/nondet-pointer/nondet-pointer1.c index d4378f7ac14..26c261a09dc 100644 --- a/regression/cbmc/nondet-pointer/nondet-pointer1.c +++ b/regression/cbmc/nondet-pointer/nondet-pointer1.c @@ -5,9 +5,13 @@ int main() int x = 123; int *p = &x; int *q = nondet_pointer(); + int *r; if(p == q) __CPROVER_assert(*q == 123, "value of *q"); + if(p == r) + __CPROVER_assert(*r == 123, "value of *q"); + return 0; } diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index e669329a4fd..ae324c4c05e 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -820,21 +820,6 @@ ssa_exprt goto_symex_statet::declare(ssa_exprt ssa, const namespacet &ns) rename(ssa.type(), l1_identifier, ns); ssa.update_type(); - // in case of pointers, put something into the value set - if(ssa.type().id() == ID_pointer) - { - exprt rhs; - if( - auto failed = - get_failed_symbol(to_symbol_expr(ssa.get_original_expr()), ns)) - rhs = address_of_exprt(*failed, to_pointer_type(ssa.type())); - else - rhs = exprt(ID_invalid); - - exprt l1_rhs = rename(std::move(rhs), ns).get(); - value_set.assign(ssa, l1_rhs, ns, true, false); - } - // L2 renaming const exprt fields = field_sensitivity.get_fields(ns, *this, ssa); for(const auto &l1_symbol : find_symbols(fields)) From bce9228e14ca009789188a3c1d6223bd59b0b37e Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 Nov 2021 11:25:12 +0000 Subject: [PATCH 07/13] Fix for nondet pointers all objects --- regression/cbmc/Linked_List1/test.desc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression/cbmc/Linked_List1/test.desc b/regression/cbmc/Linked_List1/test.desc index 9efefbc7362..a50033dac6c 100644 --- a/regression/cbmc/Linked_List1/test.desc +++ b/regression/cbmc/Linked_List1/test.desc @@ -1,6 +1,6 @@ CORE main.c - +--unwind 11 --unwinding-assertions ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ From 1c1126a3e8a3424abbead6c4fffa37fbe30cd25b Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 Nov 2021 11:50:35 +0000 Subject: [PATCH 08/13] Revert "Fix for nondet pointers all objects" This reverts commit 5723e39725bb59bcd55536f8a8c495226de88949. --- regression/cbmc/Linked_List1/test.desc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression/cbmc/Linked_List1/test.desc b/regression/cbmc/Linked_List1/test.desc index a50033dac6c..9efefbc7362 100644 --- a/regression/cbmc/Linked_List1/test.desc +++ b/regression/cbmc/Linked_List1/test.desc @@ -1,6 +1,6 @@ CORE main.c ---unwind 11 --unwinding-assertions + ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ From d67b538d012a380ce1ce5298cb0ea2efce6e2efe Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Tue, 9 Nov 2021 17:57:54 +0000 Subject: [PATCH 09/13] Revert "goto-symex: initialisation of pointers is no longer necessary" This reverts commit be0770dbe596d69a134ef08cdaa6a95962e46ee2. --- regression/cbmc/nondet-pointer/nondet-pointer1.c | 4 ---- src/goto-symex/goto_symex_state.cpp | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/regression/cbmc/nondet-pointer/nondet-pointer1.c b/regression/cbmc/nondet-pointer/nondet-pointer1.c index 26c261a09dc..d4378f7ac14 100644 --- a/regression/cbmc/nondet-pointer/nondet-pointer1.c +++ b/regression/cbmc/nondet-pointer/nondet-pointer1.c @@ -5,13 +5,9 @@ int main() int x = 123; int *p = &x; int *q = nondet_pointer(); - int *r; if(p == q) __CPROVER_assert(*q == 123, "value of *q"); - if(p == r) - __CPROVER_assert(*r == 123, "value of *q"); - return 0; } diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index ae324c4c05e..e669329a4fd 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -820,6 +820,21 @@ ssa_exprt goto_symex_statet::declare(ssa_exprt ssa, const namespacet &ns) rename(ssa.type(), l1_identifier, ns); ssa.update_type(); + // in case of pointers, put something into the value set + if(ssa.type().id() == ID_pointer) + { + exprt rhs; + if( + auto failed = + get_failed_symbol(to_symbol_expr(ssa.get_original_expr()), ns)) + rhs = address_of_exprt(*failed, to_pointer_type(ssa.type())); + else + rhs = exprt(ID_invalid); + + exprt l1_rhs = rename(std::move(rhs), ns).get(); + value_set.assign(ssa, l1_rhs, ns, true, false); + } + // L2 renaming const exprt fields = field_sensitivity.get_fields(ns, *this, ssa); for(const auto &l1_symbol : find_symbols(fields)) From df485498e624d1a1e848d24900303ecbebf2eb6f Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 19 Nov 2021 22:41:22 +0000 Subject: [PATCH 10/13] Revert "goto-symex: expand unknown points-to values to all objects" This reverts commit f105968f58e481b29afc82c1f237c0eceef2028d. --- src/goto-symex/symex_dereference_state.cpp | 37 +------------------- src/pointer-analysis/value_set.cpp | 39 ++++++++-------------- src/pointer-analysis/value_set.h | 7 ---- 3 files changed, 15 insertions(+), 68 deletions(-) diff --git a/src/goto-symex/symex_dereference_state.cpp b/src/goto-symex/symex_dereference_state.cpp index b171b5cc6e8..363b61eabc8 100644 --- a/src/goto-symex/symex_dereference_state.cpp +++ b/src/goto-symex/symex_dereference_state.cpp @@ -11,7 +11,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "symex_dereference_state.h" -#include #include /// Get or create a failed symbol for the given pointer-typed expression. These @@ -81,39 +80,5 @@ symex_dereference_statet::get_or_create_failed_symbol(const exprt &expr) std::vector symex_dereference_statet::get_value_set(const exprt &expr) const { - auto result = state.value_set.get_value_set(expr, ns); - - bool has_unknown = result.empty(); - for(const auto &v : result) - { - if(v.id() == ID_unknown) - { - has_unknown = true; - break; - } - } - - if(has_unknown) - { -#if 0 - // The value set only knows about objects the address of which has been - // taken, and may therefore still present an underapproximation for - // non-deterministic pointers. - result = state.value_set.get_any_object(expr, ns); -#else - state.get_level2().current_names.iterate( - [this, &result]( - const irep_idt &key, const std::pair &entry) { - const irep_idt &obj_identifier = entry.first.get_object_name(); - if(obj_identifier != state.guard_identifier()) - result.emplace_back( - object_descriptor_exprt{ns.lookup(obj_identifier).symbol_expr()}); - }); - - result.emplace_back(object_descriptor_exprt{ - exprt(ID_null_object, to_pointer_type(expr.type()).subtype())}); -#endif - } - - return result; + return state.value_set.get_value_set(expr, ns); } diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 0d1ceba27db..25234f31ba0 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -371,30 +371,6 @@ value_sett::object_mapt value_sett::get_value_set( return dest; } -std::vector -value_sett::get_any_object(exprt expr, const namespacet &ns) const -{ - if(expr.type().id() != ID_pointer) - return {}; - - object_mapt object_map; - - // we'll take the union of all objects we see, with unspecified offsets - values.iterate([this, &object_map](const irep_idt &key, const entryt &value) { - for(const auto &object : value.object_map.read()) - insert(object_map, object.first, offsett()); - }); - - // we'll add null, in case it's not there yet - insert( - object_map, - exprt(ID_null_object, to_pointer_type(expr.type()).subtype()), - offsett()); - - return make_range(object_map.read()) - .map([&](const object_map_dt::value_type &pair) { return to_expr(pair); }); -} - /// Check if 'suffix' starts with 'field'. /// Suffix is delimited by periods and '[]' (array access) tokens, so we're /// looking for ".field($|[]|.)" @@ -531,7 +507,20 @@ void value_sett::get_value_set_rec( } else if(expr.id() == ID_nondet_symbol) { - insert(dest, exprt(ID_unknown, original_type)); + if(expr.type().id() == ID_pointer) + { + // we'll take the union of all objects we see, with unspecified offsets + values.iterate([this, &dest](const irep_idt &key, const entryt &value) { + for(const auto &object : value.object_map.read()) + insert(dest, object.first, offsett()); + }); + + // we'll add null, in case it's not there yet + insert( + dest, + exprt(ID_null_object, to_pointer_type(expr_type).subtype()), + offsett()); + } } else if(expr.id()==ID_if) { diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index 334929e367a..ac493488d39 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -251,13 +251,6 @@ class value_sett /// are object_descriptor_exprt or have id ID_invalid or ID_unknown. std::vector get_value_set(exprt expr, const namespacet &ns) const; - /// Collects all objects known to the value set. - /// \param expr: query expression - /// \param ns: global namespace - /// \return list of objects that any pointer could point to. These expressions - /// are object_descriptor_exprt or have id ID_invalid or ID_unknown. - std::vector get_any_object(exprt expr, const namespacet &ns) const; - void clear() { values.clear(); From bd41e25974c89e0082bb52554576ce892531c5d6 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 Nov 2021 11:35:33 +0000 Subject: [PATCH 11/13] Unit test lower_byte_extract to/from an array of bits Byte operator lowering made several assumptions about array elements being byte aligned, which may be true for ANSI C, but isn't the case for our C front-end (which supports arrays of single bits), and not true for the overall framework in general. --- unit/solvers/lowering/byte_operators.cpp | 83 ++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/unit/solvers/lowering/byte_operators.cpp b/unit/solvers/lowering/byte_operators.cpp index 559012dc6d1..95088e3669c 100644 --- a/unit/solvers/lowering/byte_operators.cpp +++ b/unit/solvers/lowering/byte_operators.cpp @@ -24,6 +24,88 @@ #include #include +TEST_CASE("byte extract and bits", "[core][solvers][lowering][byte_extract]") +{ + // this test does require a proper architecture to be set so that byte extract + // uses adequate endianness + cmdlinet cmdline; + config.set(cmdline); + + const symbol_tablet symbol_table; + const namespacet ns(symbol_table); + + const unsignedbv_typet u16{16}; + const exprt sixteen_bits = from_integer(0x1234, u16); + const array_typet bit_array_type{bv_typet{1}, from_integer(16, size_type())}; + + bool little_endian; + GIVEN("Little endian") + { + little_endian = true; + + const auto bit_string = expr2bits(sixteen_bits, little_endian, ns); + REQUIRE(bit_string.has_value()); + REQUIRE(bit_string->size() == 16); + + const auto array_of_bits = + bits2expr(*bit_string, bit_array_type, little_endian, ns); + REQUIRE(array_of_bits.has_value()); + + const auto bit_string2 = expr2bits(*array_of_bits, little_endian, ns); + REQUIRE(bit_string2.has_value()); + REQUIRE(*bit_string == *bit_string2); + + const byte_extract_exprt be1{little_endian ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian, + sixteen_bits, + from_integer(0, index_type()), + bit_array_type}; + const exprt lower_be1 = lower_byte_extract(be1, ns); + REQUIRE(lower_be1 == *array_of_bits); + + const byte_extract_exprt be2{little_endian ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian, + *array_of_bits, + from_integer(0, index_type()), + u16}; + const exprt lower_be2 = lower_byte_extract(be2, ns); + REQUIRE(lower_be2 == sixteen_bits); + } + + GIVEN("Big endian") + { + little_endian = false; + + const auto bit_string = expr2bits(sixteen_bits, little_endian, ns); + REQUIRE(bit_string.has_value()); + REQUIRE(bit_string->size() == 16); + + const auto array_of_bits = + bits2expr(*bit_string, bit_array_type, little_endian, ns); + REQUIRE(array_of_bits.has_value()); + + const auto bit_string2 = expr2bits(*array_of_bits, little_endian, ns); + REQUIRE(bit_string2.has_value()); + REQUIRE(*bit_string == *bit_string2); + + const byte_extract_exprt be1{little_endian ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian, + sixteen_bits, + from_integer(0, index_type()), + bit_array_type}; + const exprt lower_be1 = lower_byte_extract(be1, ns); + REQUIRE(lower_be1 == *array_of_bits); + + const byte_extract_exprt be2{little_endian ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian, + *array_of_bits, + from_integer(0, index_type()), + u16}; + const exprt lower_be2 = lower_byte_extract(be2, ns); + REQUIRE(lower_be2 == sixteen_bits); + } +} + SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]") { // this test does require a proper architecture to be set so that byte extract @@ -196,6 +278,7 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]") union_typet({{"compA", u32}, {"compB", u64}}), c_enum_typet(u16), c_enum_typet(unsignedbv_typet(128)), + array_typet{bv_typet{1}, from_integer(128, size_type())}, array_typet(u8, size), array_typet(s32, size), array_typet(u64, size), From 18ac0b468498c88d9af6e51b9e7c138de19ed399 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 Nov 2021 11:35:53 +0000 Subject: [PATCH 12/13] Unit test lower_byte_update of/from an array of bits Byte operator lowering made several assumptions about array elements being byte aligned, which may be true for ANSI C, but isn't the case for our C front-end (which supports arrays of single bits), and not true for the overall framework in general. --- unit/solvers/lowering/byte_operators.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/unit/solvers/lowering/byte_operators.cpp b/unit/solvers/lowering/byte_operators.cpp index 95088e3669c..c9383f2c23c 100644 --- a/unit/solvers/lowering/byte_operators.cpp +++ b/unit/solvers/lowering/byte_operators.cpp @@ -429,6 +429,7 @@ SCENARIO("byte_update_lowering", "[core][solvers][lowering][byte_update]") union_typet({{"compA", u32}, {"compB", u64}}), c_enum_typet(u16), c_enum_typet(unsignedbv_typet(128)), + array_typet{bv_typet{1}, from_integer(128, size_type())}, array_typet(u8, size), array_typet(s32, size), array_typet(u64, size), From 84d65327f6ce4da7e87fbea420e69df4e5577fa7 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 Nov 2021 11:49:48 +0000 Subject: [PATCH 13/13] bits WIP --- src/solvers/lowering/byte_operators.cpp | 667 ++++++++++++++---------- 1 file changed, 392 insertions(+), 275 deletions(-) diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp index fad23110da8..646dfd40f4e 100644 --- a/src/solvers/lowering/byte_operators.cpp +++ b/src/solvers/lowering/byte_operators.cpp @@ -405,6 +405,10 @@ static exprt::operandst instantiate_byte_array( src.operands().begin() + narrow_cast(upper_bound)}; } + PRECONDITION(src.type().id() == ID_array || src.type().id() == ID_vector); + PRECONDITION( + can_cast_type(src.type().subtype()) && + to_bitvector_type(src.type().subtype()).get_width() == 8); exprt::operandst bytes; bytes.reserve(upper_bound - lower_bound); for(std::size_t i = lower_bound; i < upper_bound; ++i) @@ -508,69 +512,116 @@ static exprt unpack_array_vector( // refine the number of elements to extract in case the element width is known // and a multiple of bytes; otherwise we will expand the entire array/vector optionalt num_elements = src_size; - if(element_bits > 0 && element_bits % 8 == 0) + if(element_bits > 0) { if(!num_elements.has_value()) { // turn bytes into elements, rounding up - num_elements = (*max_bytes + el_bytes - 1) / el_bytes; + num_elements = (*max_bytes * 8 + element_bits - 1) / element_bits; } if(offset_bytes.has_value()) { // compute offset as number of elements - first_element = *offset_bytes / el_bytes; - // insert offset_bytes-many nil bytes into the output array + first_element = *offset_bytes * 8 / element_bits; + // insert offset_bytes-many zero bytes into the output array byte_operands.resize( - numeric_cast_v(*offset_bytes - (*offset_bytes % el_bytes)), + numeric_cast_v( + (*offset_bytes * 8 - ((*offset_bytes * 8) % element_bits)) / 8), from_integer(0, bv_typet{8})); } } // the maximum number of bytes is an upper bound in case the size of the - // array/vector is unknown; if element_bits was usable above this will + // array/vector is unknown; if element_bits was non-zero above this will // have been turned into a number of elements already if(!num_elements) num_elements = *max_bytes; const exprt src_simp = simplify_expr(src, ns); - for(mp_integer i = first_element; i < *num_elements; ++i) + if(element_bits % 8 == 0) { - exprt element; - - if( - (src_simp.id() == ID_array || src_simp.id() == ID_vector) && - i < src_simp.operands().size()) + for(mp_integer i = first_element; i < *num_elements; ++i) { - const std::size_t index_int = numeric_cast_v(i); - element = src_simp.operands()[index_int]; + exprt element; + + if( + (src_simp.id() == ID_array || src_simp.id() == ID_vector) && + i < src_simp.operands().size()) + { + const std::size_t index_int = numeric_cast_v(i); + element = src_simp.operands()[index_int]; + } + else + { + element = index_exprt(src_simp, from_integer(i, index_type())); + } + + // recursively unpack each element so that we eventually just have an array + // of bytes left + + const optionalt element_max_bytes = + max_bytes + ? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size()) + : optionalt{}; + const std::size_t element_max_bytes_int = + element_max_bytes ? numeric_cast_v(*element_max_bytes) + : el_bytes; + + exprt sub = + unpack_rec(element, little_endian, {}, element_max_bytes, ns, true); + exprt::operandst sub_operands = + instantiate_byte_array(sub, 0, element_max_bytes_int, ns); + byte_operands.insert( + byte_operands.end(), sub_operands.begin(), sub_operands.end()); + + if(max_bytes && byte_operands.size() >= *max_bytes) + break; } - else + } + else + { + const std::size_t element_bits_int = + numeric_cast_v(element_bits); + + exprt::operandst elements; + for(mp_integer i = *num_elements - 1; i >= first_element; --i) { - element = index_exprt(src_simp, from_integer(i, index_type())); + if( + (src_simp.id() == ID_array || src_simp.id() == ID_vector) && + i < src_simp.operands().size()) + { + const std::size_t index_int = numeric_cast_v(i); + elements.push_back(typecast_exprt::conditional_cast( + src_simp.operands()[index_int], bv_typet{element_bits_int})); + } + else + { + elements.push_back(typecast_exprt::conditional_cast( + index_exprt(src_simp, from_integer(i, index_type())), + bv_typet{element_bits_int})); + } } - // recursively unpack each element so that we eventually just have an array - // of bytes left - - const optionalt element_max_bytes = - max_bytes - ? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size()) - : optionalt{}; - const std::size_t element_max_bytes_int = - element_max_bytes ? numeric_cast_v(*element_max_bytes) - : el_bytes; - - exprt sub = - unpack_rec(element, little_endian, {}, element_max_bytes, ns, true); - exprt::operandst sub_operands = - instantiate_byte_array(sub, 0, element_max_bytes_int, ns); + const std::size_t merged_bit_width = + numeric_cast_v((*num_elements - first_element) * element_bits); + if(!little_endian) + std::reverse(elements.begin(), elements.end()); + concatenation_exprt merged{std::move(elements), bv_typet{merged_bit_width}}; + + exprt merged_as_bytes = + unpack_rec(merged, little_endian, {}, max_bytes, ns, true); + + const std::size_t merged_byte_width = (merged_bit_width + 7) / 8; + const std::size_t max_bytes_int = + max_bytes.has_value() ? + std::min(numeric_cast_v(*max_bytes), merged_byte_width) : merged_byte_width; + + exprt::operandst sub_operands = instantiate_byte_array( + merged_as_bytes, 0, max_bytes_int, ns); byte_operands.insert( byte_operands.end(), sub_operands.begin(), sub_operands.end()); - - if(max_bytes && byte_operands.size() >= *max_bytes) - break; } const std::size_t size = byte_operands.size(); @@ -1060,24 +1111,28 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns) to_bitvector_type(to_type_with_subtype(unpacked.op().type()).subtype()) .get_width() == 8); - if(src.type().id()==ID_array) + if(src.type().id()==ID_array || src.type().id() == ID_vector) { - const array_typet &array_type=to_array_type(src.type()); - const typet &subtype=array_type.subtype(); + const typet &subtype=to_type_with_subtype(src.type()).subtype(); - // consider ways of dealing with arrays of unknown subtype size or with a - // subtype size that does not fit byte boundaries; currently we fall back to - // stitching together consecutive elements down below auto element_bits = pointer_offset_bits(subtype, ns); - if(element_bits.has_value() && *element_bits >= 1 && *element_bits % 8 == 0) - { - auto num_elements = numeric_cast(array_type.size()); + PRECONDITION_WITH_DIAGNOSTICS( + element_bits.has_value() && *element_bits >= 1, + "byte extracting arrays of unknown/zero subtype is not yet implemented"); + + optionalt num_elements; + if(src.type().id() == ID_array) + num_elements = numeric_cast(to_array_type(src.type()).size()); + else + num_elements = numeric_cast(to_vector_type(src.type()).size()); - if(num_elements.has_value()) + if(num_elements.has_value()) + { + exprt::operandst operands; + operands.reserve(*num_elements); + for(std::size_t i = 0; i < *num_elements; ++i) { - exprt::operandst operands; - operands.reserve(*num_elements); - for(std::size_t i = 0; i < *num_elements; ++i) + if(*element_bits % 8 == 0) { plus_exprt new_offset( unpacked.offset(), @@ -1089,68 +1144,75 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns) operands.push_back(lower_byte_extract(tmp, ns)); } + else + { + const mp_integer first_index = i * *element_bits / 8; + const mp_integer last_index = ((i + 1) * *element_bits + 7) / 8; + + exprt::operandst to_concat; + to_concat.reserve(numeric_cast_v(last_index - first_index)); + for(mp_integer j = first_index; j < last_index; ++j) + { + to_concat.push_back(index_exprt{unpacked.op(), plus_exprt{unpacked.offset(), from_integer(j, unpacked.offset().type())}}); + } + + const std::size_t base = numeric_cast_v((i * *element_bits) % 8); + const std::size_t last = numeric_cast_v(base + *element_bits - 1); + endianness_mapt endianness_map( + bv_typet{8 * to_concat.size()}, little_endian, ns); + const auto bounds = map_bounds(endianness_map, base, last); + extractbits_exprt element{ + concatenation_exprt{to_concat, bv_typet{8 * to_concat.size()}}, + from_integer(bounds.ub, size_type()), + from_integer(bounds.lb, size_type()), + subtype}; + + operands.push_back(element); + } + } - return simplify_expr(array_exprt(std::move(operands), array_type), ns); + if(src.type().id() == ID_array) + { + return simplify_expr(array_exprt{std::move(operands), to_array_type(src.type())}, ns); } else { - // TODO we either need a symbol table here or make array comprehensions - // introduce a scope - static std::size_t array_comprehension_index_counter = 0; - ++array_comprehension_index_counter; - symbol_exprt array_comprehension_index{ - "$array_comprehension_index_a" + - std::to_string(array_comprehension_index_counter), - index_type()}; - - plus_exprt new_offset{ - unpacked.offset(), - typecast_exprt::conditional_cast( - mult_exprt{array_comprehension_index, - from_integer( - *element_bits / 8, array_comprehension_index.type())}, - unpacked.offset().type())}; - - byte_extract_exprt body(unpacked); - body.type() = subtype; - body.offset() = std::move(new_offset); - - return array_comprehension_exprt{std::move(array_comprehension_index), - lower_byte_extract(body, ns), - array_type}; + return simplify_expr(vector_exprt{std::move(operands), to_vector_type(src.type())}, ns); } } - } - else if(src.type().id() == ID_vector) - { - const vector_typet &vector_type = to_vector_type(src.type()); - const typet &subtype = vector_type.subtype(); - - // consider ways of dealing with vectors of unknown subtype size or with a - // subtype size that does not fit byte boundaries; currently we fall back to - // stitching together consecutive elements down below - auto element_bits = pointer_offset_bits(subtype, ns); - if(element_bits.has_value() && *element_bits >= 1 && *element_bits % 8 == 0) + else { - const std::size_t num_elements = - numeric_cast_v(vector_type.size()); - - exprt::operandst operands; - operands.reserve(num_elements); - for(std::size_t i = 0; i < num_elements; ++i) - { - plus_exprt new_offset( - unpacked.offset(), - from_integer(i * (*element_bits) / 8, unpacked.offset().type())); - - byte_extract_exprt tmp(unpacked); - tmp.type() = subtype; - tmp.offset() = simplify_expr(new_offset, ns); + DATA_INVARIANT(src.type().id() == ID_array, "vectors have constant size"); + const array_typet &array_type = to_array_type(src.type()); + + // TODO we either need a symbol table here or make array comprehensions + // introduce a scope + static std::size_t array_comprehension_index_counter = 0; + ++array_comprehension_index_counter; + symbol_exprt array_comprehension_index{ + "$array_comprehension_index_a" + + std::to_string(array_comprehension_index_counter), + index_type()}; + + PRECONDITION_WITH_DIAGNOSTICS( + *element_bits % 8 == 0, + "byte extracting non-byte-aligned arrays requires constant size"); + + plus_exprt new_offset{ + unpacked.offset(), + typecast_exprt::conditional_cast( + mult_exprt{array_comprehension_index, + from_integer( + *element_bits / 8, array_comprehension_index.type())}, + unpacked.offset().type())}; - operands.push_back(lower_byte_extract(tmp, ns)); - } + byte_extract_exprt body(unpacked); + body.type() = subtype; + body.offset() = std::move(new_offset); - return simplify_expr(vector_exprt(std::move(operands), vector_type), ns); + return array_comprehension_exprt{std::move(array_comprehension_index), + lower_byte_extract(body, ns), + array_type}; } } else if(src.type().id() == ID_complex) @@ -1711,10 +1773,175 @@ static exprt lower_byte_update_array_vector_non_const( return simplify_expr(std::move(result), ns); } +/// Byte update a struct/array/vector element. +/// \param src: Original byte-update expression +/// \param offset_bits: Offset in \p src expressed as bits +/// \param element_to_update: struct/array/vector element +/// \param subtype_bits: Bit width of \p element_to_update +/// \param bits_already_set: Number of bits in the original target object that +/// have been updated already +/// \param value_as_byte_array: Update value as an array of bytes +/// \param non_const_update_bound: If set, (symbolically) constrain updates such +/// as to at most update \p non_const_update_bound elements +/// \param ns: Namespace +/// \return Array/vector expression reflecting all updates. +static exprt lower_byte_update_single_element( + const byte_update_exprt &src, + const mp_integer &offset_bits, + const exprt &element_to_update, + const mp_integer &subtype_bits, + const mp_integer &bits_already_set, + const exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns) +{ + // We need to take one or more bytes from value_as_byte_array to modify the + // target element. We need to compute: + // - The position in value_as_byte_array to take bytes from: If subtype_bits + // is less than the size of a byte, then multiple struct/array/vector elements + // will need to be updated using the same byte. + std::size_t first = 0; + // - An upper bound on the number of bytes required from value_as_byte_array. + mp_integer update_elements = (subtype_bits + 7) / 8; + // - The offset into the target element: If subtype_bits is greater or equal + // to the size of a byte, then there may be an offset into the target element + // that needs to be taken into account, and multiple bytes will be required. + mp_integer offset_bits_in_target_element = offset_bits - bits_already_set; + + if(offset_bits_in_target_element < 0) + { + INVARIANT( + value_as_byte_array.id() != ID_array || + value_as_byte_array.operands().size() * 8 > + -offset_bits_in_target_element, + "indices past the update should be handled below"); + first += numeric_cast_v(-offset_bits_in_target_element / 8); + offset_bits_in_target_element += (-offset_bits_in_target_element / 8) * 8; + if(offset_bits_in_target_element < 0) + ++update_elements; + } + else + { + update_elements -= offset_bits_in_target_element / 8; + INVARIANT( + update_elements > 0, "indices before the update should be handled above"); + } + + std::size_t end = first + numeric_cast_v(update_elements); + if(value_as_byte_array.id() == ID_array) + end = std::min(end, value_as_byte_array.operands().size()); + exprt::operandst update_values = + instantiate_byte_array(value_as_byte_array, first, end, ns); + const std::size_t update_size = update_values.size(); + const exprt update_size_expr = from_integer(update_size, size_type()); + const array_typet update_type{bv_typet{8}, update_size_expr}; + + // each case below will set "new_value" as appropriate + exprt new_value; + + if( + offset_bits_in_target_element >= 0 && + offset_bits_in_target_element % 8 == 0) + { + new_value = array_exprt{update_values, update_type}; + } + else + { + if(src.id() == ID_byte_update_little_endian) + std::reverse(update_values.begin(), update_values.end()); + if(offset_bits_in_target_element < 0) + { + if(src.id() == ID_byte_update_little_endian) + { + new_value = lshr_exprt{ + concatenation_exprt{update_values, bv_typet{update_size * 8}}, + numeric_cast_v(-offset_bits_in_target_element)}; + } + else + { + new_value = shl_exprt{ + concatenation_exprt{update_values, bv_typet{update_size * 8}}, + numeric_cast_v(-offset_bits_in_target_element)}; + } + } + else + { + const std::size_t offset_bits_int = + numeric_cast_v(offset_bits_in_target_element); + const std::size_t subtype_bits_int = + numeric_cast_v(subtype_bits); + + extractbits_exprt bits_to_keep{element_to_update, + subtype_bits_int - 1, + subtype_bits_int - offset_bits_int, + bv_typet{offset_bits_int}}; + new_value = concatenation_exprt{ + bits_to_keep, + extractbits_exprt{ + concatenation_exprt{update_values, bv_typet{update_size * 8}}, + update_size * 8 - 1, + offset_bits_int, + bv_typet{update_size * 8 - offset_bits_int}}, + bv_typet{update_size * 8}}; + } + + const irep_idt extract_opcode = src.id() == ID_byte_update_little_endian + ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian; + + const byte_extract_exprt byte_extract_expr{ + extract_opcode, + new_value, + from_integer(0, src.offset().type()), + update_type}; + + new_value = lower_byte_extract(byte_extract_expr, ns); + + offset_bits_in_target_element = 0; + } + + // With an upper bound on the size of the update established, construct the + // actual update expression. If the exact size of the update is unknown, + // make the size expression conditional. + const byte_update_exprt bu{ + src.id(), + element_to_update, + from_integer(offset_bits_in_target_element / 8, src.offset().type()), + new_value}; + + optionalt update_bound; + if(non_const_update_bound.has_value()) + { + // The size of the update is not constant, and thus is to be symbolically + // bound; first see how many bytes we have left in the update: + // non_const_update_bound > first ? non_const_update_bound - first: 0 + const exprt remaining_update_bytes = typecast_exprt::conditional_cast( + if_exprt{binary_predicate_exprt{ + *non_const_update_bound, + ID_gt, + from_integer(first, non_const_update_bound->type())}, + minus_exprt{*non_const_update_bound, + from_integer(first, non_const_update_bound->type())}, + from_integer(0, non_const_update_bound->type())}, + size_type()); + // Now take the minimum of update-bytes-left and the previously computed + // size of the element to be updated: + update_bound = simplify_expr( + if_exprt{ + binary_predicate_exprt{update_size_expr, ID_lt, remaining_update_bytes}, + update_size_expr, + remaining_update_bytes}, + ns); + } + + return lower_byte_update(bu, new_value, update_bound, ns); +} + /// Apply a byte update \p src to an array/vector typed operand using the byte /// array \p value_as_byte_array as update value. /// \param src: Original byte-update expression /// \param subtype: Array/vector element type +/// \param subtype_bits: Bit width of \p subtype /// \param value_as_byte_array: Update value as an array of bytes /// \param non_const_update_bound: If set, (symbolically) constrain updates such /// as to at most update \p non_const_update_bound elements @@ -1723,6 +1950,7 @@ static exprt lower_byte_update_array_vector_non_const( static exprt lower_byte_update_array_vector( const byte_update_exprt &src, const typet &subtype, + const optionalt &subtype_bits, const exprt &value_as_byte_array, const optionalt &non_const_update_bound, const namespacet &ns) @@ -1734,13 +1962,10 @@ static exprt lower_byte_update_array_vector( else size = to_vector_type(src.type()).size(); - auto subtype_bits = pointer_offset_bits(subtype, ns); - // fall back to bytewise updates in all non-constant or dubious cases if( !size.is_constant() || !src.offset().is_constant() || - !subtype_bits.has_value() || *subtype_bits == 0 || *subtype_bits % 8 != 0 || - non_const_update_bound.has_value() || value_as_byte_array.id() != ID_array) + !subtype_bits.has_value() || value_as_byte_array.id() != ID_array) { return lower_byte_update_array_vector_non_const( src, subtype, value_as_byte_array, non_const_update_bound, ns); @@ -1748,56 +1973,32 @@ static exprt lower_byte_update_array_vector( std::size_t num_elements = numeric_cast_v(to_constant_expr(size)); - mp_integer offset_bytes = - numeric_cast_v(to_constant_expr(src.offset())); + mp_integer offset_bits = + numeric_cast_v(to_constant_expr(src.offset())) * 8; exprt::operandst elements; elements.reserve(num_elements); std::size_t i = 0; // copy the prefix not affected by the update - for(; i < num_elements && (i + 1) * *subtype_bits <= offset_bytes * 8; ++i) + for(; i < num_elements && (i + 1) * *subtype_bits <= offset_bits; ++i) elements.push_back(index_exprt{src.op(), from_integer(i, index_type())}); // the modified elements for(; i < num_elements && i * *subtype_bits < - (offset_bytes + value_as_byte_array.operands().size()) * 8; + offset_bits + value_as_byte_array.operands().size() * 8; ++i) { - mp_integer update_offset = offset_bytes - i * (*subtype_bits / 8); - mp_integer update_elements = *subtype_bits / 8; - exprt::operandst::const_iterator first = - value_as_byte_array.operands().begin(); - exprt::operandst::const_iterator end = value_as_byte_array.operands().end(); - if(update_offset < 0) - { - INVARIANT( - value_as_byte_array.operands().size() > -update_offset, - "indices past the update should be handled above"); - first += numeric_cast_v(-update_offset); - } - else - { - update_elements -= update_offset; - INVARIANT( - update_elements > 0, - "indices before the update should be handled above"); - } - - if(std::distance(first, end) > update_elements) - end = first + numeric_cast_v(update_elements); - exprt::operandst update_values{first, end}; - const std::size_t update_size = update_values.size(); - - const byte_update_exprt bu{ - src.id(), + elements.push_back(lower_byte_update_single_element( + src, + offset_bits, index_exprt{src.op(), from_integer(i, index_type())}, - from_integer(update_offset < 0 ? 0 : update_offset, src.offset().type()), - array_exprt{ - std::move(update_values), - array_typet{bv_typet{8}, from_integer(update_size, size_type())}}}; - elements.push_back(lower_byte_operators(bu, ns)); + *subtype_bits, + i * *subtype_bits, + value_as_byte_array, + non_const_update_bound, + ns)); } // copy the tail not affected by the update @@ -1828,10 +2029,6 @@ static exprt lower_byte_update_struct( const optionalt &non_const_update_bound, const namespacet &ns) { - const irep_idt extract_opcode = src.id() == ID_byte_update_little_endian - ? ID_byte_extract_little_endian - : ID_byte_extract_big_endian; - exprt::operandst elements; elements.reserve(struct_type.components().size()); @@ -1840,36 +2037,25 @@ static exprt lower_byte_update_struct( { auto element_width = pointer_offset_bits(comp.type(), ns); - exprt member = member_exprt{src.op(), comp.get_name(), comp.type()}; - // compute the update offset relative to this struct member - will be // negative if we are already in the middle of the update or beyond it exprt offset = simplify_expr( - minus_exprt{src.offset(), - from_integer(member_offset_bits / 8, src.offset().type())}, + minus_exprt{ + mult_exprt{src.offset(), from_integer(8, src.offset().type())}, + from_integer(member_offset_bits, src.offset().type())}, ns); - auto offset_bytes = numeric_cast(offset); - // we don't need to update anything when - // 1) the update offset is greater than the end of this member (thus the - // relative offset is greater than this element) or - // 2) the update offset plus the size of the update is less than the member - // offset - if( - offset_bytes.has_value() && - (*offset_bytes * 8 >= *element_width || - (value_as_byte_array.id() == ID_array && *offset_bytes < 0 && - -*offset_bytes >= value_as_byte_array.operands().size()))) - { - elements.push_back(std::move(member)); - member_offset_bits += *element_width; - continue; - } - else if(!offset_bytes.has_value()) + auto offset_bits = numeric_cast(offset); + if(!offset_bits.has_value() || !element_width.has_value()) { - // The offset to update is not constant; abort the attempt to update - // indiviual struct members and instead turn the operand-to-be-updated - // into a byte array, which we know how to update even if the offset is - // non-constant. + // The offset to update is not constant, either because the original + // offset in src never was, or because a struct member has an unknown + // offset. Abort the attempt to update individual struct members and + // instead turn the operand-to-be-updated into a byte array, which we know + // how to update even if the offset is non-constant. + const irep_idt extract_opcode = src.id() == ID_byte_update_little_endian + ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian; + auto src_size_opt = size_of_expr(src.type(), ns); CHECK_RETURN(src_size_opt.has_value()); @@ -1893,107 +2079,32 @@ static exprt lower_byte_update_struct( ns); } - // We now need to figure out how many bytes to consume from the update - // value. If the size of the update is unknown, then we need to leave some - // of this work to a back-end solver via the non_const_update_bound branch - // below. - mp_integer update_elements = (*element_width + 7) / 8; - std::size_t first = 0; - if(*offset_bytes < 0) - { - offset = from_integer(0, src.offset().type()); - INVARIANT( - value_as_byte_array.id() != ID_array || - value_as_byte_array.operands().size() > -*offset_bytes, - "members past the update should be handled above"); - first = numeric_cast_v(-*offset_bytes); - } - else - { - update_elements -= *offset_bytes; - INVARIANT( - update_elements > 0, - "members before the update should be handled above"); - } - - // Now that we have an idea on how many bytes we need from the update value, - // determine the exact range [first, end) in the update value, and create - // that sequence of bytes (via instantiate_byte_array). - std::size_t end = first + numeric_cast_v(update_elements); - if(value_as_byte_array.id() == ID_array) - end = std::min(end, value_as_byte_array.operands().size()); - exprt::operandst update_values = - instantiate_byte_array(value_as_byte_array, first, end, ns); - const std::size_t update_size = update_values.size(); - - // With an upper bound on the size of the update established, construct the - // actual update expression. If the exact size of the update is unknown, - // make the size expression conditional. - exprt update_size_expr = from_integer(update_size, size_type()); - array_exprt update_expr{std::move(update_values), - array_typet{bv_typet{8}, update_size_expr}}; - optionalt member_update_bound; - if(non_const_update_bound.has_value()) - { - // The size of the update is not constant, and thus is to be symbolically - // bound; first see how many bytes we have left in the update: - // non_const_update_bound > first ? non_const_update_bound - first: 0 - const exprt remaining_update_bytes = typecast_exprt::conditional_cast( - if_exprt{ - binary_predicate_exprt{ - *non_const_update_bound, - ID_gt, - from_integer(first, non_const_update_bound->type())}, - minus_exprt{*non_const_update_bound, - from_integer(first, non_const_update_bound->type())}, - from_integer(0, non_const_update_bound->type())}, - size_type()); - // Now take the minimum of update-bytes-left and the previously computed - // size of the member to be updated: - update_size_expr = if_exprt{ - binary_predicate_exprt{update_size_expr, ID_lt, remaining_update_bytes}, - update_size_expr, - remaining_update_bytes}; - simplify(update_size_expr, ns); - member_update_bound = std::move(update_size_expr); - } - - // We have established the bytes to use for the update, but now need to - // account for sub-byte members. - const std::size_t shift = - numeric_cast_v(member_offset_bits % 8); - const std::size_t element_bits_int = - numeric_cast_v(*element_width); + exprt member = member_exprt{src.op(), comp.get_name(), comp.type()}; - const bool little_endian = src.id() == ID_byte_update_little_endian; - if(shift != 0) + // we don't need to update anything when + // 1) the update offset is greater than the end of this member (thus the + // relative offset is greater than this element) or + // 2) the update offset plus the size of the update is less than the member + // offset + if( + *offset_bits >= *element_width || + (value_as_byte_array.id() == ID_array && *offset_bits < 0 && + -*offset_bits >= value_as_byte_array.operands().size() * 8)) { - member = concatenation_exprt{ - typecast_exprt::conditional_cast(member, bv_typet{element_bits_int}), - from_integer(0, bv_typet{shift}), - bv_typet{shift + element_bits_int}}; - - if(!little_endian) - to_concatenation_expr(member).op0().swap( - to_concatenation_expr(member).op1()); + elements.push_back(member); + member_offset_bits += *element_width; + continue; } - // Finally construct the updated member. - byte_update_exprt bu{src.id(), std::move(member), offset, update_expr}; - exprt updated_element = - lower_byte_update(bu, update_expr, member_update_bound, ns); - - if(shift == 0) - elements.push_back(updated_element); - else - { - elements.push_back(typecast_exprt::conditional_cast( - extractbits_exprt{updated_element, - element_bits_int - 1 + (little_endian ? shift : 0), - (little_endian ? shift : 0), - bv_typet{element_bits_int}}, - comp.type())); - } + elements.push_back(lower_byte_update_single_element( + src, + *offset_bits + member_offset_bits, + member, + *element_width, + member_offset_bits, + value_as_byte_array, + non_const_update_bound, + ns)); member_offset_bits += *element_width; } @@ -2057,11 +2168,8 @@ static exprt lower_byte_update( subtype = to_array_type(src.type()).subtype(); auto element_width = pointer_offset_bits(*subtype, ns); - CHECK_RETURN(element_width.has_value()); - CHECK_RETURN(*element_width > 0); - CHECK_RETURN(*element_width % 8 == 0); - if(*element_width == 8) + if(element_width.has_value() && *element_width == 8) { if(value_as_byte_array.id() != ID_array) { @@ -2080,8 +2188,15 @@ static exprt lower_byte_update( ns); } else + { return lower_byte_update_array_vector( - src, *subtype, value_as_byte_array, non_const_update_bound, ns); + src, + *subtype, + element_width, + value_as_byte_array, + non_const_update_bound, + ns); + } } else if(src.type().id() == ID_struct || src.type().id() == ID_struct_tag) { @@ -2133,15 +2248,17 @@ static exprt lower_byte_update( typecast_exprt::conditional_cast(src.op(), bv_typet{type_bits}); if(bit_width > type_bits) { - val_before = - concatenation_exprt{from_integer(0, bv_typet{bit_width - type_bits}), - val_before, - bv_typet{bit_width}}; + val_before = concatenation_exprt{ + from_integer(0, bv_typet{bit_width - type_bits}), + val_before, + bv_typet{bit_width}}; if(!is_little_endian) + { to_concatenation_expr(val_before) .op0() .swap(to_concatenation_expr(val_before).op1()); + } } if(non_const_update_bound.has_value())