From b0bdb4724fba33e9469499c29075af1bd5cd8ca7 Mon Sep 17 00:00:00 2001 From: Iskander Sharipov Date: Fri, 13 Jan 2023 17:03:56 +0400 Subject: [PATCH] [compiler] add basic support for type-checked mixed union types Types like `int|string` can now be expressed in the type system. Every mixed union variant sets a bit flag inside TypeData. When generating the C++ code, any mixed unions become simple mixed types. Right now we only check function parameter passing and field assignments. Mixed union types are washed away in all contexts except for the phpdoc. We'll remove this restriction later, once more code is ready to adapt to this change. --- compiler/inferring/node-recalc.cpp | 7 +- compiler/inferring/rvalue.h | 7 +- compiler/inferring/type-data.cpp | 140 ++++++++- compiler/inferring/type-data.h | 75 +++-- compiler/inferring/type-hint-recalc.cpp | 8 +- compiler/pipes/collect-main-edges.cpp | 2 +- tests/phpt/generics/021_default_T.php | 6 +- tests/phpt/phpdocs/106_boolean_in_phpdoc.php | 1 + tests/phpt/restricted_mixed/correct_usage.php | 290 ++++++++++++++++++ .../error_param_float_or_string.php | 9 + .../error_param_float_or_string2.php | 14 + .../error_param_float_or_string3.php | 9 + .../error_return_int_or_string.php | 17 + tests/phpt/restricted_mixed/todo.php | 35 +++ 14 files changed, 575 insertions(+), 45 deletions(-) create mode 100644 tests/phpt/restricted_mixed/correct_usage.php create mode 100644 tests/phpt/restricted_mixed/error_param_float_or_string.php create mode 100644 tests/phpt/restricted_mixed/error_param_float_or_string2.php create mode 100644 tests/phpt/restricted_mixed/error_param_float_or_string3.php create mode 100644 tests/phpt/restricted_mixed/error_return_int_or_string.php create mode 100644 tests/phpt/restricted_mixed/todo.php diff --git a/compiler/inferring/node-recalc.cpp b/compiler/inferring/node-recalc.cpp index 4acfb9f82a..80f2911cbc 100644 --- a/compiler/inferring/node-recalc.cpp +++ b/compiler/inferring/node-recalc.cpp @@ 
-103,7 +103,12 @@ void NodeRecalc::set_lca_at(const MultiKey *key, const RValue &rvalue) { key = &MultiKey::any_key(0); } - new_type_->set_lca_at(*key, type, !rvalue.drop_or_false, !rvalue.drop_or_null, rvalue.ffi_flags); + TypeData::LCAFlags lca_flags; + lca_flags.save_or_false = !rvalue.drop_or_false; + lca_flags.save_or_null = !rvalue.drop_or_null; + lca_flags.ffi_drop_ref = rvalue.ffi_drop_ref; + lca_flags.ffi_take_addr = rvalue.ffi_take_addr; + new_type_->set_lca_at(*key, type, lca_flags); if (unlikely(new_type_->error_flag())) { on_new_type_became_tpError(type, rvalue); diff --git a/compiler/inferring/rvalue.h b/compiler/inferring/rvalue.h index 607ccf2d27..4cadcdd285 100644 --- a/compiler/inferring/rvalue.h +++ b/compiler/inferring/rvalue.h @@ -26,20 +26,21 @@ struct RValue { const MultiKey *key{nullptr}; bool drop_or_false{false}; bool drop_or_null{false}; - TypeData::FFIRvalueFlags ffi_flags; + bool ffi_drop_ref{false}; + bool ffi_take_addr{false}; }; // take &T cdata type and return it as T; // for non-cdata types it has no effect inline RValue ffi_rvalue_drop_ref(RValue rvalue) { - rvalue.ffi_flags.drop_ref = true; + rvalue.ffi_drop_ref = true; return rvalue; } // take T cdata type and return it as *T; // for non-cdata types it has no effect inline RValue ffi_rvalue_take_addr(RValue rvalue) { - rvalue.ffi_flags.take_addr = true; + rvalue.ffi_take_addr = true; return rvalue; } diff --git a/compiler/inferring/type-data.cpp b/compiler/inferring/type-data.cpp index 41efacd742..eb475f6ded 100644 --- a/compiler/inferring/type-data.cpp +++ b/compiler/inferring/type-data.cpp @@ -24,6 +24,7 @@ static std::vector primitive_types; static std::vector array_types; +static TypeData *foreach_key_type; void TypeData::init_static() { if (!primitive_types.empty()) { @@ -39,6 +40,17 @@ void TypeData::init_static() { for (int tp = 0; tp < ptype_size; tp++) { array_types[tp] = create_array_of(primitive_types[tp]); } + + // create this TypeData object once and use it for all 
foreach keys; + // foreach key is encoded as string|int mixed type + foreach_key_type = new TypeData(tp_mixed); + foreach_key_type->flags_ = restricted_mixed_flag_e | + restricted_mixed_string_flag_e | + restricted_mixed_int_flag_e; +} + +const TypeData *TypeData::get_foreach_key_type() { + return foreach_key_type; } const TypeData *TypeData::get_type(PrimitiveType type) { @@ -142,16 +154,41 @@ std::string TypeData::as_human_readable(bool colored) const { } break; } + case tp_mixed: + if (is_restricted_mixed()) { + const PrimitiveType possible_elements[] = { + tp_int, + tp_float, + tp_string, + tp_array, + tp_bool, + }; + for (auto ptype : possible_elements) { + if (!restricted_mixed_contains(ptype)) { + continue; + } + if (res.empty()) { + res = ptype_name(ptype); + } else { + res += "|" + std::string{ptype_name(ptype)}; + } + } + } else { + res = ptype_name(ptype_); + } + break; default: res = ptype_name(ptype_); } if (ptype_ != tp_any) { - if (use_or_null() && !use_or_false()) { + bool print_null_type = use_or_null() || (is_restricted_mixed() && or_null_flag()); + bool print_false_type = use_or_false() || (is_restricted_mixed() && or_false_flag()); + if (print_null_type && !print_false_type) { res = "?" 
+ res; - } else if (use_or_false() && !use_or_null()) { + } else if (print_false_type && !print_null_type) { res += "|false"; - } else if (use_or_false() && use_or_null()) { + } else if (print_false_type && print_null_type) { res += "|false|null"; } } @@ -188,6 +225,55 @@ PrimitiveType TypeData::get_real_ptype() const { return p; } +bool TypeData::restricted_mixed_contains(const TypeData *rhs) const { + if (rhs->ptype() == tp_mixed && !rhs->is_restricted_mixed()) { + return true; // allow any unrestricted mixed assignment for now + } + if (rhs->or_null_flag() && !or_null_flag()) { + return false; + } + if (rhs->or_false_flag() && !(or_false_flag() || restricted_mixed_contains(tp_bool))) { + return false; + } + if (rhs->ptype() == tp_int && restricted_mixed_contains(tp_float)) { + return true; + } + if (ptype_mixed_flag(rhs->ptype()) != 0 && !restricted_mixed_contains(rhs->ptype())) { + return false; + } + return true; +} + +uint16_t TypeData::restricted_mixed_types_mask() const { + return flags_ & restricted_mixed_flags_mask(); +} + +uint16_t TypeData::restricted_mixed_flags_mask() { + return restricted_mixed_flag_e | + restricted_mixed_int_flag_e | + restricted_mixed_float_flag_e | + restricted_mixed_string_flag_e | + restricted_mixed_array_flag_e | + restricted_mixed_bool_flag_e; +} + +uint16_t TypeData::ptype_mixed_flag(PrimitiveType ptype) { + switch (ptype) { + case tp_int: + return restricted_mixed_int_flag_e; + case tp_float: + return restricted_mixed_float_flag_e; + case tp_string: + return restricted_mixed_string_flag_e; + case tp_array: + return restricted_mixed_array_flag_e; + case tp_bool: + return restricted_mixed_bool_flag_e; + default: + return 0; + } +} + bool TypeData::is_ffi_ref() const { auto klass = class_type(); if (klass && klass->ffi_class_mixin) { @@ -320,7 +406,7 @@ bool TypeData::is_primitive_type() const { return vk::any_of_equal(get_real_ptype(), tp_int, tp_bool, tp_float, tp_future, tp_future_queue); } -void TypeData::set_flags(uint8_t 
new_flags) { +void TypeData::set_flags(uint16_t new_flags) { kphp_assert_msg((flags_ & new_flags) == flags_, "It is forbidden to remove flag"); flags_ = new_flags; } @@ -411,14 +497,31 @@ const TypeData *TypeData::get_deepest_type_of_array() const { return this; } -void TypeData::set_lca(const TypeData *rhs, bool save_or_false, bool save_or_null, FFIRvalueFlags ffi_flags) { +void TypeData::set_lca(const TypeData *rhs, LCAFlags flags) { if (rhs == nullptr) { return; } TypeData *lhs = this; + auto new_flags = rhs->flags_; PrimitiveType new_ptype = type_lca(lhs->ptype(), rhs->ptype()); + if (lhs->is_restricted_mixed()) { + if (flags.phpdoc) { + // still constructing a type from a phpdoc context + new_flags |= ptype_mixed_flag(rhs->ptype()); + } else { + // checking a type compatibility + if (!lhs->restricted_mixed_contains(rhs)) { + new_ptype = tp_Error; + } + } + } if (new_ptype == tp_mixed) { + if (flags.phpdoc && lhs->ptype() != tp_mixed && rhs->ptype() != tp_mixed) { + new_flags |= restricted_mixed_flag_e; + new_flags |= ptype_mixed_flag(lhs->ptype()); + new_flags |= ptype_mixed_flag(rhs->ptype()); + } if (lhs->ptype() == tp_array && lhs->lookup_at_any_key()) { lhs->set_lca_at(MultiKey::any_key(1), TypeData::get_type(tp_mixed)); if (lhs->ptype() == tp_Error) { @@ -435,24 +538,29 @@ void TypeData::set_lca(const TypeData *rhs, bool save_or_false, bool save_or_nul } lhs->set_ptype(new_ptype); - uint8_t new_flags = rhs->flags_; - if (!save_or_false) { + if (!flags.save_or_false) { new_flags &= ~(or_false_flag_e); } - if (!save_or_null) { + if (!flags.save_or_null) { new_flags &= ~(or_null_flag_e); } + if (!flags.phpdoc) { + // only phpdoc types can have restricted mixed types; + // when assigning to a local variable, etc. 
we wash all these flags away + // note that we may allow these types globally at some point + // (it would require the changes to cfg pass so the casts work correctly) + new_flags &= ~restricted_mixed_flags_mask(); + } new_flags |= lhs->flags_; - lhs->set_flags(new_flags); - if (ffi_flags.drop_ref && rhs->is_ffi_ref()) { + if (flags.ffi_drop_ref && rhs->is_ffi_ref()) { auto *new_rhs = rhs->clone(); new_rhs->class_type_ = {rhs->class_type()->ffi_class_mixin->non_ref}; rhs = new_rhs; } int rhs_indirection = rhs->get_indirection(); - if (ffi_flags.take_addr) { + if (flags.ffi_take_addr) { rhs_indirection++; } @@ -509,7 +617,7 @@ void TypeData::set_lca(const TypeData *rhs, bool save_or_false, bool save_or_nul } } -void TypeData::set_lca_at(const MultiKey &multi_key, const TypeData *rhs, bool save_or_false, bool save_or_null, FFIRvalueFlags ffi_flags) { +void TypeData::set_lca_at(const MultiKey &multi_key, const TypeData *rhs, LCAFlags flags) { TypeData *cur = this; for (const Key &key : multi_key) { @@ -530,7 +638,7 @@ void TypeData::set_lca_at(const MultiKey &multi_key, const TypeData *rhs, bool s } } - cur->set_lca(rhs, save_or_false, save_or_null, ffi_flags); + cur->set_lca(rhs, flags); if (cur->error_flag()) { // proxy tp_Error from keys to the type itself this->set_ptype(tp_Error); } @@ -902,6 +1010,12 @@ bool is_less_or_equal_type(const TypeData *given, const TypeData *expected, cons } break; case tp_mixed: + if (expected->is_restricted_mixed()) { + if (given->is_restricted_mixed()) { + return expected->restricted_mixed_types_mask() == given->restricted_mixed_types_mask(); + } + return expected->restricted_mixed_contains(given); + } if (vk::any_of_equal(tp, tp_bool, tp_int, tp_float, tp_string, tp_mixed)) { return true; } diff --git a/compiler/inferring/type-data.h b/compiler/inferring/type-data.h index 740de71894..003241bd03 100644 --- a/compiler/inferring/type-data.h +++ b/compiler/inferring/type-data.h @@ -17,39 +17,62 @@ #include "compiler/stage.h" #include 
"compiler/threading/tls.h" - class TypeData { DEBUG_STRING_METHOD { return as_human_readable(false); } private: - enum flag_id_t : uint8_t { - write_flag_e = 0b00000001, - or_null_flag_e = 0b00000010, - or_false_flag_e = 0b00000100, - shape_has_varg_flag_e = 0b00001000, - ffi_const_flag_e = 0b00010000, - tuple_as_array_flag_e = 0b00100000, + enum flag_id_t : uint16_t { + write_flag_e = 1 << 0, + or_null_flag_e = 1 << 1, + or_false_flag_e = 1 << 2, + shape_has_varg_flag_e = 1 << 3, + ffi_const_flag_e = 1 << 4, + tuple_as_array_flag_e = 1 << 5, + + // the flags in the following group encode the variants of a restricted union type; + // int|string would have restricted_mixed_int_flag_e and restricted_mixed_string_flag_e + // flags set to 1 as well as restricted_mixed_flag_e + // too complex types like (?int)|string may still decay to mixed, but we can + // express ?(int|string) instead by using the two variant flags along with or_null_flag_e + restricted_mixed_flag_e = 1 << 6, // whether tp_mixed type should be considered to be a ptype union (default is false) + restricted_mixed_int_flag_e = 1 << 7, // if restricted_mixed_flag_e is 1, this flag tells if mixed ptype union contains an int type + restricted_mixed_float_flag_e = 1 << 8, // if restricted_mixed_flag_e is 1, this flag tells if mixed ptype union contains a float type + restricted_mixed_string_flag_e = 1 << 9, // if restricted_mixed_flag_e is 1, this flag tells if mixed ptype union contains a string type + restricted_mixed_array_flag_e = 1 << 10, // if restricted_mixed_flag_e is 1, this flag tells if mixed ptype union contains an array type + restricted_mixed_bool_flag_e = 1 << 11, // if restricted_mixed_flag_e is 1, this flag tells if mixed ptype union contains a bool type + + // 4 bits are unused }; public: using SubkeyItem = std::pair; using lookup_iterator = std::forward_list::const_iterator; - // TODO: move all flags (drop_false, drop_null) here and rename this struct? 
- // passing several booleans as set_lca args is clumsy - struct FFIRvalueFlags { - bool drop_ref: 1; - bool take_addr: 1; - - FFIRvalueFlags() - : drop_ref{false} - , take_addr{false} {} + struct LCAFlags { + bool save_or_false: 1; + bool save_or_null: 1; + bool ffi_drop_ref: 1; + bool ffi_take_addr: 1; + bool phpdoc: 1; + + LCAFlags() + : save_or_false{true} + , save_or_null{true} + , ffi_drop_ref{false} + , ffi_take_addr{false} + , phpdoc {false} {} + + static LCAFlags for_phpdoc() { + LCAFlags flags; + flags.phpdoc = true; + return flags; + } }; private: PrimitiveType ptype_ : 8; // current type (int/array/etc); tp_any for uninited, tp_Error if error - uint8_t flags_{0}; // a binary mask of flag_id_t + uint16_t flags_{0}; // a binary mask of flag_id_t uint8_t indirection_{0}; // ptr levels for FFI pointers // current class for tp_Class (but during inferring it could contain many classes due to multiple implements) @@ -97,14 +120,21 @@ class TypeData { ClassPtr get_first_class_type_inside() const; bool is_primitive_type() const; - uint8_t flags() const { return flags_; } - void set_flags(uint8_t new_flags); + uint16_t flags() const { return flags_; } + void set_flags(uint16_t new_flags); bool is_ffi_ref() const; bool ffi_const_flag() const { return get_flag(); } void set_ffi_const_flag() { set_flag(); } + bool is_restricted_mixed() const { return get_flag(); } + bool restricted_mixed_contains(PrimitiveType ptype) const { return (flags_ & ptype_mixed_flag(ptype)) != 0; } + bool restricted_mixed_contains(const TypeData *other) const; + uint16_t restricted_mixed_types_mask() const; + static uint16_t restricted_mixed_flags_mask(); + static uint16_t ptype_mixed_flag(PrimitiveType ptype); + bool or_false_flag() const { return get_flag(); } void set_or_false_flag() { set_flag(); } bool use_or_false() const { return or_false_flag() && !::can_store_false(ptype_); } @@ -151,8 +181,8 @@ class TypeData { } const TypeData *const_read_at(const MultiKey &multi_key) const; - 
void set_lca(const TypeData *rhs, bool save_or_false = true, bool save_or_null = true, FFIRvalueFlags ffi_flags = {}); - void set_lca_at(const MultiKey &multi_key, const TypeData *rhs, bool save_or_false = true, bool save_or_null = true, FFIRvalueFlags ffi_flags = {}); + void set_lca(const TypeData *rhs, LCAFlags flags = {}); + void set_lca_at(const MultiKey &multi_key, const TypeData *rhs, LCAFlags flags = {}); void set_lca(PrimitiveType ptype); void fix_inf_array(); @@ -164,6 +194,7 @@ class TypeData { static void init_static(); static const TypeData *get_type(PrimitiveType type); static const TypeData *get_type(PrimitiveType array, PrimitiveType type); + static const TypeData *get_foreach_key_type(); static const TypeData *create_for_class(ClassPtr klass); static const TypeData *create_array_of(const TypeData *element_type); }; diff --git a/compiler/inferring/type-hint-recalc.cpp b/compiler/inferring/type-hint-recalc.cpp index aa7df9ccb8..0c842e2254 100644 --- a/compiler/inferring/type-hint-recalc.cpp +++ b/compiler/inferring/type-hint-recalc.cpp @@ -93,7 +93,7 @@ void TypeHintArgSubkeyGet::recalc_type_data_in_context_of_call(TypeData *dst, Ve } void TypeHintArray::recalc_type_data_in_context_of_call(TypeData *dst, VertexPtr call) const { - dst->set_lca(TypeData::get_type(tp_array)); + dst->set_lca(TypeData::get_type(tp_array), TypeData::LCAFlags::for_phpdoc()); TypeData nested(*TypeData::get_type(tp_any)); inner->recalc_type_data_in_context_of_call(&nested, call); dst->set_lca_at(MultiKey::any_key(1), &nested); @@ -187,10 +187,10 @@ void TypeHintRefToMethod::recalc_type_data_in_context_of_call(TypeData *dst, Ver void TypeHintOptional::recalc_type_data_in_context_of_call(TypeData *dst, VertexPtr call) const { inner->recalc_type_data_in_context_of_call(dst, call); if (or_null) { - dst->set_lca(TypeData::get_type(tp_Null)); + dst->set_lca(TypeData::get_type(tp_Null), TypeData::LCAFlags::for_phpdoc()); } if (or_false) { - dst->set_lca(TypeData::get_type(tp_False)); 
+ dst->set_lca(TypeData::get_type(tp_False), TypeData::LCAFlags::for_phpdoc()); } } @@ -201,7 +201,7 @@ void TypeHintPipe::recalc_type_data_in_context_of_call(TypeData *dst, VertexPtr } void TypeHintPrimitive::recalc_type_data_in_context_of_call(TypeData *dst, VertexPtr call __attribute__ ((unused))) const { - dst->set_lca(TypeData::get_type(ptype)); + dst->set_lca(TypeData::get_type(ptype), TypeData::LCAFlags::for_phpdoc()); } void TypeHintObject::recalc_type_data_in_context_of_call(TypeData *dst __attribute__ ((unused)), VertexPtr call __attribute__ ((unused))) const { diff --git a/compiler/pipes/collect-main-edges.cpp b/compiler/pipes/collect-main-edges.cpp index 1ac2cfcfc0..f3f328cc87 100644 --- a/compiler/pipes/collect-main-edges.cpp +++ b/compiler/pipes/collect-main-edges.cpp @@ -393,7 +393,7 @@ void CollectMainEdgesPass::on_foreach(VertexAdaptor foreach_op) { } create_set(as_lvalue(x->var_id), params); if (key) { - create_type_assign(as_lvalue(key->var_id), TypeData::get_type(tp_mixed)); + create_type_assign(as_lvalue(key->var_id), TypeData::get_foreach_key_type()); } } diff --git a/tests/phpt/generics/021_default_T.php b/tests/phpt/generics/021_default_T.php index 939ea6534c..4165174a75 100644 --- a/tests/phpt/generics/021_default_T.php +++ b/tests/phpt/generics/021_default_T.php @@ -154,8 +154,12 @@ function pushInOne($arr1, $arr2 = [], $arr3 = [], $arr4 = []) { /** @var mixed[] */ $mixeds = pushInOne([1,2,3], ['1','2','3']); -$mixeds = pushInOne([1,2,3], ['1','2','3'], [true], [null]); +$mixeds = pushInOne([1,2,3], ['1','2','3'], [true], [1]); var_dump($mixeds); +$mixeds2 = pushInOne([1,2,3], ['1','2','3'], [true], [false]); +var_dump($mixeds2); +$mixeds3 = pushInOne([1,2,3], ['1','2','3'], [true], ['str']); +var_dump($mixeds3); $as = pushInOne([new A]); $as = pushInOne([new A], [new A]); diff --git a/tests/phpt/phpdocs/106_boolean_in_phpdoc.php b/tests/phpt/phpdocs/106_boolean_in_phpdoc.php index f5c4d1f0ec..cc2aebe0c5 100644 --- 
a/tests/phpt/phpdocs/106_boolean_in_phpdoc.php +++ b/tests/phpt/phpdocs/106_boolean_in_phpdoc.php @@ -7,3 +7,4 @@ function foo($a) { } foo(false); +foo(true); diff --git a/tests/phpt/restricted_mixed/correct_usage.php b/tests/phpt/restricted_mixed/correct_usage.php new file mode 100644 index 0000000000..aad7e57b09 --- /dev/null +++ b/tests/phpt/restricted_mixed/correct_usage.php @@ -0,0 +1,290 @@ +@ok +int_or_string); // this will be null, even though the type is int|string + $obj->int_or_string = 134; + $obj->int_or_string = 'str'; + + $obj->int_or_string2 = $obj->int_or_string; + + $local_int_or_string = $obj->int_or_string; + // for now, even if $local_int_or_string type is mixed, we allow assignments + // of mixed to a restricted mixed to decrease the amount of new compile errors; + // when restricted mixed types are preserved between the assignments, + // it will be forbidden to assign an incompatible mixed type + $obj->int_or_string2 = $local_int_or_string; + $obj->int_or_string2 = $mixed; + if (is_string($local_int_or_string)) { + ensure_string($local_int_or_string); + } + + $obj->nullable_string_or_array = null; + $obj->nullable_string_or_array = 'str'; + $obj->nullable_string_or_array = [1]; + $obj->nullable_string_or_array = ['str']; + $obj->nullable_string_or_array = []; + + $obj->nullable_string_or_array2 = null; + $obj->nullable_string_or_array2 = $obj->nullable_string_or_array; + + default_int_or_string($mixed); + default_int_or_string(1); + default_int_or_string('str'); + default_int_or_string($obj->int_or_string); + + union_with_mixed(1); + union_with_mixed(1.4); + union_with_mixed('str'); + union_with_mixed([1]); + union_with_mixed(['str']); + union_with_mixed(true); + union_with_mixed(false); + union_with_mixed(null); + + typed_array_union(['x']); + typed_array_union([1]); + typed_array_union([1.6]); + typed_array_union([true]); + typed_array_union([]); + typed_array_union([$mixed]); + + typed_array_union2(['x']); + typed_array_union2([1]); + 
typed_array_union2([false]); + typed_array_union2([true]); + typed_array_union2([]); + + typed_array_union3([3.5]); + typed_array_union3([1]); + typed_array_union3([]); + typed_array_union3([true]); + + optional_int_or_string($int_or_string_var); + optional_int_or_string($nullable_string_var); + optional_int_or_string($optional_string_var); + optional_int_or_string(null); + optional_int_or_string(false); + optional_int_or_string(1); + optional_int_or_string('str'); +} + +main(); diff --git a/tests/phpt/restricted_mixed/error_param_float_or_string.php b/tests/phpt/restricted_mixed/error_param_float_or_string.php new file mode 100644 index 0000000000..9a5898f928 --- /dev/null +++ b/tests/phpt/restricted_mixed/error_param_float_or_string.php @@ -0,0 +1,9 @@ +@kphp_should_fail +/pass bool to argument \$x of f/ +/declared as @param float\|string/ +