From f090a349d2ce40b6c70321b055fe69eb755d3a2f Mon Sep 17 00:00:00 2001 From: lateralusX Date: Wed, 17 Jan 2024 12:43:22 +0100 Subject: [PATCH 1/3] Reduce Mono AOT cross compiler x64 memory footprint. Building .net8 S.P.C using Mono AOT cross compiler in full AOT consumes a large amount of memory (up to 6 GB). This is mainly due to the generated LLVM module not being optimized at all while kept in memory during full module generation. Mono x64 also lacks support for several intrinsics as well as Vector 256/512 that in turn leads to massive inlining of intrinsics functions generating a very large LLVM module, where the majority of this code ends up as dead code due to IsSupported/IsHardwareAccelerated returning false. The following commit adjusts several things that will bring down the memory usage, compiling .net8/.net9 Mono S.P.C on x64 Windows from 6 GB down to ~750 MB. * Use PNSE implementations on intrinsics not supported on Mono. * Add ILLinker substitutions for intrinsics not supported on Mono. Enables ILLinker to do dead code elimination, reduce code to AOT compile. * Prevent aggressive inlining for a couple of unsupported intrinsics types making sure we don't end up with excessive inlining, exploding code size. * Run a couple of LLVM optimization passes on each generated method doing early code simplification and dead code elimination during LLVM module generation. * Explicit SN_get_IsHardwareAccelerated/SN_get_IsSupported intrinsics implementation for all unsupported Mono x64 SIMD intrinsics. * Fixed numerous memory leaks in Mono AOT cross compiler code. * Fix a couple of sequence points use-after-free errors. * Fix an anonymous struct build warning triggering build error for LLVM enabled cross compiler on Windows. 
--- .../System.Private.CoreLib.Shared.projitems | 32 +++++--- .../System.Private.CoreLib.csproj | 2 + ...LLink.Substitutions.Intrinsics.Vectors.xml | 10 +++ .../ILLink.Substitutions.Intrinsics.x86.xml | 79 +++++++++++++++++++ src/mono/mono/mini/aot-compiler.c | 3 +- src/mono/mono/mini/method-to-ir.c | 13 ++- src/mono/mono/mini/mini.h | 1 + src/mono/mono/mini/simd-intrinsics.c | 63 ++++++++++++++- 8 files changed, 187 insertions(+), 16 deletions(-) create mode 100644 src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml create mode 100644 src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 261968ae7f8f3d..a0a2501984f698 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2575,18 +2575,27 @@ - - - - - - - - - + + + + + + + + + + + + + + + + + - + + @@ -2597,7 +2606,8 @@ - + + diff --git a/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj index 49c5d602711719..1c085ed36e446e 100644 --- a/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -151,6 +151,8 @@ + + diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml new file mode 100644 index 00000000000000..871271290a7f75 --- /dev/null +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git 
a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml new file mode 100644 index 00000000000000..2044f416909908 --- /dev/null +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/mono/mono/mini/aot-compiler.c b/src/mono/mono/mini/aot-compiler.c index f8275230e210be..a5775c2ae47210 100644 --- a/src/mono/mono/mini/aot-compiler.c +++ b/src/mono/mono/mini/aot-compiler.c @@ -4227,6 +4227,7 @@ get_plt_entry (MonoAotCompile *acfg, MonoJumpInfo *patch_info) res->llvm_symbol = mono_mempool_strdup_printf (acfg->mempool, "%s_%s_llvm", res->symbol, res->debug_sym); else res->llvm_symbol = mono_mempool_strdup_printf (acfg->mempool, "%s_llvm", res->symbol); + if (strstr (res->llvm_symbol, acfg->temp_prefix) == res->llvm_symbol) res->llvm_symbol = res->llvm_symbol + strlen (acfg->temp_prefix); @@ -14229,7 +14230,7 @@ static void acfg_free (MonoAotCompile *acfg) { #ifdef ENABLE_LLVM - if (acfg->aot_opts.llvm) + if (mono_use_llvm || acfg->aot_opts.llvm) mono_llvm_free_aot_module (); #endif diff --git a/src/mono/mono/mini/method-to-ir.c b/src/mono/mono/mini/method-to-ir.c index 87b9498074ce02..bbe35491277b8b 100644 --- a/src/mono/mono/mini/method-to-ir.c +++ b/src/mono/mono/mini/method-to-ir.c @@ -4746,6 +4746,17 @@ mini_inline_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature * return inline_method (cfg, cmethod, fsig, sp, ip, real_offset, inline_always, NULL); } +static gboolean +aggressive_inline_method (MonoMethod *cmethod) +{ + gboolean aggressive_inline = m_method_is_aggressive_inlining (cmethod); +#ifdef MONO_ARCH_SIMD_INTRINSICS + if (aggressive_inline) + aggressive_inline = 
!mono_simd_unsupported_aggressive_inline_intrinsic_type (cmethod); +#endif + return aggressive_inline; +} + /* * inline_method: * @@ -4871,7 +4882,7 @@ inline_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, cfg->disable_inline = prev_disable_inline; cfg->inline_depth --; - if ((costs >= 0 && costs < 60) || inline_always || (costs >= 0 && (cmethod->iflags & METHOD_IMPL_ATTRIBUTE_AGGRESSIVE_INLINING))) { + if ((costs >= 0 && costs < 60) || inline_always || (costs >= 0 && aggressive_inline_method (cmethod))) { if (cfg->verbose_level > 2) printf ("INLINE END %s -> %s\n", mono_method_full_name (cfg->method, TRUE), mono_method_full_name (cmethod, TRUE)); diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index 7a9ca5644678a4..9982afc22e3f2f 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2962,6 +2962,7 @@ MonoInst* mono_emit_common_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoInst* mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args); MonoInst* mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr); void mono_simd_intrinsics_init (void); +gboolean mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod); MonoMethod* mini_method_to_shared (MonoMethod *method); // null if not shared diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 13d341f9e8bf01..b855dec613bb98 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1178,6 +1178,20 @@ create_class_instance (const char* name_space, const char *name, MonoType *param return ivector_inst; } +static gboolean +is_supported_vector_primitive_type (MonoType *type) +{ + gboolean constrained_generic_param = (type->type == MONO_TYPE_VAR || type->type == MONO_TYPE_MVAR); + + if (constrained_generic_param && type->data.generic_param->gshared_constraint && 
MONO_TYPE_IS_VECTOR_PRIMITIVE (type->data.generic_param->gshared_constraint)) + return TRUE; + + if (MONO_TYPE_IS_VECTOR_PRIMITIVE (type)) + return TRUE; + + return FALSE; +} + static guint16 sri_vector_methods [] = { SN_Abs, SN_Add, @@ -1422,9 +1436,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi else return NULL; - if (vector_size == 256 || vector_size == 512) - return NULL; - + if (vector_size == 256 || vector_size == 512) { + if (id == SN_get_IsHardwareAccelerated ) { + MonoInst* ins; + EMIT_NEW_ICONST (cfg, ins, 0); + return ins; + } + + return NULL; + } + // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 if (!COMPILE_LLVM (cfg)) { @@ -2477,6 +2498,12 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f g_free (name); } + if (id == SN_get_IsSupported) { + MonoInst *ins; + EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 1 : 0); + return ins; + } + // Apart from filtering out non-primitive types this also filters out shared generic instance types like: T_BYTE which cannot be intrinsified if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) { // Happens often in gshared code @@ -3199,6 +3226,11 @@ emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSig type = m_class_get_byval_arg (klass); etype = mono_class_get_context (klass)->class_inst->type_argv [0]; + if (id == SN_get_IsSupported) { + EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 
1 : 0); + return ins; + } + if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) return NULL; @@ -6118,11 +6150,36 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i decompose_vtype_opt_store_arg (cfg, bb, ins, &(ins->dreg)); } } + +gboolean +mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod) +{ + /* + * If a method has been marked with aggressive inlining, check if we support + * aggressive inlining of the intrinsics type, if not, ignore aggressive inlining + * since it could end up inlining a large amount of code that most likely will end + * up as dead code. + */ + if (!strcmp (m_class_get_name_space (cmethod->klass), "System.Runtime.Intrinsics")) { + if (!strncmp(m_class_get_name (cmethod->klass), "Vector", 6)) { + const char *vector_type = m_class_get_name (cmethod->klass) + 6; + if (!!strcmp(vector_type, "256`1") || !strcmp(vector_type, "512`1")) + return TRUE; + } + } + return FALSE; +} #else void mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins) { } + +gboolean +mono_simd_unsupported_aggressive_inline_intrinsic_type(MonoMethod* cmethod) +{ + return FALSE; +} #endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/ #endif /* DISABLE_JIT */ From cfaf8d9a917ebfe2c757bae483a5731a1d66d94d Mon Sep 17 00:00:00 2001 From: lateralusX Date: Mon, 22 Jan 2024 12:37:29 +0100 Subject: [PATCH 2/3] Fix review feedback. 
--- .../System.Private.CoreLib.Shared.projitems | 42 +++++++++---------- src/mono/mono/mini/simd-intrinsics.c | 11 +---- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index a0a2501984f698..94089d1018d8d1 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2575,27 +2575,27 @@ - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + - - + + @@ -2606,8 +2606,8 @@ - - + + diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index b855dec613bb98..f37bbf83aedcda 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1436,15 +1436,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi else return NULL; - if (vector_size == 256 || vector_size == 512) { - if (id == SN_get_IsHardwareAccelerated ) { - MonoInst* ins; - EMIT_NEW_ICONST (cfg, ins, 0); - return ins; - } - + if (vector_size == 256 || vector_size == 512) return NULL; - } // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 @@ -6163,7 +6156,7 @@ mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod) if (!strcmp (m_class_get_name_space (cmethod->klass), "System.Runtime.Intrinsics")) { if (!strncmp(m_class_get_name (cmethod->klass), "Vector", 6)) { const char *vector_type = m_class_get_name (cmethod->klass) + 6; - if (!!strcmp(vector_type, "256`1") || !strcmp(vector_type, "512`1")) + if (!strcmp(vector_type, "256`1") || !strcmp(vector_type, "512`1")) return TRUE; } } From 5ab0d9487d9fe595a251681a7bb70028d66e162c Mon Sep 17 00:00:00 2001 From: Zoltan Varga Date: Fri, 9 Feb 2024 04:05:08 -0500 Subject: 
[PATCH 3/3] Minor cleanups. --- .../ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml | 2 +- .../src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml | 2 +- src/mono/mono/mini/aot-compiler.c | 1 - src/mono/mono/mini/method-to-ir.c | 2 -- src/mono/mono/mini/simd-intrinsics.c | 9 ++++++++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml index 871271290a7f75..c50829b7843929 100644 --- a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml @@ -7,4 +7,4 @@ - \ No newline at end of file + diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml index 2044f416909908..bd008db96ba1d0 100644 --- a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml @@ -76,4 +76,4 @@ - \ No newline at end of file + diff --git a/src/mono/mono/mini/aot-compiler.c b/src/mono/mono/mini/aot-compiler.c index a5775c2ae47210..d8f80b0bc6a932 100644 --- a/src/mono/mono/mini/aot-compiler.c +++ b/src/mono/mono/mini/aot-compiler.c @@ -4227,7 +4227,6 @@ get_plt_entry (MonoAotCompile *acfg, MonoJumpInfo *patch_info) res->llvm_symbol = mono_mempool_strdup_printf (acfg->mempool, "%s_%s_llvm", res->symbol, res->debug_sym); else res->llvm_symbol = mono_mempool_strdup_printf (acfg->mempool, "%s_llvm", res->symbol); - if (strstr (res->llvm_symbol, acfg->temp_prefix) == res->llvm_symbol) res->llvm_symbol = res->llvm_symbol + strlen (acfg->temp_prefix); diff --git a/src/mono/mono/mini/method-to-ir.c b/src/mono/mono/mini/method-to-ir.c index 
bbe35491277b8b..86c4eb29158746 100644 --- a/src/mono/mono/mini/method-to-ir.c +++ b/src/mono/mono/mini/method-to-ir.c @@ -4750,10 +4750,8 @@ static gboolean aggressive_inline_method (MonoMethod *cmethod) { gboolean aggressive_inline = m_method_is_aggressive_inlining (cmethod); -#ifdef MONO_ARCH_SIMD_INTRINSICS if (aggressive_inline) aggressive_inline = !mono_simd_unsupported_aggressive_inline_intrinsic_type (cmethod); -#endif return aggressive_inline; } diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index f37bbf83aedcda..73e5d88f3b504a 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -6169,10 +6169,11 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i } gboolean -mono_simd_unsupported_aggressive_inline_intrinsic_type(MonoMethod* cmethod) +mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod) { return FALSE; } + #endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/ #endif /* DISABLE_JIT */ @@ -6207,6 +6208,12 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i { } +gboolean +mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod) +{ + return FALSE; +} + #endif /* MONO_ARCH_SIMD_INTRINSICS */ #if defined(TARGET_AMD64)