diff --git a/frame/1m/packm/bli_packm_blk_var1.c b/frame/1m/packm/bli_packm_blk_var1.c index 9ac9582db8..05263c4b7f 100644 --- a/frame/1m/packm/bli_packm_blk_var1.c +++ b/frame/1m/packm/bli_packm_blk_var1.c @@ -190,15 +190,16 @@ void bli_packm_blk_var1 inc_t p_inc = ps_p; - // NOTE: We MUST use round-robin partitioning when packing - // micropanels of a triangular matrix. Hermitian/symmetric - // and general packing may use slab or round-robin, depending - // on which was selected at configure-time. - // The definition of bli_packm_my_iter() will depend on whether slab - // or round-robin partitioning was requested at configure-time. - bool my_iter = bli_is_triangular( strucc ) - ? bli_packm_my_iter_rr( it, it_start, it_end, tid, nt ) - : bli_packm_my_iter ( it, it_start, it_end, tid, nt ); + // NOTE: We MUST use round-robin work allocation (bli_packm_my_iter_rr()) + // when packing micropanels of a triangular matrix. Hermitian/symmetric + // and general packing may use slab or round-robin (bli_packm_my_iter()), + // depending on which was selected at configure-time. + bool my_iter = ( bli_is_triangular( strucc ) && + bli_intersects_diag_n( diagoffc_i, panel_dim_i, + panel_len_full ) + ? bli_packm_my_iter_rr( it, it_start, it_end, tid, nt ) + : bli_packm_my_iter ( it, it_start, it_end, tid, nt ) + ); if ( bli_is_triangular( strucc ) && bli_is_unstored_subpart_n( diagoffc_i, uploc, panel_dim_i, panel_len_full ) )