From 50d29291bb8d008201ab53558753e154654db55c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 30 Sep 2023 22:08:56 +0200 Subject: [PATCH 01/17] STOREIND Coalescing --- src/coreclr/jit/lower.cpp | 301 ++++++++++++++++++++++++++++++++++++++ src/coreclr/jit/lower.h | 1 + 2 files changed, 302 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 4b9d56e240c46a..41c1cc2376ac55 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7802,6 +7802,306 @@ void Lowering::ContainCheckBitCast(GenTree* node) } } +struct StoreCoalescingData +{ + var_types type; + GenTreeLclVarCommon* base; + GenTreeLclVarCommon* index; + uint32_t scale; + int offset; + GenTree* val; +}; + +//------------------------------------------------------------------------ +// GetStoreCoalescingData: given a STOREIND node, get the data needed to perform +// store coalescing including pointer to the previous node. +// +// Arguments: +// ind - the STOREIND node +// data - [OUT] the data needed for store coalescing +// prevTree - [OUT] the pointer to the previous node +// +// Return Value: +// true if the data was successfully retrieved, false otherwise. +// Basically, false means that we definitely can't do store coalescing. +// +static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* data, GenTree** prevTree) +{ + // Don't merge volatile stores. + if (ind->IsVolatile()) + { + return false; + } + + // Data has to be constant. We might allow locals in the future. + if (!ind->Data()->IsCnsIntOrI()) + { + return false; + } + + // We will need to walk previous trees (up to 4 nodes in a store) + // We already know that we have value and addr nodes, LEA might add more. + const int MaxTrees = 4; + GenTree* trees[MaxTrees] = {ind->Data(), ind->Addr(), nullptr, nullptr}; + int treesCount = 2; + + data->type = ind->TypeGet(); + data->val = ind->Data(); + if (ind->Addr()->OperIs(GT_LEA)) + { + GenTree* base = ind->Addr()->AsAddrMode()->Base(); + GenTree* index = ind->Addr()->AsAddrMode()->Index(); + if ((base == nullptr) || !base->IsLocal()) + { + // Base must be a local. It's possible for it to be nullptr when index is not null, + // but let's ignore such cases. + return false; + } + if ((index != nullptr) && !index->IsLocal()) + { + // Index is either nullptr or a local. + return false; + } + data->base = base == nullptr ? nullptr : base->AsLclVarCommon(); + data->index = index == nullptr ? nullptr : index->AsLclVarCommon(); + data->scale = ind->Addr()->AsAddrMode()->GetScale(); + data->offset = ind->Addr()->AsAddrMode()->Offset(); + + // Add index and base to the list of trees. + trees[treesCount++] = base; + if (index != nullptr) + { + trees[treesCount++] = index; + } + } + else if (ind->Addr()->OperIsLocal()) + { + // Address is just a local, no offset, scale is 1 + data->base = ind->Addr()->AsLclVarCommon(); + data->index = nullptr; + data->scale = 1; + data->offset = 0; + } + else + { + // Address is not LEA or local. + return false; + } + + // We expect all the trees to be found in gtPrev (we don't care about the order, just make sure we don't have + // unexpected trees in-between) + GenTree* prev = ind->gtPrev; + for (int i = 0; i < treesCount; i++) + { + bool found = false; + + // We don't need any O(1) lookups here, the array is too small + for (int j = 0; j < MaxTrees; j++) + { + if (trees[j] == prev) + { + trees[j] = nullptr; + found = true; + break; + } + } + + if (!found) + { + // Unexpected node in-between. 
+ return false;
+ }
+ prev = prev->gtPrev;
+ }
+
+ *prevTree = prev;
+ return true;
+}
+
+//------------------------------------------------------------------------
+// CanBeCoalesced: given two store coalescing data,
+// determine whether they can be coalesced.
+//
+// Arguments:
+// data1 - the first store coalescing data
+// data2 - the second store coalescing data (order is not important)
+//
+// Return Value:
+// true if the data can be coalesced, false otherwise.
+//
+static bool CanBeCoalesced(StoreCoalescingData* data1, StoreCoalescingData* data2)
+{
+ // base, index, scale and type all have to be the same
+ if ((data1->scale != data2->scale) || (data1->type != data2->type) || !GenTree::Compare(data1->base, data2->base) ||
+ !GenTree::Compare(data1->index, data2->index))
+ {
+ return false;
+ }
+
+ // val has to be a constant
+ if (!data1->val->OperIsConst() && !data2->val->OperIsConst())
+ {
+ return false;
+ }
+
+ // Offset has to match the size of the type
+ // We don't support the same or overlapping offsets.
+ if (abs(data1->offset - data2->offset) != (int)genTypeSize(data1->type))
+ {
+ return false;
+ }
+ return true;
+}
+
+//------------------------------------------------------------------------
+// LowerCoalescingWithPreviousInd: Try to coalesce a STOREIND with previous STOREINDs. Example:
+//
+// * STOREIND int
+// +--* LCL_VAR byref V00
+// \--* CNS_INT int 0x1
+//
+// * STOREIND int
+// +--* LEA(b+4) byref
+// | \--* LCL_VAR byref V00
+// \--* CNS_INT int 0x2
+//
+// We can merge these two into a single store of 8 bytes with (0x1 | (0x2 << 32)) as the value:
+//
+// * STOREIND long
+// +--* LEA(b+0) byref
+// | \--* LCL_VAR byref V00
+// \--* CNS_INT long 0x200000001
+//
+// Arguments:
+// ind - the current STOREIND node
+//
+void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind)
+{
+ if (!comp->opts.OptimizationEnabled())
+ {
+ return;
+ }
+
+ // For now, we require the current STOREIND to have LEA (previous store may not have it)
+ // TODO-CQ: Remove this restriction, we might even not need any LEAs at all for the final coalesced store.
+ if (!ind->Addr()->OperIs(GT_LEA))
+ {
+ return;
+ }
+
+ // This check is not really needed, just for better throughput.
+ // We only support these types for the initial version.
+ if (ind->TypeIs(TYP_SHORT, TYP_USHORT, TYP_INT)) + { + return; + } + + StoreCoalescingData currData; + StoreCoalescingData prevData; + GenTreeStoreInd* prevInd; + GenTree* prevTree; + + // Get coalescing data for the current STOREIND + if (GetStoreCoalescingData(ind, &currData, &prevTree)) + { + while (prevTree != nullptr && prevTree->OperIs(GT_NOP, GT_IL_OFFSET)) + { + prevTree = prevTree->gtPrev; + } + + if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND)) + { + return; + } + prevInd = prevTree->AsStoreInd(); + + // Get coalescing data for the previous STOREIND + if (GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) && (prevTree != nullptr)) + { + // Check whether we can coalesce the two stores + if (CanBeCoalesced(&prevData, &currData)) + { + var_types oldType = ind->TypeGet(); + var_types newType; + switch (oldType) + { + case TYP_SHORT: + case TYP_USHORT: + newType = TYP_INT; + break; + +#ifdef TARGET_64BIT + case TYP_INT: + newType = TYP_LONG; + break; +#endif + + // TODO: BYTE, SIMD + + default: + return; + } + + // Delete previous STOREIND + BlockRange().Remove(prevTree->gtNext, prevInd); + + // We know it's always LEA for now + GenTreeAddrMode* addr = ind->Addr()->AsAddrMode(); + + // Take the smallest offset + addr->SetOffset(min(prevData.offset, currData.offset)); + + // Make type twice bigger + ind->gtType = newType; + + // We currently only support constants for val + assert(prevData.val->IsCnsIntOrI() && currData.val->IsCnsIntOrI()); + + size_t lowerCns = (size_t)prevData.val->AsIntCon()->IconValue(); + size_t upperCns = (size_t)currData.val->AsIntCon()->IconValue(); + + ind->Data()->gtType = newType; + + // TP: if both are zero - we're done. + if ((lowerCns | upperCns) == 0) + { + JITDUMP("Coalesced two stores into a single store with value 0\n"); + } + else + { + // if the previous store was at a higher address, swap the constants + if (prevData.offset > currData.offset) + { + std::swap(lowerCns, upperCns); + } + + // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT + // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. + size_t mask = ~0 >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; + lowerCns &= mask; + upperCns &= mask; + + ssize_t val = (ssize_t)(lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); + JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val); + + // It's not expected to be contained yet, but just in case... + ind->Data()->ClearContained(); + ind->Data()->AsIntCon()->gtIconVal = val; + } + + // Now check whether we can coalesce the new store with the previous one, e.g. we had: + // + // STOREIND(int) + // STOREIND(short) + // STOREIND(short) + // + // so we can coalesce the whole thing into a single store of 8 bytes. + LowerCoalescingWithPreviousInd(ind); + } + } + } +} + //------------------------------------------------------------------------ // LowerStoreIndirCommon: a common logic to lower StoreIndir. 
// @@ -7842,6 +8142,7 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) } #endif + LowerCoalescingWithPreviousInd(ind); LowerStoreIndir(ind); } } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 2ac01306a16b58..6d017b7ac900a3 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -312,6 +312,7 @@ class Lowering final : public Phase void LowerStoreIndirCommon(GenTreeStoreInd* ind); void LowerIndir(GenTreeIndir* ind); void LowerStoreIndir(GenTreeStoreInd* node); + void LowerCoalescingWithPreviousInd(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); bool TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode); From eedb9796bbd848008abe5889a079f3dd09f88591 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 30 Sep 2023 22:11:23 +0200 Subject: [PATCH 02/17] Remove short temporarily --- src/coreclr/jit/lower.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 41c1cc2376ac55..1baea1c3a50a7e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7991,7 +7991,7 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) // This check is not really needed, just for better throughput. // We only support these types for the initial version. - if (ind->TypeIs(TYP_SHORT, TYP_USHORT, TYP_INT)) + if (!ind->TypeIs(TYP_SHORT, TYP_USHORT, TYP_INT)) { return; } @@ -8025,10 +8025,10 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) var_types newType; switch (oldType) { - case TYP_SHORT: - case TYP_USHORT: - newType = TYP_INT; - break; + //case TYP_SHORT: + //case TYP_USHORT: + // newType = TYP_INT; + // break; #ifdef TARGET_64BIT case TYP_INT: From bfe176591631fc98823f54967b0d6d4e775b1788 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 30 Sep 2023 22:48:51 +0200 Subject: [PATCH 03/17] Check that both indirs are unused --- src/coreclr/jit/lower.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 1baea1c3a50a7e..4f044f01065d2b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8004,11 +8004,14 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) // Get coalescing data for the current STOREIND if (GetStoreCoalescingData(ind, &currData, &prevTree)) { + // Now we need to find the previous STOREIND, + // we can ignore any NOPs or IL_OFFSETs in-between while (prevTree != nullptr && prevTree->OperIs(GT_NOP, GT_IL_OFFSET)) { prevTree = prevTree->gtPrev; } + // It's not a STOREIND - bail out. 
if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND)) { return; @@ -8018,6 +8021,13 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) // Get coalescing data for the previous STOREIND if (GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) && (prevTree != nullptr)) { + LIR::Use use; + if (BlockRange().TryGetUse(ind, &use) || BlockRange().TryGetUse(prevInd, &use)) + { + // Both should be unused + return; + } + // Check whether we can coalesce the two stores if (CanBeCoalesced(&prevData, &currData)) { @@ -8025,10 +8035,10 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) var_types newType; switch (oldType) { - //case TYP_SHORT: - //case TYP_USHORT: - // newType = TYP_INT; - // break; +// case TYP_SHORT: +// case TYP_USHORT: +// newType = TYP_INT; +// break; #ifdef TARGET_64BIT case TYP_INT: From 6f2838e1c3b4c2ae5486644bf00ab9d9d8db9ab2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 30 Sep 2023 23:06:54 +0200 Subject: [PATCH 04/17] Enable short --- src/coreclr/jit/lower.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 4f044f01065d2b..c3e7d4b35b7980 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8035,10 +8035,10 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) var_types newType; switch (oldType) { -// case TYP_SHORT: -// case TYP_USHORT: -// newType = TYP_INT; -// break; + case TYP_SHORT: + case TYP_USHORT: + newType = TYP_INT; + break; #ifdef TARGET_64BIT case TYP_INT: From 532266bc08c6eea58d537b4e90ce751a368fc807 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 1 Oct 2023 00:02:12 +0200 Subject: [PATCH 05/17] Enable BYTE --- src/coreclr/jit/lower.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c3e7d4b35b7980..da180f1a471455 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7991,7 +7991,7 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) // This check is not really needed, just for better throughput. // We only support these types for the initial version. - if (!ind->TypeIs(TYP_SHORT, TYP_USHORT, TYP_INT)) + if (!ind->TypeIs(TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT)) { return; } @@ -8035,6 +8035,11 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) var_types newType; switch (oldType) { + case TYP_BYTE: + case TYP_UBYTE: + newType = TYP_USHORT; + break; + case TYP_SHORT: case TYP_USHORT: newType = TYP_INT; @@ -8046,7 +8051,7 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) break; #endif - // TODO: BYTE, SIMD + // TODO: SIMD default: return; @@ -8087,16 +8092,16 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. - size_t mask = ~0 >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; + size_t mask = ~0UL >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; lowerCns &= mask; upperCns &= mask; - ssize_t val = (ssize_t)(lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); + size_t val = (lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val); // It's not expected to be contained yet, but just in case... 
ind->Data()->ClearContained(); - ind->Data()->AsIntCon()->gtIconVal = val; + ind->Data()->AsIntCon()->gtIconVal = (ssize_t)val; } // Now check whether we can coalesce the new store with the previous one, e.g. we had: From 033a168e5e4a470b000c785e23acac2b42050130 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 1 Oct 2023 02:47:01 +0200 Subject: [PATCH 06/17] Update lower.cpp --- src/coreclr/jit/lower.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index da180f1a471455..f7b753335cc524 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8092,7 +8092,7 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. - size_t mask = ~0UL >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; + size_t mask = ~(size_t(0)) >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; lowerCns &= mask; upperCns &= mask; @@ -8111,7 +8111,7 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) // STOREIND(short) // // so we can coalesce the whole thing into a single store of 8 bytes. - LowerCoalescingWithPreviousInd(ind); + // LowerCoalescingWithPreviousInd(ind); } } } From 9b17c328ade4dec9a3d73e85be884a4523105d77 Mon Sep 17 00:00:00 2001 From: Egor Date: Sun, 1 Oct 2023 06:19:52 +0200 Subject: [PATCH 07/17] call multiple times --- src/coreclr/jit/lower.cpp | 185 ++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f7b753335cc524..45e983c4911a50 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7989,21 +7989,26 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) return; } - // This check is not really needed, just for better throughput. - // We only support these types for the initial version. - if (!ind->TypeIs(TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT)) + do { - return; - } + // This check is not really needed, just for better throughput. + // We only support these types for the initial version. 
+ if (!ind->TypeIs(TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT)) + { + return; + } - StoreCoalescingData currData; - StoreCoalescingData prevData; - GenTreeStoreInd* prevInd; - GenTree* prevTree; + StoreCoalescingData currData; + StoreCoalescingData prevData; + GenTreeStoreInd* prevInd; + GenTree* prevTree; + + // Get coalescing data for the current STOREIND + if (!GetStoreCoalescingData(ind, &currData, &prevTree)) + { + return; + } - // Get coalescing data for the current STOREIND - if (GetStoreCoalescingData(ind, &currData, &prevTree)) - { // Now we need to find the previous STOREIND, // we can ignore any NOPs or IL_OFFSETs in-between while (prevTree != nullptr && prevTree->OperIs(GT_NOP, GT_IL_OFFSET)) @@ -8019,102 +8024,106 @@ void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) prevInd = prevTree->AsStoreInd(); // Get coalescing data for the previous STOREIND - if (GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) && (prevTree != nullptr)) + if (!GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) || (prevTree == nullptr)) { - LIR::Use use; - if (BlockRange().TryGetUse(ind, &use) || BlockRange().TryGetUse(prevInd, &use)) - { - // Both should be unused - return; - } + return; + } - // Check whether we can coalesce the two stores - if (CanBeCoalesced(&prevData, &currData)) - { - var_types oldType = ind->TypeGet(); - var_types newType; - switch (oldType) - { - case TYP_BYTE: - case TYP_UBYTE: - newType = TYP_USHORT; - break; + LIR::Use use; + if (BlockRange().TryGetUse(ind, &use) || BlockRange().TryGetUse(prevInd, &use)) + { + // Both should be unused + return; + } - case TYP_SHORT: - case TYP_USHORT: - newType = TYP_INT; - break; + // Check whether we can coalesce the two stores + if (!CanBeCoalesced(&prevData, &currData)) + { + return; + } + + var_types oldType = ind->TypeGet(); + var_types newType; + switch (oldType) + { + case TYP_BYTE: + case TYP_UBYTE: + newType = TYP_USHORT; + break; + + case TYP_SHORT: + case TYP_USHORT: + newType = TYP_INT; + break; #ifdef TARGET_64BIT - case TYP_INT: - newType = TYP_LONG; - break; + case TYP_INT: + newType = TYP_LONG; + break; #endif - // TODO: SIMD - - default: - return; - } + // TODO: SIMD - // Delete previous STOREIND - BlockRange().Remove(prevTree->gtNext, prevInd); + default: + return; + } - // We know it's always LEA for now - GenTreeAddrMode* addr = ind->Addr()->AsAddrMode(); + // Delete previous STOREIND + BlockRange().Remove(prevTree->gtNext, prevInd); - // Take the smallest offset - addr->SetOffset(min(prevData.offset, currData.offset)); + // We know it's always LEA for now + GenTreeAddrMode* addr = ind->Addr()->AsAddrMode(); - // Make type twice bigger - ind->gtType = newType; + // Take the smallest offset + addr->SetOffset(min(prevData.offset, currData.offset)); - // We currently only support constants for val - assert(prevData.val->IsCnsIntOrI() && currData.val->IsCnsIntOrI()); + // Make type twice bigger + ind->gtType = newType; - size_t lowerCns = (size_t)prevData.val->AsIntCon()->IconValue(); - size_t upperCns = (size_t)currData.val->AsIntCon()->IconValue(); + // We currently only support constants for val + assert(prevData.val->IsCnsIntOrI() && currData.val->IsCnsIntOrI()); - ind->Data()->gtType = newType; + size_t lowerCns = (size_t)prevData.val->AsIntCon()->IconValue(); + size_t upperCns = (size_t)currData.val->AsIntCon()->IconValue(); - // TP: if both are zero - we're done. 
- if ((lowerCns | upperCns) == 0) - { - JITDUMP("Coalesced two stores into a single store with value 0\n"); - } - else - { - // if the previous store was at a higher address, swap the constants - if (prevData.offset > currData.offset) - { - std::swap(lowerCns, upperCns); - } + ind->Data()->gtType = newType; - // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT - // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. - size_t mask = ~(size_t(0)) >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; - lowerCns &= mask; - upperCns &= mask; + // TP: if both are zero - we're done. + if ((lowerCns | upperCns) == 0) + { + JITDUMP("Coalesced two stores into a single store with value 0\n"); + } + else + { + // if the previous store was at a higher address, swap the constants + if (prevData.offset > currData.offset) + { + std::swap(lowerCns, upperCns); + } - size_t val = (lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); - JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val); + // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT + // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. + size_t mask = ~(size_t(0)) >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; + lowerCns &= mask; + upperCns &= mask; - // It's not expected to be contained yet, but just in case... - ind->Data()->ClearContained(); - ind->Data()->AsIntCon()->gtIconVal = (ssize_t)val; - } + size_t val = (lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); + JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val); - // Now check whether we can coalesce the new store with the previous one, e.g. we had: - // - // STOREIND(int) - // STOREIND(short) - // STOREIND(short) - // - // so we can coalesce the whole thing into a single store of 8 bytes. - // LowerCoalescingWithPreviousInd(ind); - } + // It's not expected to be contained yet, but just in case... + ind->Data()->ClearContained(); + ind->Data()->AsIntCon()->gtIconVal = (ssize_t)val; } - } + + // Now check whether we can coalesce the new store with the previous one, e.g. we had: + // + // STOREIND(int) + // STOREIND(short) + // STOREIND(short) + // + // so we can coalesce the whole thing into a single store of 8 bytes. + // LowerCoalescingWithPreviousInd(ind); + } while (true); } //------------------------------------------------------------------------ From 48a8f85ab2806dd5d4bd303f8009951a0743e553 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 1 Oct 2023 13:07:20 +0200 Subject: [PATCH 08/17] Clean up, disable for some platforms due to unaligned reads --- src/coreclr/jit/lower.cpp | 100 +++++++++++++++++++++----------------- src/coreclr/jit/lower.h | 2 +- 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 45e983c4911a50..027b3b03ddc37c 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7954,7 +7954,7 @@ static bool CanBeCoalesced(StoreCoalescingData* data1, StoreCoalescingData* data } //------------------------------------------------------------------------ -// LowerCoalescingWithPreviousInd: Try to coalesce a STOREIND with previous STOREINDs. Example: +// LowerStoreIndirCoalescing: Try to coalesce a STOREIND with previous STOREINDs. 
Example:
 //
 // * STOREIND int
 // +--* LCL_VAR byref V00
@@ -7973,20 +7973,35 @@ static bool CanBeCoalesced(StoreCoalescingData* data1, StoreCoalescingData* data
 // Arguments:
 // ind - the current STOREIND node
 //
-void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind)
+void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
 {
+// LA, RISC-V and ARM32 are more likely to receive a terrible performance hit from
+// unaligned accesses, making this optimization questionable.
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
 if (!comp->opts.OptimizationEnabled())
 {
 return;
 }

- // For now, we require the current STOREIND to have LEA (previous store may not have it)
- // TODO-CQ: Remove this restriction, we might even not need any LEAs at all for the final coalesced store.
 if (!ind->Addr()->OperIs(GT_LEA))
 {
+ // For now, we require the current STOREIND to have LEA (previous store may not have it)
+ // So we can easily adjust the offset, consider making it more flexible in future.
 return;
 }

+ // We're going to do it in a loop while we see suitable STOREINDs to coalesce.
+ // E.g.: we have the following LIR sequence:
+ //
+ // ...addr nodes...
+ // STOREIND(int)
+ // ...addr nodes...
+ // STOREIND(short)
+ // ...addr nodes...
+ // STOREIND(short) <-- we're here
+ //
+ // First we merge two 'short' stores, then we merge the result with the 'int' store
+ // to get a single store of 8 bytes.
 do
 {
 // This check is not really needed, just for better throughput.
@@ -8055,6 +8070,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
 return;
 }

+ // Since we're merging two stores of the same type, the new type is twice wider.
 var_types oldType = ind->TypeGet();
 var_types newType;
 switch (oldType)
@@ -8066,30 +8082,41 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)

 case TYP_SHORT:
 case TYP_USHORT:
- newType = TYP_INT;
+ newType = TYP_INT; // TYP_UINT is not legal in IR
 break;

 #ifdef TARGET_64BIT
 case TYP_INT:
 newType = TYP_LONG;
 break;
 #endif

- // TODO: SIMD
+ // TYP_FLOAT and TYP_DOUBLE aren't needed here - they're expected to
+ // be converted to TYP_INT/TYP_LONG for constant value.
+
+ // TODO-CQ:
+ // 2 x LONG -> SIMD16
+ // 2 x SIMD16 -> SIMD32
+ // 2 x SIMD32 -> SIMD64
+ // 2 x REF(null) -> SIMD16
+ //
+ // Where it's legal to use SIMD, e.g. we can't use SIMD16 for REF
+ // on x64 since it's not atomic.

 default:
 return;
 }

- // Delete previous STOREIND
+ // Delete previous STOREIND entirely
 BlockRange().Remove(prevTree->gtNext, prevInd);

 // We know it's always LEA for now
 GenTreeAddrMode* addr = ind->Addr()->AsAddrMode();

- // Take the smallest offset
+ // Update offset to be the minimum of the two
 addr->SetOffset(min(prevData.offset, currData.offset));

- // Make type twice bigger
- ind->gtType = newType;
+ // Update type for both STOREIND and val
+ ind->gtType = newType;
+ ind->Data()->gtType = newType;

- // We currently only support constants for val
+ // We currently only support these constants for val
+ // Will be extended to GT_CNS_VEC in future.
assert(prevData.val->IsCnsIntOrI() && currData.val->IsCnsIntOrI()); size_t lowerCns = (size_t)prevData.val->AsIntCon()->IconValue(); size_t upperCns = (size_t)currData.val->AsIntCon()->IconValue(); - ind->Data()->gtType = newType; - - // TP: if both are zero - we're done. - if ((lowerCns | upperCns) == 0) + // if the previous store was at a higher address, swap the constants + if (prevData.offset > currData.offset) { - JITDUMP("Coalesced two stores into a single store with value 0\n"); + std::swap(lowerCns, upperCns); } - else - { - // if the previous store was at a higher address, swap the constants - if (prevData.offset > currData.offset) - { - std::swap(lowerCns, upperCns); - } - // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT - // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. - size_t mask = ~(size_t(0)) >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; - lowerCns &= mask; - upperCns &= mask; + // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT + // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. + size_t mask = ~(size_t(0)) >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; + lowerCns &= mask; + upperCns &= mask; - size_t val = (lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); - JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val); + size_t val = (lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); + JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val); - // It's not expected to be contained yet, but just in case... - ind->Data()->ClearContained(); - ind->Data()->AsIntCon()->gtIconVal = (ssize_t)val; - } + // It's not expected to be contained yet, but just in case... + ind->Data()->ClearContained(); + ind->Data()->AsIntCon()->gtIconVal = (ssize_t)val; + ind->gtFlags |= GTF_IND_UNALIGNED; - // Now check whether we can coalesce the new store with the previous one, e.g. we had: - // - // STOREIND(int) - // STOREIND(short) - // STOREIND(short) - // - // so we can coalesce the whole thing into a single store of 8 bytes. 
- // LowerCoalescingWithPreviousInd(ind); } while (true); +#endif } //------------------------------------------------------------------------ @@ -8166,7 +8178,7 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) } #endif - LowerCoalescingWithPreviousInd(ind); + LowerStoreIndirCoalescing(ind); LowerStoreIndir(ind); } } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 6d017b7ac900a3..8ffe86c9a51b16 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -312,7 +312,7 @@ class Lowering final : public Phase void LowerStoreIndirCommon(GenTreeStoreInd* ind); void LowerIndir(GenTreeIndir* ind); void LowerStoreIndir(GenTreeStoreInd* node); - void LowerCoalescingWithPreviousInd(GenTreeStoreInd* node); + void LowerStoreIndirCoalescing(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); bool TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode); From ed568a6f3c9997972d5cc7e33ebf6533d1277564 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 1 Oct 2023 14:30:15 +0200 Subject: [PATCH 09/17] Clean up --- src/coreclr/jit/lower.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 027b3b03ddc37c..643f14c5bc6da2 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7849,6 +7849,12 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* da data->val = ind->Data(); if (ind->Addr()->OperIs(GT_LEA)) { + if (ind->Addr()->AsAddrMode()->GetScale() > 1) + { + // Scaled LEAs aren't yet supported. + return false; + } + GenTree* base = ind->Addr()->AsAddrMode()->Base(); GenTree* index = ind->Addr()->AsAddrMode()->Index(); if ((base == nullptr) || !base->IsLocal()) @@ -8076,20 +8082,18 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) case TYP_INT: newType = TYP_LONG; break; -#endif +#endif // TARGET_64BIT // TYP_FLOAT and TYP_DOUBLE aren't needed here - they're expected to // be converted to TYP_INT/TYP_LONG for constant value. - + // // TODO-CQ: - // 2 x LONG -> SIMD16 + // 2 x LONG/REF -> SIMD16 // 2 x SIMD16 -> SIMD32 // 2 x SIMD32 -> SIMD64 - // 2 x REF(null) -> SIMD16 // - // Where it's legal to use SIMD, e.g. we can't use SIMD16 for REF - // on x64 since it's not atomic. - + // where it's legal (e.g. SIMD is not atomic on x64) + // default: return; } @@ -8135,7 +8139,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) ind->gtFlags |= GTF_IND_UNALIGNED; } while (true); -#endif +#endif // TARGET_XARCH || TARGET_ARM64 } //------------------------------------------------------------------------ From 4fbbeefc5733fb1cb58ead098deaf6bdd82413cb Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 1 Oct 2023 18:03:59 +0200 Subject: [PATCH 10/17] Clean up --- src/coreclr/jit/lower.cpp | 66 ++++++++++----------------------------- 1 file changed, 16 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 643f14c5bc6da2..bafdebf65bcb4e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7833,7 +7833,7 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* da return false; } - // Data has to be constant. We might allow locals in the future. + // Data has to be INT_CNS, can be also VEC_CNS in future. 
if (!ind->Data()->IsCnsIntOrI()) { return false; @@ -7849,12 +7849,6 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* da data->val = ind->Data(); if (ind->Addr()->OperIs(GT_LEA)) { - if (ind->Addr()->AsAddrMode()->GetScale() > 1) - { - // Scaled LEAs aren't yet supported. - return false; - } - GenTree* base = ind->Addr()->AsAddrMode()->Base(); GenTree* index = ind->Addr()->AsAddrMode()->Index(); if ((base == nullptr) || !base->IsLocal()) @@ -7925,42 +7919,8 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* da } //------------------------------------------------------------------------ -// CanBeCoalesced: given two store coalescing data, -// determine whether they can be coalesced. -// -// Arguments: -// data1 - the first store coalescing data -// data2 - the second store coalescing data (order is not important) -// -// Return Value: -// true if the data can be coalesced, false otherwise. -// -static bool CanBeCoalesced(StoreCoalescingData* data1, StoreCoalescingData* data2) -{ - // base, index, scale and type all have to be the same - if ((data1->scale != data2->scale) || (data1->type != data2->type) || !GenTree::Compare(data1->base, data2->base) || - !GenTree::Compare(data1->index, data2->index)) - { - return false; - } - - // val has to be a constant - if (!data1->val->OperIsConst() && !data2->val->OperIsConst()) - { - return false; - } - - // Offset has to match the size of the type - // We don't support the same or overlapping offsets. - if (abs(data1->offset - data2->offset) != (int)genTypeSize(data1->type)) - { - return false; - } - return true; -} - -//------------------------------------------------------------------------ -// LowerStoreIndirCoalescing: Try to coalesce a STOREIND with previous STOREINDs. Example: +// LowerStoreIndirCoalescing: If the given STOREIND node is followed by a similar +// STOREIND node, try to merge them into a single store of a twice wider type. Example: // // * STOREIND int // +--* LCL_VAR byref V00 @@ -7991,10 +7951,10 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) return; } + // For now, we require the current STOREIND to have LEA (previous store may not have it) + // So we can easily adjust the offset, consider making it more flexible in future. if (!ind->Addr()->OperIs(GT_LEA)) { - // For now, we require the current STOREIND to have LEA (previous store may not have it) - // So we can easily adjust the offset, consider making it more flexible in future. 
return; } @@ -8021,7 +7981,6 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) StoreCoalescingData currData; StoreCoalescingData prevData; - GenTreeStoreInd* prevInd; GenTree* prevTree; // Get coalescing data for the current STOREIND @@ -8032,7 +7991,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) // Now we need to find the previous STOREIND, // we can ignore any NOPs or IL_OFFSETs in-between - while (prevTree != nullptr && prevTree->OperIs(GT_NOP, GT_IL_OFFSET)) + while ((prevTree != nullptr) && prevTree->OperIs(GT_NOP, GT_IL_OFFSET)) { prevTree = prevTree->gtPrev; } @@ -8042,7 +8001,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) { return; } - prevInd = prevTree->AsStoreInd(); + GenTreeStoreInd* prevInd = prevTree->AsStoreInd(); // Get coalescing data for the previous STOREIND if (!GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) || (prevTree == nullptr)) @@ -8057,8 +8016,15 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) return; } - // Check whether we can coalesce the two stores - if (!CanBeCoalesced(&prevData, &currData)) + // Base, Index, Scale and Type all have to match. + if ((prevData.scale != currData.scale) || (prevData.type != currData.type) || + !GenTree::Compare(prevData.base, currData.base) || !GenTree::Compare(prevData.index, currData.index)) + { + return; + } + + // Offset has to match the size of the type. We don't support the same or overlapping offsets. + if (abs(prevData.offset - currData.offset) != (int)genTypeSize(prevData.type)) { return; } From c50200be0fac43bdb3b19f47324cf747fb98ef1e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 1 Oct 2023 21:48:47 +0200 Subject: [PATCH 11/17] Simplify --- src/coreclr/jit/lower.cpp | 55 ++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index bafdebf65bcb4e..b52450b95f70fc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7804,12 +7804,12 @@ void Lowering::ContainCheckBitCast(GenTree* node) struct StoreCoalescingData { - var_types type; - GenTreeLclVarCommon* base; - GenTreeLclVarCommon* index; - uint32_t scale; - int offset; - GenTree* val; + var_types targetType; + GenTree* baseAddr; + GenTree* index; + GenTree* value; + uint32_t scale; + int offset; }; //------------------------------------------------------------------------ @@ -7845,27 +7845,27 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* da GenTree* trees[MaxTrees] = {ind->Data(), ind->Addr(), nullptr, nullptr}; int treesCount = 2; - data->type = ind->TypeGet(); - data->val = ind->Data(); + data->targetType = ind->TypeGet(); + data->value = ind->Data(); if (ind->Addr()->OperIs(GT_LEA)) { GenTree* base = ind->Addr()->AsAddrMode()->Base(); GenTree* index = ind->Addr()->AsAddrMode()->Index(); - if ((base == nullptr) || !base->IsLocal()) + if ((base == nullptr) || !base->OperIs(GT_LCL_VAR)) { // Base must be a local. It's possible for it to be nullptr when index is not null, // but let's ignore such cases. return false; } - if ((index != nullptr) && !index->IsLocal()) + if ((index != nullptr) && !index->OperIs(GT_LCL_VAR)) { // Index is either nullptr or a local. return false; } - data->base = base == nullptr ? nullptr : base->AsLclVarCommon(); - data->index = index == nullptr ? 
nullptr : index->AsLclVarCommon(); - data->scale = ind->Addr()->AsAddrMode()->GetScale(); - data->offset = ind->Addr()->AsAddrMode()->Offset(); + data->baseAddr = base == nullptr ? nullptr : base; + data->index = index == nullptr ? nullptr : index; + data->scale = ind->Addr()->AsAddrMode()->GetScale(); + data->offset = ind->Addr()->AsAddrMode()->Offset(); // Add index and base to the list of trees. trees[treesCount++] = base; @@ -7874,13 +7874,13 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* da trees[treesCount++] = index; } } - else if (ind->Addr()->OperIsLocal()) + else if (ind->Addr()->OperIs(GT_LCL_VAR)) { // Address is just a local, no offset, scale is 1 - data->base = ind->Addr()->AsLclVarCommon(); - data->index = nullptr; - data->scale = 1; - data->offset = 0; + data->baseAddr = ind->Addr(); + data->index = nullptr; + data->scale = 1; + data->offset = 0; } else { @@ -8001,9 +8001,9 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) { return; } - GenTreeStoreInd* prevInd = prevTree->AsStoreInd(); // Get coalescing data for the previous STOREIND + GenTreeStoreInd* prevInd = prevTree->AsStoreInd(); if (!GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) || (prevTree == nullptr)) { return; @@ -8016,15 +8016,16 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) return; } - // Base, Index, Scale and Type all have to match. - if ((prevData.scale != currData.scale) || (prevData.type != currData.type) || - !GenTree::Compare(prevData.base, currData.base) || !GenTree::Compare(prevData.index, currData.index)) + // BaseAddr, Index, Scale and Type all have to match. + if ((prevData.scale != currData.scale) || (prevData.targetType != currData.targetType) || + !GenTree::Compare(prevData.baseAddr, currData.baseAddr) || + !GenTree::Compare(prevData.index, currData.index)) { return; } // Offset has to match the size of the type. We don't support the same or overlapping offsets. - if (abs(prevData.offset - currData.offset) != (int)genTypeSize(prevData.type)) + if (abs(prevData.offset - currData.offset) != (int)genTypeSize(prevData.targetType)) { return; } @@ -8079,10 +8080,10 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) // We currently only support these constants for val // Will be extended to GT_CNS_VEC in future. - assert(prevData.val->IsCnsIntOrI() && currData.val->IsCnsIntOrI()); + assert(prevData.value->IsCnsIntOrI() && currData.value->IsCnsIntOrI()); - size_t lowerCns = (size_t)prevData.val->AsIntCon()->IconValue(); - size_t upperCns = (size_t)currData.val->AsIntCon()->IconValue(); + size_t lowerCns = (size_t)prevData.value->AsIntCon()->IconValue(); + size_t upperCns = (size_t)currData.value->AsIntCon()->IconValue(); // if the previous store was at a higher address, swap the constants if (prevData.offset > currData.offset) From b9c3f6c8293d94ab08cd202d668b0be46a29c3bb Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Mon, 2 Oct 2023 21:50:45 +0200 Subject: [PATCH 12/17] Apply suggestions from code review Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/lower.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index b52450b95f70fc..fb416469daa898 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7973,7 +7973,6 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) do { // This check is not really needed, just for better throughput. - // We only support these types for the initial version. 
if (!ind->TypeIs(TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT))
 {
 return;
@@ -8079,7 +8078,6 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
 ind->Data()->gtType = newType;

 // We currently only support these constants for val
- // Will be extended to GT_CNS_VEC in future.
 assert(prevData.value->IsCnsIntOrI() && currData.value->IsCnsIntOrI());

 size_t lowerCns = (size_t)prevData.value->AsIntCon()->IconValue();

From 3cc45859838a84f10fc0d2d0cb8fc5bbc786dbf5 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Mon, 2 Oct 2023 23:20:45 +0200
Subject: [PATCH 13/17] Address feedback

---
 src/coreclr/jit/lower.cpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index fb416469daa898..723d5a3ab88645 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -7817,6 +7817,7 @@ struct StoreCoalescingData
 // store coalescing including pointer to the previous node.
 //
 // Arguments:
+// comp - the compiler instance
 // ind - the STOREIND node
 // data - [OUT] the data needed for store coalescing
 // prevTree - [OUT] the pointer to the previous node
@@ -7825,7 +7826,7 @@ struct StoreCoalescingData
 // true if the data was successfully retrieved, false otherwise.
 // Basically, false means that we definitely can't do store coalescing.
 //
-static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* data, GenTree** prevTree)
+static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCoalescingData* data, GenTree** prevTree)
 {
 // Don't merge volatile stores.
 if (ind->IsVolatile())
@@ -7876,6 +7877,11 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* da
 }
 else if (ind->Addr()->OperIs(GT_LCL_VAR))
 {
+ if (comp->lvaGetDesc(ind->Addr()->AsLclVarCommon())->IsAddressExposed())
+ {
+ return false;
+ }
+
 // Address is just a local, no offset, scale is 1
 data->baseAddr = ind->Addr();
 data->index = nullptr;
@@ -7983,7 +7989,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
 GenTree* prevTree;

 // Get coalescing data for the current STOREIND
- if (!GetStoreCoalescingData(ind, &currData, &prevTree))
+ if (!GetStoreCoalescingData(comp, ind, &currData, &prevTree))
 {
 return;
 }
@@ -8003,17 +8009,14 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)

 // Get coalescing data for the previous STOREIND
 GenTreeStoreInd* prevInd = prevTree->AsStoreInd();
- if (!GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) || (prevTree == nullptr))
+ if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData, &prevTree) || (prevTree == nullptr))
 {
 return;
 }

- // Both should be unused
+ // STOREINDs aren't value nodes.
 LIR::Use use;
- if (BlockRange().TryGetUse(ind, &use) || BlockRange().TryGetUse(prevInd, &use))
- {
- return;
- }
+ assert(!BlockRange().TryGetUse(prevInd, &use) && !BlockRange().TryGetUse(ind, &use));

 // BaseAddr, Index, Scale and Type all have to match.
if ((prevData.scale != currData.scale) || (prevData.targetType != currData.targetType) || From f0dd22aebb1e4388443890e4a5dd807f5663a18f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 2 Oct 2023 23:26:40 +0200 Subject: [PATCH 14/17] Address feedback --- src/coreclr/jit/lower.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 723d5a3ab88645..3f9de34990a679 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7852,15 +7852,17 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo { GenTree* base = ind->Addr()->AsAddrMode()->Base(); GenTree* index = ind->Addr()->AsAddrMode()->Index(); - if ((base == nullptr) || !base->OperIs(GT_LCL_VAR)) + if ((base == nullptr) || !base->OperIs(GT_LCL_VAR) || comp->lvaVarAddrExposed(base->AsLclVar()->GetLclNum())) { // Base must be a local. It's possible for it to be nullptr when index is not null, // but let's ignore such cases. return false; } - if ((index != nullptr) && !index->OperIs(GT_LCL_VAR)) + + if (((index != nullptr) && !index->OperIs(GT_LCL_VAR)) || + comp->lvaVarAddrExposed(index->AsLclVar()->GetLclNum())) { - // Index is either nullptr or a local. + // Index should be either nullptr or a local. return false; } data->baseAddr = base == nullptr ? nullptr : base; @@ -7875,13 +7877,8 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo trees[treesCount++] = index; } } - else if (ind->Addr()->OperIs(GT_LCL_VAR)) + else if (ind->Addr()->OperIs(GT_LCL_VAR) && !comp->lvaVarAddrExposed(ind->Addr()->AsLclVar()->GetLclNum())) { - if (comp->lvaGetDesc(ind->Addr()->AsLclVarCommon())->IsAddressExposed()) - { - return false; - } - // Address is just a local, no offset, scale is 1 data->baseAddr = ind->Addr(); data->index = nullptr; From 5c954fa5fa3ab3085c792db308a542dcd0a4de75 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 2 Oct 2023 23:34:18 +0200 Subject: [PATCH 15/17] Clean up --- src/coreclr/jit/lower.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 3f9de34990a679..6693bb894f49ea 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7859,12 +7859,13 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo return false; } - if (((index != nullptr) && !index->OperIs(GT_LCL_VAR)) || - comp->lvaVarAddrExposed(index->AsLclVar()->GetLclNum())) + if ((index != nullptr) && + (!index->OperIs(GT_LCL_VAR) || comp->lvaVarAddrExposed(index->AsLclVar()->GetLclNum()))) { // Index should be either nullptr or a local. return false; } + data->baseAddr = base == nullptr ? nullptr : base; data->index = index == nullptr ? 
nullptr : index; data->scale = ind->Addr()->AsAddrMode()->GetScale(); From 62f9bedc09bf3088c049042de3d6777342028af2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 3 Oct 2023 01:08:34 +0200 Subject: [PATCH 16/17] Address feedback --- src/coreclr/jit/lower.cpp | 67 ++++++++++++--------------------------- 1 file changed, 20 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 6693bb894f49ea..b74dd4ca15db9e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7820,13 +7820,12 @@ struct StoreCoalescingData // comp - the compiler instance // ind - the STOREIND node // data - [OUT] the data needed for store coalescing -// prevTree - [OUT] the pointer to the previous node // // Return Value: // true if the data was successfully retrieved, false otherwise. // Basically, false means that we definitely can't do store coalescing. // -static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCoalescingData* data, GenTree** prevTree) +static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCoalescingData* data) { // Don't merge volatile stores. if (ind->IsVolatile()) @@ -7840,12 +7839,6 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo return false; } - // We will need to walk previous trees (up to 4 nodes in a store) - // We already know that we have value and addr nodes, LEA might add more. - const int MaxTrees = 4; - GenTree* trees[MaxTrees] = {ind->Data(), ind->Addr(), nullptr, nullptr}; - int treesCount = 2; - data->targetType = ind->TypeGet(); data->value = ind->Data(); if (ind->Addr()->OperIs(GT_LEA)) @@ -7870,13 +7863,6 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo data->index = index == nullptr ? nullptr : index; data->scale = ind->Addr()->AsAddrMode()->GetScale(); data->offset = ind->Addr()->AsAddrMode()->Offset(); - - // Add index and base to the list of trees. - trees[treesCount++] = base; - if (index != nullptr) - { - trees[treesCount++] = index; - } } else if (ind->Addr()->OperIs(GT_LCL_VAR) && !comp->lvaVarAddrExposed(ind->Addr()->AsLclVar()->GetLclNum())) { @@ -7891,34 +7877,6 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo // Address is not LEA or local. return false; } - - // We expect all the trees to be found in gtPrev (we don't care about the order, just make sure we don't have - // unexpected trees in-between) - GenTree* prev = ind->gtPrev; - for (int i = 0; i < treesCount; i++) - { - bool found = false; - - // We don't need any O(1) lookups here, the array is too small - for (int j = 0; j < MaxTrees; j++) - { - if (trees[j] == prev) - { - trees[j] = nullptr; - found = true; - break; - } - } - - if (!found) - { - // Unexpected node in-between. - return false; - } - prev = prev->gtPrev; - } - - *prevTree = prev; return true; } @@ -7984,14 +7942,22 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) StoreCoalescingData currData; StoreCoalescingData prevData; - GenTree* prevTree; // Get coalescing data for the current STOREIND - if (!GetStoreCoalescingData(comp, ind, &currData, &prevTree)) + if (!GetStoreCoalescingData(comp, ind, &currData)) { return; } + bool isClosedRange = false; + // Now we need to find the very first LIR node representing the current STOREIND + // and make sure that there are no other unexpected nodes in-between. 
+ LIR::ReadOnlyRange currIndRange = BlockRange().GetTreeRange(ind, &isClosedRange); + if (!isClosedRange) + { + return; + } + GenTree* prevTree = currIndRange.FirstNode()->gtPrev; // Now we need to find the previous STOREIND, // we can ignore any NOPs or IL_OFFSETs in-between while ((prevTree != nullptr) && prevTree->OperIs(GT_NOP, GT_IL_OFFSET)) @@ -8007,7 +7973,14 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) // Get coalescing data for the previous STOREIND GenTreeStoreInd* prevInd = prevTree->AsStoreInd(); - if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData, &prevTree) || (prevTree == nullptr)) + if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData) || (prevTree == nullptr)) + { + return; + } + + // Same for the previous STOREIND, make sure there are no unexpected nodes around. + LIR::ReadOnlyRange prevIndRange = BlockRange().GetTreeRange(prevInd, &isClosedRange); + if (!isClosedRange) { return; } @@ -8066,7 +8039,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) } // Delete previous STOREIND entirely - BlockRange().Remove(prevTree->gtNext, prevInd); + BlockRange().Remove(std::move(prevIndRange)); // We know it's always LEA for now GenTreeAddrMode* addr = ind->Addr()->AsAddrMode(); From b2e4b1a623b256da7f73f86f42aae046868a50fa Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 3 Oct 2023 10:35:00 +0200 Subject: [PATCH 17/17] Update src/coreclr/jit/lower.cpp Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index b74dd4ca15db9e..7bc73d6bcecf06 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7973,7 +7973,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind) // Get coalescing data for the previous STOREIND GenTreeStoreInd* prevInd = prevTree->AsStoreInd(); - if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData) || (prevTree == nullptr)) + if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData)) { return; }
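
The constant-merging arithmetic that runs through this whole series (trim each value to the old store size, order the two values by offset, then shift and OR) can be checked outside the JIT. The sketch below is a standalone C++ program, not code from these patches: the helper name CoalesceConstants is hypothetical, and it assumes a little-endian target and a store size under 8 bytes, matching the TYP_BYTE/TYP_SHORT/TYP_INT cases the patches handle.

// Standalone sketch of the trim/swap/shift steps used by LowerStoreIndirCoalescing.
// Assumptions: little-endian layout, storeSize < 8; CoalesceConstants is illustrative
// and does not exist in the JIT sources.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <utility>

static uint64_t CoalesceConstants(uint64_t cns1, int off1, uint64_t cns2, int off2, int storeSize)
{
    // The two stores must be adjacent: exactly storeSize bytes apart, no overlap.
    assert((std::abs(off1 - off2) == storeSize) && (storeSize < 8));

    uint64_t lowerCns = cns1;
    uint64_t upperCns = cns2;
    // The store at the lower address supplies the low bits on a little-endian target.
    if (off1 > off2)
    {
        std::swap(lowerCns, upperCns);
    }

    // Trim each constant to the old store size: 0xFF for byte stores,
    // 0xFFFF for short stores, 0xFFFFFFFF for int stores.
    const uint64_t mask = ~uint64_t(0) >> (64 - storeSize * 8);
    lowerCns &= mask;
    upperCns &= mask;

    return lowerCns | (upperCns << (storeSize * 8));
}

int main()
{
    // The example from patch 01: int 0x1 at offset 0 and int 0x2 at offset 4
    // coalesce into the long constant 0x200000001.
    uint64_t merged = CoalesceConstants(0x1, 0, 0x2, 4, 4);
    assert(merged == 0x200000001ULL);
    printf("merged constant: 0x%llx\n", (unsigned long long)merged);
    return 0;
}

For two short stores the same math yields an int constant, which is why the loop introduced in patch 07 can keep folding: short + short becomes int, and on 64-bit targets int + int becomes long.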