From af817cb9378d649175d5ca80e21ce6e3f82f762b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 1 Apr 2026 12:29:37 -0700 Subject: [PATCH 01/26] funclet regalloc and frame gen --- src/coreclr/jit/codegencommon.cpp | 4 +- src/coreclr/jit/codegeninterface.h | 28 +- src/coreclr/jit/codegenwasm.cpp | 102 ++++--- src/coreclr/jit/lclvars.cpp | 8 +- src/coreclr/jit/regallocwasm.cpp | 451 +++++++++++++++++++---------- src/coreclr/jit/regallocwasm.h | 12 +- 6 files changed, 403 insertions(+), 202 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2cd6427a87132b..ffef8c98484de4 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -337,7 +337,9 @@ CodeGenInterface::CodeGenInterface(Compiler* theCompiler) , m_compiler(theCompiler) , treeLifeUpdater(nullptr) #ifdef TARGET_WASM - , WasmLocalsDecls(theCompiler->getAllocator(CMK_Codegen)) + , m_spRegs(theCompiler->getAllocator(CMK_Codegen)) + , m_fpRegs(theCompiler->getAllocator(CMK_Codegen)) + , WasmLocalsDecls(theCompiler->compFuncInfoCount, nullptr, theCompiler->getAllocator(CMK_Codegen)) #endif { } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index d323a56a0b7dd1..7ab7a88718c906 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -271,29 +271,27 @@ class CodeGenInterface } #if !HAS_FIXED_REGISTER_SET + private: - // For targets without a fixed SP/FP, these are the registers with which they are associated. 
- PhasedVar m_cgStackPointerReg = REG_NA; - PhasedVar m_cgFramePointerReg = REG_NA; + jitstd::vector m_spRegs; + jitstd::vector m_fpRegs; public: - void SetStackPointerReg(regNumber reg) + void SetStackPointerRegs(jitstd::vector& regs) { - assert(reg != REG_NA); - m_cgStackPointerReg = reg; + m_spRegs = regs; } - void SetFramePointerReg(regNumber reg) + void SetFramePointerRegs(jitstd::vector& regs) { - assert(reg != REG_NA); - m_cgFramePointerReg = reg; + m_fpRegs = regs; } - regNumber GetStackPointerReg() const + regNumber GetStackPointerReg(unsigned funcletIndex) const { - return m_cgStackPointerReg; + return m_spRegs[funcletIndex]; } - regNumber GetFramePointerReg() const + regNumber GetFramePointerReg(unsigned funcletIndex) const { - return m_cgFramePointerReg; + return m_fpRegs[funcletIndex]; } #else // HAS_FIXED_REGISTER_SET regNumber GetStackPointerReg() const @@ -407,7 +405,9 @@ class CodeGenInterface unsigned Count; }; - jitstd::vector WasmLocalsDecls; + // Per-funclet vectors of local declarations + // + jitstd::vector*> WasmLocalsDecls; #endif #ifdef DEBUG diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 67b60e64d15304..52c413f8731d1e 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -50,8 +50,9 @@ void CodeGen::genMarkLabelsForCodegen() void CodeGen::genBeginFnProlog() { unsigned localsCount = 0; - GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, WasmLocalsDecls.size()); - for (WasmLocalsDecl& decl : WasmLocalsDecls) + assert(m_compiler->compCurrFuncIdx == 0); + GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, WasmLocalsDecls[0]->size()); + for (WasmLocalsDecl& decl : *WasmLocalsDecls[0]) { GetEmitter()->emitIns_I_Ty(INS_local_decl, decl.Count, decl.Type, localsCount); localsCount += decl.Count; @@ -77,7 +78,7 @@ void CodeGen::genPushCalleeSavedRegisters() void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) { 
assert(m_compiler->compGeneratingProlog); - regNumber spReg = GetStackPointerReg(); + regNumber spReg = GetStackPointerReg(m_compiler->compCurrFuncIdx); if (spReg == REG_NA) { assert(!isFramePointerUsed()); @@ -102,7 +103,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni GetEmitter()->emitIns(INS_I_sub); GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, spLclIndex); } - regNumber fpReg = GetFramePointerReg(); + regNumber fpReg = GetFramePointerReg(m_compiler->compCurrFuncIdx); if ((fpReg != REG_NA) && (fpReg != spReg)) { GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, spLclIndex); @@ -168,7 +169,8 @@ void CodeGen::genHomeRegisterParamsOutsideProlog() storeType = genActualType(varDsc); } - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(INS_local_get, emitActualTypeSize(storeType), WasmRegToIndex(segment.GetRegister())); GetEmitter()->emitIns_S(ins_Store(storeType), emitActualTypeSize(storeType), lclNum, offset); @@ -255,28 +257,40 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() { } +//------------------------------------------------------------------------ +// genFuncletProlog: codegen for funclet prologs. 
+// +// Arguments: +// block - the funclet entry block +// void CodeGen::genFuncletProlog(BasicBlock* block) { -#ifdef DEBUG - if (verbose) + assert(m_compiler->bbIsFuncletBeg(block)); + JITDUMP("*************** In genFuncletProlog()\n"); + + // Local sig for the funclet + // + unsigned localsCount = 0; + unsigned funcletIndex = m_compiler->compCurrFuncIdx; + assert(funcletIndex > 0); + GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, WasmLocalsDecls[funcletIndex]->size()); + for (WasmLocalsDecl& decl : *WasmLocalsDecls[funcletIndex]) { - printf("*************** In genFuncletProlog()\n"); + GetEmitter()->emitIns_I_Ty(INS_local_decl, decl.Count, decl.Type, localsCount); + localsCount += decl.Count; } -#endif - NYI_WASM("genFuncletProlog"); + // All the funclet params are used from their home registers, so nothing + // needs homing here. } +//------------------------------------------------------------------------ +// genFuncletEpilog: codegen for funclet epilogs. +// +// For Wasm, funclet epilogs are empty +// void CodeGen::genFuncletEpilog() { -#ifdef DEBUG - if (verbose) - { - printf("*************** In genFuncletEpilog()\n"); - } -#endif - - NYI_WASM("genFuncletEpilog"); } //------------------------------------------------------------------------ @@ -1753,7 +1767,8 @@ void CodeGen::genJumpToThrowHlpBlk(SpecialCodeKind codeKind) { GetEmitter()->emitIns_BlockTy(INS_if); // Throw helpers are managed so we need to push the stack pointer before genEmitHelperCall. 
- GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); genEmitHelperCall(m_compiler->acdHelper(codeKind), 0, EA_UNKNOWN); GetEmitter()->emitIns(INS_end); } @@ -2100,7 +2115,7 @@ void CodeGen::genCodeForLclAddr(GenTreeLclFld* lclAddrNode) unsigned lclNum = lclAddrNode->GetLclNum(); unsigned lclOffset = lclAddrNode->GetLclOffs(); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); if ((lclOffset != 0) || (m_compiler->lvaFrameAddress(lclNum, &FPBased) != 0)) { GetEmitter()->emitIns_S(INS_I_const, EA_PTRSIZE, lclNum, lclOffset); @@ -2120,7 +2135,7 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) assert(tree->OperIs(GT_LCL_FLD)); LclVarDsc* varDsc = m_compiler->lvaGetDesc(tree); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_S(ins_Load(tree->TypeGet()), emitTypeSize(tree), tree->GetLclNum(), tree->GetLclOffs()); WasmProduceReg(tree); } @@ -2143,7 +2158,8 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) if (!varDsc->lvIsRegCandidate()) { var_types type = varDsc->GetRegisterType(tree); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_S(ins_Load(type), emitTypeSize(type), tree->GetLclNum(), 0); WasmProduceReg(tree); } @@ -2758,16 +2774,19 @@ void CodeGen::genLclHeap(GenTree* tree) // Decrease the stack pointer by amount // - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, 
WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, amount); GetEmitter()->emitIns(INS_I_sub); - GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); // Zero the newly allocated space if needed // if (needsZeroing) { - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, 0); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, amount); @@ -2779,13 +2798,16 @@ void CodeGen::genLclHeap(GenTree* tree) // SP now points at the reserved space just below the allocation. // Save the frame pointer at sp[0]. // - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, 0); // Leave the base address of the allocated region on the stack. 
// - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, reservedSpace); GetEmitter()->emitIns(INS_I_add); } @@ -2822,7 +2844,8 @@ void CodeGen::genLclHeap(GenTree* tree) GetEmitter()->emitIns(INS_else); { // Prepare to subtract from SP - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); // Add reserved space and round up request size to a multiple of STACK_ALIGN GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(sizeReg)); @@ -2839,11 +2862,13 @@ void CodeGen::genLclHeap(GenTree* tree) // Subtract rounded-up size from SP value, and save back to SP GetEmitter()->emitIns(INS_I_sub); - GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); if (needsZeroing) { - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, 0); GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(sizeReg)); // TODO-WASM-CQ: possibly do small fills directly @@ -2851,12 +2876,15 @@ void CodeGen::genLclHeap(GenTree* tree) } // Re-establish unwind invariant: store FP at SP[0] - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + 
WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, 0); // Return value - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, + WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, reservedSpace); GetEmitter()->emitIns(INS_I_add); } @@ -2936,7 +2964,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD)); GenTreeLclVarCommon* lclVar = src->AsLclVarCommon(); bool fpBased; - srcReg = GetFramePointerReg(); + srcReg = GetFramePointerReg(m_compiler->compCurrFuncIdx); srcOffset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs(); assert(fpBased); } @@ -2945,7 +2973,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) { GenTreeLclVarCommon* lclVar = dest->AsLclVarCommon(); bool fpBased; - destReg = GetFramePointerReg(); + destReg = GetFramePointerReg(m_compiler->compCurrFuncIdx); destOffset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs(); assert(fpBased); } @@ -3156,7 +3184,7 @@ void CodeGen::genLoadLocalIntoReg(regNumber targetReg, unsigned lclNum) { LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); var_types type = varDsc->GetRegisterType(); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg())); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); GetEmitter()->emitIns_S(ins_Load(type), emitTypeSize(type), lclNum, 0); GetEmitter()->emitIns_I(INS_local_set, emitTypeSize(type), WasmRegToIndex(targetReg)); } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 59bf4815f7f4e9..e4874070f7471d 
100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -6029,9 +6029,15 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength) offset = lvaFrameAddress(lclNum, compLocallocUsed, &baseReg, 0, /* isFloatUsage */ false); #else bool EBPbased; - offset = lvaFrameAddress(lclNum, &EBPbased); + offset = lvaFrameAddress(lclNum, &EBPbased); + +#if HAS_FIXED_REGISTER_SET baseReg = EBPbased ? codeGen->GetFramePointerReg() : codeGen->GetStackPointerReg(); +#else + // Just use the sp/fp from the function region + baseReg = EBPbased ? codeGen->GetFramePointerReg(0) : codeGen->GetStackPointerReg(0); #endif +#endif // TARGET_ARM int printed = printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index cc272032d385c8..67bc7deda99cae 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -18,6 +18,11 @@ RegAllocInterface* GetRegisterAllocator(Compiler* compiler) WasmRegAlloc::WasmRegAlloc(Compiler* compiler) : m_compiler(compiler) , m_codeGen(compiler->codeGen) + , m_currentBlock(nullptr) + , m_currentFunclet(0) + , m_spRegs(compiler->getAllocator(CMK_LSRA)) + , m_fpRegs(compiler->getAllocator(CMK_LSRA)) + , m_exRegs(compiler->getAllocator(CMK_LSRA)) { } @@ -87,10 +92,15 @@ void WasmRegAlloc::IdentifyCandidates() } } - if (anyFrameLocals || m_compiler->compLocallocUsed) + if (anyFrameLocals || m_compiler->compLocallocUsed || (m_compiler->compFuncInfoCount > 1)) { AllocateFramePointer(); } + + if (m_compiler->compFuncInfoCount > 1) + { + AllocateExceptionPointer(); + } } //------------------------------------------------------------------------ @@ -138,9 +148,15 @@ void WasmRegAlloc::InitializeStackPointer() // void WasmRegAlloc::AllocateStackPointer() { - if (m_spReg == REG_NA) + + if (m_spRegs.size() == 0) { - m_spReg = AllocateVirtualRegister(TYP_I_IMPL); + regNumber spReg = 
AllocateVirtualRegister(TYP_I_IMPL); + + for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) + { + m_spRegs.push_back(spReg); + } } } @@ -149,18 +165,44 @@ void WasmRegAlloc::AllocateStackPointer() // void WasmRegAlloc::AllocateFramePointer() { - assert(m_fpReg == REG_NA); + assert(m_fpRegs.size() == 0); // FP is initialized with SP in the prolog, so ensure the latter is allocated. AllocateStackPointer(); - if (m_compiler->compLocallocUsed) + // If we have funclets or localloc, we must have an FP. + bool const needsFP = m_compiler->compLocallocUsed || (m_compiler->compFuncInfoCount > 1); + regNumber const fpReg = needsFP ? AllocateVirtualRegister(TYP_I_IMPL) : m_spRegs[0]; + + for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) { - m_fpReg = AllocateVirtualRegister(TYP_I_IMPL); + m_fpRegs.push_back(fpReg); } - else +} + +//------------------------------------------------------------------------ +// AllocateExceptionPointer: Allocate a virtual register for the exception pointer. +// +void WasmRegAlloc::AllocateExceptionPointer() +{ + assert(m_exRegs.size() == 0); + regNumber exReg = REG_NA; + m_exRegs.resize(m_compiler->compFuncInfoCount, REG_NA); + + for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) { - m_fpReg = m_spReg; + const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[i]; + EHblkDsc* const ehDsc = m_compiler->ehGetDsc(funcInfo.funEHIndex); + + if (ehDsc->HasCatchHandler()) + { + if (exReg == REG_NA) + { + exReg = AllocateVirtualRegister(TYP_REF); + } + + m_exRegs[i] = exReg; + } } } @@ -268,6 +310,11 @@ void WasmRegAlloc::CollectReferencesForBlock(BasicBlock* block) { m_currentBlock = block; + if (m_compiler->bbIsFuncletBeg(block)) + { + m_currentFunclet = m_compiler->funGetFuncIdx(block); + } + // We may modify the range while iterating. 
// // For now, we assume reordering happens only for already visited @@ -352,6 +399,10 @@ void WasmRegAlloc::CollectReferencesForNode(GenTree* node) CollectReferencesForIndexAddr(node->AsIndexAddr()); break; + case GT_CATCH_ARG: + CollectReferencesForCatchArg(node); + break; + default: assert(!node->OperIsLocalStore()); break; @@ -517,11 +568,26 @@ void WasmRegAlloc::CollectReferencesForLclVar(GenTreeLclVar* lclVar) if (lclVar->GetLclNum() == m_compiler->lvaWasmSpArg) { lclVar->ChangeOper(GT_PHYSREG); - lclVar->AsPhysReg()->gtSrcReg = m_spReg; + lclVar->AsPhysReg()->gtSrcReg = m_spRegs[m_currentFunclet]; CollectReference(lclVar); } } +//------------------------------------------------------------------------ +// CollectReferencesForCatchArg: Collect virtual register references for a CATCH_ARG node. +// +// Rewrites the CATCH_ARG node into a PHYSREG that reads the current funclet's exception register. +// +// Arguments: +// catchArg - The CATCH_ARG node +// +void WasmRegAlloc::CollectReferencesForCatchArg(GenTree* catchArg) +{ + catchArg->ChangeOper(GT_PHYSREG); + catchArg->AsPhysReg()->gtSrcReg = m_exRegs[m_currentFunclet]; + CollectReference(catchArg); +} + //------------------------------------------------------------------------ // RewriteLocalStackStore: rewrite a store to the stack to STOREIND(LCL_ADDR, ...). 
// @@ -732,180 +798,246 @@ void WasmRegAlloc::ResolveReferences() unsigned Index; }; - PhysicalRegBank virtToPhysRegMap[static_cast(WasmValueType::Count)]; - for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) + // Process funclet by funclet + // + for (m_currentFunclet = 0; m_currentFunclet < m_compiler->compFuncInfoCount; m_currentFunclet++) { - VirtualRegStack& virtRegs = m_virtualRegs[static_cast(type)]; - PhysicalRegBank& physRegs = virtToPhysRegMap[static_cast(type)]; - physRegs.DeclaredCount = virtRegs.Count(); - } + PhysicalRegBank virtToPhysRegMap[static_cast(WasmValueType::Count)]; + for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) + { + VirtualRegStack& virtRegs = m_virtualRegs[static_cast(type)]; + PhysicalRegBank& physRegs = virtToPhysRegMap[static_cast(type)]; + physRegs.DeclaredCount = virtRegs.Count(); + } - unsigned indexBase = 0; - for (unsigned argLclNum = 0; argLclNum < m_compiler->info.compArgsCount; argLclNum++) - { - const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(argLclNum); - for (const ABIPassingSegment& segment : abiInfo.Segments()) + unsigned indexBase = 0; + const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[m_currentFunclet]; + + switch (funcInfo.funKind) { - if (segment.IsPassedInRegister()) + case FuncKind::FUNC_ROOT: { - WasmValueType argType; - regNumber argReg = segment.GetRegister(); - unsigned argIndex = UnpackWasmReg(argReg, &argType); - indexBase = max(indexBase, argIndex + 1); - - LclVarDsc* argVarDsc = m_compiler->lvaGetDesc(argLclNum); - if ((argVarDsc->GetRegNum() == argReg) || (m_spReg == argReg)) + assert(m_currentFunclet == 0); + for (unsigned argLclNum = 0; argLclNum < m_compiler->info.compArgsCount; argLclNum++) { - assert(abiInfo.HasExactlyOneRegisterSegment()); - virtToPhysRegMap[static_cast(argType)].DeclaredCount--; + const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(argLclNum); + for (const 
ABIPassingSegment& segment : abiInfo.Segments()) + { + if (segment.IsPassedInRegister()) + { + WasmValueType argType; + regNumber argReg = segment.GetRegister(); + unsigned argIndex = UnpackWasmReg(argReg, &argType); + indexBase = max(indexBase, argIndex + 1); + + LclVarDsc* argVarDsc = m_compiler->lvaGetDesc(argLclNum); + if ((argVarDsc->GetRegNum() == argReg) || (m_spRegs[0] == argReg)) + { + assert(abiInfo.HasExactlyOneRegisterSegment()); + virtToPhysRegMap[static_cast(argType)].DeclaredCount--; + } + + const ParameterRegisterLocalMapping* mapping = + m_compiler->FindParameterRegisterLocalMappingByRegister(argReg); + if ((mapping != nullptr) && + (m_compiler->lvaGetDesc(mapping->LclNum)->GetRegNum() == argReg)) + { + virtToPhysRegMap[static_cast(argType)].DeclaredCount--; + } + } + } } + } + break; - const ParameterRegisterLocalMapping* mapping = - m_compiler->FindParameterRegisterLocalMappingByRegister(argReg); - if ((mapping != nullptr) && (m_compiler->lvaGetDesc(mapping->LclNum)->GetRegNum() == argReg)) + case FuncKind::FUNC_HANDLER: + case FuncKind::FUNC_FILTER: + { + assert(m_currentFunclet > 0); + + // TODO: add ABI information for funclets? + // + // All funclets have two initial arguments: sp and fp. + // + WasmValueType argType = TypeToWasmValueType(TYP_I_IMPL); + + virtToPhysRegMap[static_cast(argType)].DeclaredCount--; + virtToPhysRegMap[static_cast(argType)].DeclaredCount--; + indexBase += 2; + + // Filter and catch have a third argument - the exception object. 
+ // + EHblkDsc* const eh = m_compiler->ehGetDsc(funcInfo.funEHIndex); + if ((funcInfo.funKind == FuncKind::FUNC_FILTER) || eh->HasCatchHandler()) { + argType = TypeToWasmValueType(TYP_REF); virtToPhysRegMap[static_cast(argType)].DeclaredCount--; + indexBase += 1; } } - } - } - for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) - { - PhysicalRegBank& physRegs = virtToPhysRegMap[static_cast(type)]; - physRegs.IndexBase = indexBase; - physRegs.Index = indexBase; - indexBase += physRegs.DeclaredCount; - } - auto allocPhysReg = [&](regNumber virtReg, LclVarDsc* varDsc) { - regNumber physReg; - if ((varDsc != nullptr) && varDsc->lvIsRegArg && !varDsc->lvIsStructField) - { - unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); - const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(lclNum); - assert(abiInfo.HasExactlyOneRegisterSegment()); - physReg = abiInfo.Segment(0).GetRegister(); - } - else if ((varDsc != nullptr) && varDsc->lvIsParamRegTarget) - { - unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); - const ParameterRegisterLocalMapping* mapping = - m_compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); - assert(mapping != nullptr); - physReg = mapping->RegisterSegment->GetRegister(); + break; + + default: + assert(!"Unexpected funclet kind"); + break; } - else + + for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) { - WasmValueType type = WasmRegToType(virtReg); - unsigned physRegIndex = virtToPhysRegMap[static_cast(type)].Index++; - physReg = MakeWasmReg(physRegIndex, type); + PhysicalRegBank& physRegs = virtToPhysRegMap[static_cast(type)]; + physRegs.IndexBase = indexBase; + physRegs.Index = indexBase; + indexBase += physRegs.DeclaredCount; } - assert(genIsValidReg(physReg)); - if ((varDsc != nullptr) && varDsc->lvIsRegCandidate()) - { - if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget) + auto allocPhysReg = [&](regNumber virtReg, LclVarDsc* varDsc) { + regNumber 
physReg; + if ((varDsc != nullptr) && varDsc->lvIsRegArg && !varDsc->lvIsStructField) + { + unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); + const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(lclNum); + assert(abiInfo.HasExactlyOneRegisterSegment()); + physReg = abiInfo.Segment(0).GetRegister(); + } + else if ((varDsc != nullptr) && varDsc->lvIsParamRegTarget) + { + unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); + const ParameterRegisterLocalMapping* mapping = + m_compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); + assert(mapping != nullptr); + physReg = mapping->RegisterSegment->GetRegister(); + } + else { - // This is the register codegen will move the local from its ABI location in prolog. - varDsc->SetArgInitReg(physReg); + WasmValueType type = WasmRegToType(virtReg); + unsigned physRegIndex = virtToPhysRegMap[static_cast(type)].Index++; + physReg = MakeWasmReg(physRegIndex, type); } - // This is the location for the "first" def. In our case all defs share the same register. - varDsc->SetRegNum(physReg); - varDsc->lvRegister = true; - varDsc->lvOnFrame = false; - } - return physReg; - }; + assert(genIsValidReg(physReg)); + if ((varDsc != nullptr) && varDsc->lvIsRegCandidate()) + { + if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget) + { + // This is the register codegen will move the local from its ABI location in prolog. + varDsc->SetArgInitReg(physReg); + } - // Allocate all our virtual registers to physical ones. - regNumber spVirtReg = m_spReg; - if (spVirtReg != REG_NA) - { - m_spReg = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); - } - if (m_fpReg != REG_NA) - { - m_fpReg = (spVirtReg == m_fpReg) ? m_spReg : allocPhysReg(m_fpReg, nullptr); - } + // This is the location for the "first" def. In our case all defs share the same register. 
+ // + // Note this holds even across funclets, since any local that is referred to in multiple funclets + // or in both funclets and the main method won't be a register candidate (we hope). + // (need to verify that if we have a local with two disjoint live ranges that doesn't cross a + // funclet boundary that we get this right). + // + varDsc->SetRegNum(physReg); + varDsc->lvRegister = true; + varDsc->lvOnFrame = false; + } + return physReg; + }; - for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) - { - unsigned lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); - if (lclNum == m_compiler->lvaWasmSpArg) + // Allocate all our virtual registers to physical ones. + // + regNumber spVirtReg = m_spRegs[m_currentFunclet]; + if (spVirtReg != REG_NA) { - continue; // Handled above. + m_spRegs[m_currentFunclet] = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); } - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - if (varDsc->lvIsRegCandidate()) + if (m_fpRegs[m_currentFunclet] != REG_NA) { - allocPhysReg(varDsc->GetRegNum(), varDsc); + m_fpRegs[m_currentFunclet] = (spVirtReg == m_fpRegs[m_currentFunclet]) + ? m_spRegs[m_currentFunclet] + : allocPhysReg(m_fpRegs[m_currentFunclet], nullptr); } - } - for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) - { - TemporaryRegBank& regs = temporaryRegMap[static_cast(type)]; - for (unsigned i = 0; i < regs.Count; i++) + for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) { - regs.Regs[i] = allocPhysReg(regs.Regs[i], nullptr); + unsigned lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); + if (lclNum == m_compiler->lvaWasmSpArg) + { + continue; // Handled above. + } + + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + if (varDsc->lvIsRegCandidate()) + { + allocPhysReg(varDsc->GetRegNum(), varDsc); + } } - } - // Now remap all remaining virtual register references. 
- unsigned refsCount = m_lastVirtualRegRefsCount; - for (VirtualRegReferences* refs = m_virtualRegRefs; refs != nullptr; refs = refs->Prev) - { - for (size_t i = 0; i < refsCount; i++) + for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) { - GenTree* node = refs->Nodes[i]; - if (node->OperIs(GT_PHYSREG)) + TemporaryRegBank& regs = temporaryRegMap[static_cast(type)]; + for (unsigned i = 0; i < regs.Count; i++) { - assert(node->AsPhysReg()->gtSrcReg == spVirtReg); - node->AsPhysReg()->gtSrcReg = m_spReg; - assert(!genIsValidReg(node->GetRegNum())); // Currently we do not need to multi-use any PHYSREGs. - continue; + regs.Regs[i] = allocPhysReg(regs.Regs[i], nullptr); } + } - regNumber physReg; - if (node->OperIs(GT_STORE_LCL_VAR)) + // Now remap all remaining virtual register references. + unsigned refsCount = m_lastVirtualRegRefsCount; + for (VirtualRegReferences* refs = m_virtualRegRefs; refs != nullptr; refs = refs->Prev) + { + for (size_t i = 0; i < refsCount; i++) { - physReg = m_compiler->lvaGetDesc(node->AsLclVarCommon())->GetRegNum(); + GenTree* node = refs->Nodes[i]; + if (node->OperIs(GT_PHYSREG)) + { + assert(node->AsPhysReg()->gtSrcReg == spVirtReg); + node->AsPhysReg()->gtSrcReg = m_spRegs[m_currentFunclet]; + assert(!genIsValidReg(node->GetRegNum())); // Currently we do not need to multi-use any PHYSREGs. 
+ continue; + } + + regNumber physReg; + if (node->OperIs(GT_STORE_LCL_VAR)) + { + physReg = m_compiler->lvaGetDesc(node->AsLclVarCommon())->GetRegNum(); + } + else + { + assert(!node->OperIsLocal() || !m_compiler->lvaGetDesc(node->AsLclVarCommon())->lvIsRegCandidate()); + WasmValueType type; + unsigned index = UnpackWasmReg(node->GetRegNum(), &type); + physReg = temporaryRegMap[static_cast(type)].Regs[index]; + } + + node->SetRegNum(physReg); } - else + refsCount = ARRAY_SIZE(refs->Nodes); + } + + for (NodeInternalRegistersTable::Node* nodeWithInternalRegs : m_codeGen->internalRegisters.Iterate()) + { + // We need to filter to just the references in the current funclet. + // + InternalRegs* regs = &nodeWithInternalRegs->GetValueRef(); + unsigned count = regs->Count(); + for (unsigned i = 0; i < count; i++) { - assert(!node->OperIsLocal() || !m_compiler->lvaGetDesc(node->AsLclVarCommon())->lvIsRegCandidate()); WasmValueType type; - unsigned index = UnpackWasmReg(node->GetRegNum(), &type); - physReg = temporaryRegMap[static_cast(type)].Regs[index]; + unsigned index = UnpackWasmReg(regs->GetAt(i), &type); + regNumber physReg = temporaryRegMap[static_cast(type)].Regs[index]; + regs->SetAt(i, physReg); } - - node->SetRegNum(physReg); } - refsCount = ARRAY_SIZE(refs->Nodes); - } - for (NodeInternalRegistersTable::Node* nodeWithInternalRegs : m_codeGen->internalRegisters.Iterate()) - { - InternalRegs* regs = &nodeWithInternalRegs->GetValueRef(); - unsigned count = regs->Count(); - for (unsigned i = 0; i < count; i++) - { - WasmValueType type; - unsigned index = UnpackWasmReg(regs->GetAt(i), &type); - regNumber physReg = temporaryRegMap[static_cast(type)].Regs[index]; - regs->SetAt(i, physReg); - } - } + assert(m_compiler->codeGen->WasmLocalsDecls[m_currentFunclet] == nullptr); - jitstd::vector& decls = m_compiler->codeGen->WasmLocalsDecls; - for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) - { - PhysicalRegBank& physRegs = 
virtToPhysRegMap[static_cast(type)]; - if (physRegs.DeclaredCount != 0) + jitstd::vector* decls = new (m_compiler->getAllocator(CMK_Codegen)) + jitstd::vector(m_compiler->getAllocator(CMK_Codegen)); + m_compiler->codeGen->WasmLocalsDecls[m_currentFunclet] = decls; + + for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) { - decls.push_back({type, physRegs.DeclaredCount}); + PhysicalRegBank& physRegs = virtToPhysRegMap[static_cast(type)]; + if (physRegs.DeclaredCount != 0) + { + decls->push_back({type, physRegs.DeclaredCount}); + } } } } @@ -917,22 +1049,47 @@ void WasmRegAlloc::ResolveReferences() // void WasmRegAlloc::PublishAllocationResults() { - if (m_spReg != REG_NA) +#ifdef DEBUG + for (unsigned i = 0; i < m_spRegs.size(); i++) { - m_codeGen->SetStackPointerReg(m_spReg); - JITDUMP("Allocated SP into %s\n", getRegName(m_spReg)); + if (i == 0) + { + JITDUMP("Allocated function SP into: %s\n", getRegName(m_spRegs[i])); + } + else + { + JITDUMP("Allocated Funclet %u SP into %s\n", i, getRegName(m_spRegs[i])); + } + } + + for (unsigned i = 0; i < m_fpRegs.size(); i++) + { + if (i == 0) + { + JITDUMP("Allocated function FP into: %s\n", getRegName(m_fpRegs[i])); + } + else + { + JITDUMP("Allocated Funclet %u FP into %s\n", i, getRegName(m_fpRegs[i])); + } } - if (m_fpReg != REG_NA) + +#endif // DEBUG + + m_codeGen->SetStackPointerRegs(m_spRegs); + + if (m_fpRegs.size() != 0) { - m_codeGen->SetFramePointerReg(m_fpReg); - m_codeGen->setFramePointerUsed(true); - JITDUMP("Allocated FP into %s\n", getRegName(m_fpReg)); + m_codeGen->SetFramePointerRegs(m_fpRegs); } else { m_codeGen->setFramePointerUsed(false); } + // We don't need to publish the exRegs for codegen; all references were in codegen + // and all were converted to GT_PHYSREG. 
+ m_compiler->raMarkStkVars(); m_compiler->compRegAllocDone = true; } diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 217f8f4362178c..934734fb6cf594 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -78,6 +78,7 @@ class WasmRegAlloc : public RegAllocInterface Compiler* m_compiler; CodeGenInterface* m_codeGen; BasicBlock* m_currentBlock; + unsigned m_currentFunclet; VirtualRegStack m_virtualRegs[static_cast(WasmValueType::Count)]; unsigned m_lastVirtualRegRefsCount = 0; VirtualRegReferences* m_virtualRegRefs = nullptr; @@ -86,8 +87,13 @@ class WasmRegAlloc : public RegAllocInterface // The meaning of these fields is borrowed (partially) from the C ABI for WASM. We define "the SP" to be the local // which is used to make calls - the stack on entry to callees. We term "the FP" to be the local which is used to // access the fixed potion of the frame. For fixed-size frames (no localloc), these will be the same. - regNumber m_spReg = REG_NA; - regNumber m_fpReg = REG_NA; + // + // These values are per funclet region. In funclets FP will differ from SP, and will likely differ from FP in the + // main function body. 
+ // + jitstd::vector m_spRegs; + jitstd::vector m_fpRegs; + jitstd::vector m_exRegs; public: WasmRegAlloc(Compiler* compiler); @@ -111,6 +117,7 @@ class WasmRegAlloc : public RegAllocInterface void InitializeStackPointer(); void AllocateStackPointer(); void AllocateFramePointer(); + void AllocateExceptionPointer(); regNumber AllocateVirtualRegister(var_types type); regNumber AllocateVirtualRegister(WasmValueType type); regNumber AllocateTemporaryRegister(var_types type); @@ -129,6 +136,7 @@ class WasmRegAlloc : public RegAllocInterface void CollectReferencesForBlockStore(GenTreeBlk* node); void CollectReferencesForLclVar(GenTreeLclVar* lclVar); void CollectReferencesForIndexAddr(GenTreeIndexAddr* indexAddrNode); + void CollectReferencesForCatchArg(GenTree* node); void RewriteLocalStackStore(GenTreeLclVarCommon* node); void CollectReference(GenTree* node); void RequestTemporaryRegisterForMultiplyUsedNode(GenTree* node); From fecd9973c28aa92a137567207ce60d54a5f54181 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 1 Apr 2026 14:29:39 -0700 Subject: [PATCH 02/26] track need for internal registers per funclet region --- src/coreclr/jit/codegencommon.cpp | 85 +++++++++++++++++++++++++++--- src/coreclr/jit/codegeninterface.h | 13 +++-- src/coreclr/jit/codegenwasm.cpp | 4 +- src/coreclr/jit/regallocwasm.cpp | 25 +++++---- 4 files changed, 104 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index ffef8c98484de4..182d98ea8b480e 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -63,12 +63,19 @@ CodeGenInterface* getCodeGenerator(Compiler* comp) return new (comp, CMK_Codegen) CodeGen(comp); } +#if HAS_FIXED_REGISTER_SET +//------------------------------------------------------------------------ +// NodeInternalRegisters::NodeInternalRegisters: construct the +// internal registers tracking data +// +// Arguments: +// comp -- compiler instance +// 
NodeInternalRegisters::NodeInternalRegisters(Compiler* comp) : m_table(comp->getAllocator(CMK_LSRA)) { } -#if HAS_FIXED_REGISTER_SET //------------------------------------------------------------------------ // Add: Add internal allocated registers for the specified node. // @@ -170,6 +177,59 @@ unsigned NodeInternalRegisters::Count(GenTree* tree, regMaskTP mask) return m_table.Lookup(tree, &regs) ? genCountBits(regs & mask) : 0; } #else // !HAS_FIXED_REGISTER_SET + +//------------------------------------------------------------------------ +// NodeInternalRegisters: construct the +// internal registers tracking data +// +// Arguments: +// comp -- compiler instance +// +NodeInternalRegisters::NodeInternalRegisters(Compiler* comp) + : m_compiler(comp) + , m_tables(comp->compFuncInfoCount, nullptr, comp->getAllocator(CMK_LSRA)) +{ +} + +//------------------------------------------------------------------------ +// GetOrCreateTable: get the internal register table for nodes in this funclet region +// or create it if it does not yet exist. +// +// Parameters: +// funcletIndex - Index of the funclet +// +// Returns: +// Pointer to the internal register table. +// +NodeInternalRegistersTable* NodeInternalRegisters::GetOrCreateTable(unsigned funcletIndex) +{ + assert(funcletIndex < m_tables.size()); + NodeInternalRegistersTable* table = m_tables[funcletIndex]; + if (table == nullptr) + { + table = new (m_compiler->getAllocator(CMK_LSRA)) NodeInternalRegistersTable(m_compiler->getAllocator(CMK_LSRA)); + m_tables[funcletIndex] = table; + } + return table; +} + +//------------------------------------------------------------------------ +// GetTable: get the internal register table for nodes in this funclet region +// or create it if it does not yet exist. +// +// Parameters: +// funcletIndex - Index of the funclet +// +// Returns: +// Pointer to the internal register table. 
+// +NodeInternalRegistersTable* NodeInternalRegisters::GetTable(unsigned funcletIndex) +{ + assert(funcletIndex < m_tables.size()); + NodeInternalRegistersTable* table = m_tables[funcletIndex]; + return table; +} + //------------------------------------------------------------------------ // InternalRegs: construct an empty 'InternalRegs' instance. // @@ -276,12 +336,14 @@ regNumber InternalRegs::Extract() // Add: Add a register to the set of ones internally allocated for this node. // // Parameters: +// funcletIndex -- index of the funclet region for the tree // tree - IR node to add the internal allocated register to // reg - The register to add // -void NodeInternalRegisters::Add(GenTree* tree, regNumber reg) +void NodeInternalRegisters::Add(unsigned funcletIndex, GenTree* tree, regNumber reg) { - InternalRegs* regs = m_table.LookupPointerOrAdd(tree, InternalRegs{}); + NodeInternalRegistersTable* const table = GetOrCreateTable(funcletIndex); + InternalRegs* const regs = table->LookupPointerOrAdd(tree, InternalRegs{}); regs->Add(reg); } @@ -289,14 +351,20 @@ void NodeInternalRegisters::Add(GenTree* tree, regNumber reg) // GetAll: Get the internally allocated registers for the specified node. // // Parameters: +// funcletIndex -- index of the funclet region for the tree // tree - IR node to get the registers for // // Returns: // Pointer to the registers, nullptr if there are none. 
// -InternalRegs* NodeInternalRegisters::GetAll(GenTree* tree) +InternalRegs* NodeInternalRegisters::GetAll(unsigned funcletIndex, GenTree* tree) { - InternalRegs* regs = m_table.LookupPointer(tree); + NodeInternalRegistersTable* const table = GetTable(funcletIndex); + if (table == nullptr) + { + return nullptr; + } + InternalRegs* const regs = table->LookupPointer(tree); assert((regs == nullptr) || !regs->IsEmpty()); return regs; } @@ -304,12 +372,15 @@ InternalRegs* NodeInternalRegisters::GetAll(GenTree* tree) //------------------------------------------------------------------------ // Iterate: Get the iterator for the internal register table. // +// Parameters: +// table -- pointer to the internal register table for a codegen region. +// // Returns: // A 'for'-loop compatible iterator of the table entries. // -NodeInternalRegistersTable::KeyValueIteration NodeInternalRegisters::Iterate() +NodeInternalRegistersTable::KeyValueIteration NodeInternalRegisters::Iterate(NodeInternalRegistersTable* table) { - return NodeInternalRegistersTable::KeyValueIteration(&m_table); + return NodeInternalRegistersTable::KeyValueIteration(table); } #endif // !HAS_FIXED_REGISTER_SET diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 7ab7a88718c906..ab6e8f58385f59 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -61,7 +61,12 @@ class InternalRegs using NodeInternalRegistersTable = JitHashTable, InternalRegs>; class NodeInternalRegisters { +#if HAS_FIXED_REGISTER_SET NodeInternalRegistersTable m_table; +#else // !HAS_FIXED_REGISTER_SET + Compiler* m_compiler; + jitstd::vector m_tables; +#endif // !HAS_FIXED_REGISTER_SET public: NodeInternalRegisters(Compiler* comp); @@ -73,9 +78,11 @@ class NodeInternalRegisters regMaskTP GetAll(GenTree* tree); unsigned Count(GenTree* tree, regMaskTP mask = static_cast(-1)); #else // !HAS_FIXED_REGISTER_SET - void Add(GenTree* tree, regNumber reg); - InternalRegs* 
GetAll(GenTree* tree); - NodeInternalRegistersTable::KeyValueIteration Iterate(); + NodeInternalRegistersTable* GetOrCreateTable(unsigned funcletIndex); + NodeInternalRegistersTable* GetTable(unsigned funcletIndex); + void Add(unsigned funcletIndex, GenTree* tree, regNumber reg); + InternalRegs* GetAll(unsigned funcletIndex, GenTree* tree); + NodeInternalRegistersTable::KeyValueIteration Iterate(NodeInternalRegistersTable* table); #endif // !HAS_FIXED_REGISTER_SET }; diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 52c413f8731d1e..9050cb4492f00e 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -1285,7 +1285,7 @@ void CodeGen::genCodeForBinaryOverflow(GenTreeOp* treeNode) genConsumeOperands(treeNode); const bool is64BitOp = treeNode->TypeIs(TYP_LONG); - InternalRegs* regs = internalRegisters.GetAll(treeNode); + InternalRegs* regs = internalRegisters.GetAll(m_compiler->compCurrFuncIdx, treeNode); regNumber op1Reg = GetMultiUseOperandReg(treeNode->gtGetOp1()); regNumber op2Reg = GetMultiUseOperandReg(treeNode->gtGetOp2()); @@ -2825,7 +2825,7 @@ void CodeGen::genLclHeap(GenTree* tree) } // Fetch the internal register we reserved during RA - InternalRegs* regs = internalRegisters.GetAll(tree); + InternalRegs* regs = internalRegisters.GetAll(m_compiler->compCurrFuncIdx, tree); assert(regs->Count() == 1); regNumber sizeReg = regs->Extract(); assert(WasmRegToType(sizeReg) == TypeToWasmValueType(TYP_I_IMPL)); diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 67bc7deda99cae..2fe18f0f5ab6cc 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -754,7 +754,7 @@ void WasmRegAlloc::ConsumeTemporaryRegForOperand(GenTree* operand DEBUGARG(const regNumber WasmRegAlloc::RequestInternalRegister(GenTree* node, var_types type) { regNumber reg = AllocateTemporaryRegister(type); - m_codeGen->internalRegisters.Add(node, reg); + 
m_codeGen->internalRegisters.Add(m_currentFunclet, node, reg); return reg; } @@ -1010,18 +1010,21 @@ void WasmRegAlloc::ResolveReferences() refsCount = ARRAY_SIZE(refs->Nodes); } - for (NodeInternalRegistersTable::Node* nodeWithInternalRegs : m_codeGen->internalRegisters.Iterate()) + NodeInternalRegistersTable* const internalRegTable = m_codeGen->internalRegisters.GetTable(m_currentFunclet); + if (internalRegTable != nullptr) { - // We need to filter to just the references in the current funclet. - // - InternalRegs* regs = &nodeWithInternalRegs->GetValueRef(); - unsigned count = regs->Count(); - for (unsigned i = 0; i < count; i++) + for (NodeInternalRegistersTable::Node* nodeWithInternalRegs : + m_codeGen->internalRegisters.Iterate(internalRegTable)) { - WasmValueType type; - unsigned index = UnpackWasmReg(regs->GetAt(i), &type); - regNumber physReg = temporaryRegMap[static_cast(type)].Regs[index]; - regs->SetAt(i, physReg); + InternalRegs* regs = &nodeWithInternalRegs->GetValueRef(); + unsigned count = regs->Count(); + for (unsigned i = 0; i < count; i++) + { + WasmValueType type; + unsigned index = UnpackWasmReg(regs->GetAt(i), &type); + regNumber physReg = temporaryRegMap[static_cast(type)].Regs[index]; + regs->SetAt(i, physReg); + } } } From a34bfa4fbced40c8ecf4347eaf55f1a847d0dba6 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 1 Apr 2026 17:07:45 -0700 Subject: [PATCH 03/26] fixes --- src/coreclr/jit/regallocwasm.cpp | 84 ++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 2fe18f0f5ab6cc..986a2fe8794d1d 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -192,7 +192,13 @@ void WasmRegAlloc::AllocateExceptionPointer() for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) { const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[i]; - EHblkDsc* const ehDsc = 
m_compiler->ehGetDsc(funcInfo.funEHIndex); + + if (funcInfo.funKind == FuncKind::FUNC_ROOT) + { + continue; + } + + EHblkDsc* const ehDsc = m_compiler->ehGetDsc(funcInfo.funEHIndex); if (ehDsc->HasCatchHandler()) { @@ -812,12 +818,12 @@ void WasmRegAlloc::ResolveReferences() unsigned indexBase = 0; const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[m_currentFunclet]; + const bool inFunclet = funcInfo.funKind != FuncKind::FUNC_ROOT; switch (funcInfo.funKind) { case FuncKind::FUNC_ROOT: { - assert(m_currentFunclet == 0); for (unsigned argLclNum = 0; argLclNum < m_compiler->info.compArgsCount; argLclNum++) { const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(argLclNum); @@ -853,8 +859,6 @@ void WasmRegAlloc::ResolveReferences() case FuncKind::FUNC_HANDLER: case FuncKind::FUNC_FILTER: { - assert(m_currentFunclet > 0); - // TODO: add ABI information for funclets? // // All funclets have two intial arguments sp and fp. @@ -926,11 +930,10 @@ void WasmRegAlloc::ResolveReferences() // This is the location for the "first" def. In our case all defs share the same register. // - // Note this holds even across funclets, since any local that is referred to in multiple funclets - // or in both funclets and the main method won't be a register candidate (we hope). - // (need to verify that if we have a local with two disjoint live ranges that doesn't cross a - // funclet boundary that we get this right). + // TODO-WASM: this may no longer hold with funclets. If a local has disjoint lifetimes that do not + // cross funclet boundaries, it may need to be allocated to different registers in different funclets. // + // assert(!varDsc->lvRegister); varDsc->SetRegNum(physReg); varDsc->lvRegister = true; varDsc->lvOnFrame = false; @@ -941,16 +944,41 @@ void WasmRegAlloc::ResolveReferences() // Allocate all our virtual registers to physical ones. 
// regNumber spVirtReg = m_spRegs[m_currentFunclet]; - if (spVirtReg != REG_NA) + regNumber exVirtReg = m_exRegs[m_currentFunclet]; + + if (!inFunclet) { - m_spRegs[m_currentFunclet] = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); - } + if (spVirtReg != REG_NA) + { + m_spRegs[m_currentFunclet] = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); + } - if (m_fpRegs[m_currentFunclet] != REG_NA) + if (m_fpRegs[m_currentFunclet] != REG_NA) + { + m_fpRegs[m_currentFunclet] = (spVirtReg == m_fpRegs[m_currentFunclet]) + ? m_spRegs[m_currentFunclet] + : allocPhysReg(m_fpRegs[m_currentFunclet], nullptr); + } + + assert(m_exRegs[m_currentFunclet] == REG_NA); + } + else { - m_fpRegs[m_currentFunclet] = (spVirtReg == m_fpRegs[m_currentFunclet]) - ? m_spRegs[m_currentFunclet] - : allocPhysReg(m_fpRegs[m_currentFunclet], nullptr); + // Funclets always have SP and FP, and maybe EX. + // We do not have lclVars for funclet params. + // SP is the same physreg as in the main method. 
+ // + assert(m_spRegs[m_currentFunclet] != REG_NA); + m_spRegs[m_currentFunclet] = MakeWasmReg(0, TypeToWasmValueType(TYP_I_IMPL)); + assert(m_spRegs[0] == m_spRegs[m_currentFunclet]); + + assert(m_fpRegs[m_currentFunclet] != REG_NA); + m_fpRegs[m_currentFunclet] = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); + + if (m_exRegs[m_currentFunclet] != REG_NA) + { + m_exRegs[m_currentFunclet] = MakeWasmReg(2, TypeToWasmValueType(TYP_REF)); + } } for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) @@ -986,8 +1014,18 @@ void WasmRegAlloc::ResolveReferences() GenTree* node = refs->Nodes[i]; if (node->OperIs(GT_PHYSREG)) { - assert(node->AsPhysReg()->gtSrcReg == spVirtReg); - node->AsPhysReg()->gtSrcReg = m_spRegs[m_currentFunclet]; + if (node->AsPhysReg()->gtSrcReg == exVirtReg) + { + // Former CATCH_ARG -- should only see in funclets + assert(inFunclet); + node->AsPhysReg()->gtSrcReg = m_exRegs[m_currentFunclet]; + } + else + { + assert(node->AsPhysReg()->gtSrcReg == spVirtReg); + node->AsPhysReg()->gtSrcReg = m_spRegs[m_currentFunclet]; + } + assert(!genIsValidReg(node->GetRegNum())); // Currently we do not need to multi-use any PHYSREGs. continue; } @@ -1081,14 +1119,18 @@ void WasmRegAlloc::PublishAllocationResults() m_codeGen->SetStackPointerRegs(m_spRegs); + bool fpUsed = false; + if (m_fpRegs.size() != 0) { m_codeGen->SetFramePointerRegs(m_fpRegs); + + // We can just check the method region to see if SP and FP differ + // + fpUsed = (m_fpRegs[0] != m_spRegs[0]); } - else - { - m_codeGen->setFramePointerUsed(false); - } + + m_codeGen->setFramePointerUsed(fpUsed); // We don't need to publish the exRegs for codegen; all references were in codegen // and all were converted to GT_PHYSREG. 
From d2e576faedcea70c5c2d8535224d0568abc04481 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 1 Apr 2026 19:00:20 -0700 Subject: [PATCH 04/26] refactor a bit; virtual references per region; emitter fix --- src/coreclr/jit/codegencommon.cpp | 4 +- src/coreclr/jit/codegeninterface.h | 8 +- src/coreclr/jit/emitwasm.cpp | 26 +++-- src/coreclr/jit/regallocwasm.cpp | 156 ++++++++++++++--------------- src/coreclr/jit/regallocwasm.h | 56 +++++++---- 5 files changed, 136 insertions(+), 114 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 182d98ea8b480e..2e2c711c8c6a11 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -408,8 +408,8 @@ CodeGenInterface::CodeGenInterface(Compiler* theCompiler) , m_compiler(theCompiler) , treeLifeUpdater(nullptr) #ifdef TARGET_WASM - , m_spRegs(theCompiler->getAllocator(CMK_Codegen)) - , m_fpRegs(theCompiler->getAllocator(CMK_Codegen)) + , m_spRegs(theCompiler->compFuncInfoCount, REG_NA, theCompiler->getAllocator(CMK_Codegen)) + , m_fpRegs(theCompiler->compFuncInfoCount, REG_NA, theCompiler->getAllocator(CMK_Codegen)) , WasmLocalsDecls(theCompiler->compFuncInfoCount, nullptr, theCompiler->getAllocator(CMK_Codegen)) #endif { diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index ab6e8f58385f59..e373691eb49fc8 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -284,13 +284,13 @@ class CodeGenInterface jitstd::vector m_fpRegs; public: - void SetStackPointerRegs(jitstd::vector& regs) + void SetStackPointerReg(unsigned funcletIndex, regNumber reg) { - m_spRegs = regs; + m_spRegs[funcletIndex] = reg; } - void SetFramePointerRegs(jitstd::vector& regs) + void SetFramePointerReg(unsigned funcletIndex, regNumber reg) { - m_fpRegs = regs; + m_fpRegs[funcletIndex] = reg; } regNumber GetStackPointerReg(unsigned funcletIndex) const { diff --git a/src/coreclr/jit/emitwasm.cpp 
b/src/coreclr/jit/emitwasm.cpp index 3969bce7a529af..3fc418a2d9fbe3 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -218,28 +218,34 @@ void emitter::emitIns_Call(const EmitCallParams& params) } //------------------------------------------------------------------------ -// GetWasmArgsCount: Get WASM argument count for the root method. +// GetWasmArgsCount: Get WASM argument count for the method or a funclet. // // Arguments: // compiler - The compiler object // // Return Value: -// The number of arguments in the WASM signature of the method being compiled. +// The number of arguments in the WASM signature of the method or funclet being compiled. // static unsigned GetWasmArgsCount(Compiler* compiler) { - assert(compiler->funCurrentFunc()->funKind == FUNC_ROOT); - - unsigned count = 0; - for (unsigned argLclNum = 0; argLclNum < compiler->info.compArgsCount; argLclNum++) + if (compiler->funCurrentFunc()->funKind == FUNC_ROOT) { - const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(argLclNum); - for (const ABIPassingSegment& segment : abiInfo.Segments()) + unsigned count = 0; + for (unsigned argLclNum = 0; argLclNum < compiler->info.compArgsCount; argLclNum++) { - count = max(count, WasmRegToIndex(segment.GetRegister()) + 1); + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(argLclNum); + for (const ABIPassingSegment& segment : abiInfo.Segments()) + { + count = max(count, WasmRegToIndex(segment.GetRegister()) + 1); + } } + return count; + } + else + { + EHblkDsc* const ehDsc = compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex); + return ehDsc->HasCatchHandler() ? 
3 : 2; } - return count; } //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 986a2fe8794d1d..16b34809a4126c 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -20,10 +20,12 @@ WasmRegAlloc::WasmRegAlloc(Compiler* compiler) , m_codeGen(compiler->codeGen) , m_currentBlock(nullptr) , m_currentFunclet(0) - , m_spRegs(compiler->getAllocator(CMK_LSRA)) - , m_fpRegs(compiler->getAllocator(CMK_LSRA)) - , m_exRegs(compiler->getAllocator(CMK_LSRA)) + , m_perRegionData(compiler->compFuncInfoCount, nullptr, compiler->getAllocator(CMK_LSRA)) { + for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) + { + m_perRegionData[i] = new (compiler->getAllocator(CMK_LSRA)) PerRegionData(); + } } PhaseStatus WasmRegAlloc::doRegisterAllocation() @@ -92,7 +94,7 @@ void WasmRegAlloc::IdentifyCandidates() } } - if (anyFrameLocals || m_compiler->compLocallocUsed || (m_compiler->compFuncInfoCount > 1)) + if (anyFrameLocals || m_compiler->compLocallocUsed) { AllocateFramePointer(); } @@ -148,14 +150,15 @@ void WasmRegAlloc::InitializeStackPointer() // void WasmRegAlloc::AllocateStackPointer() { - - if (m_spRegs.size() == 0) + // This is the same virtual register in all regions + // + if (m_perRegionData[0]->m_spReg == REG_NA) { regNumber spReg = AllocateVirtualRegister(TYP_I_IMPL); for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) { - m_spRegs.push_back(spReg); + m_perRegionData[i]->m_spReg = spReg; } } } @@ -165,18 +168,21 @@ void WasmRegAlloc::AllocateStackPointer() // void WasmRegAlloc::AllocateFramePointer() { - assert(m_fpRegs.size() == 0); - // FP is initialized with SP in the prolog, so ensure the latter is allocated. AllocateStackPointer(); - // If we have funclets or localloc, we must have an FP. 
- bool const needsFP = m_compiler->compLocallocUsed || (m_compiler->compFuncInfoCount > 1); - regNumber const fpReg = needsFP ? AllocateVirtualRegister(TYP_I_IMPL) : m_spRegs[0]; + regNumber const spReg = m_perRegionData[0]->m_spReg; + regNumber const fpReg = AllocateVirtualRegister(TYP_I_IMPL); - for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) + // Main method can use SP for frame access, if there is no localloc. + // + m_perRegionData[0]->m_fpReg = m_compiler->compLocallocUsed ? fpReg : spReg; + + // Funclets must always use FP for frame access + // + for (unsigned i = 1; i < m_compiler->compFuncInfoCount; i++) { - m_fpRegs.push_back(fpReg); + m_perRegionData[i]->m_fpReg = fpReg; } } @@ -185,9 +191,7 @@ void WasmRegAlloc::AllocateFramePointer() // void WasmRegAlloc::AllocateExceptionPointer() { - assert(m_exRegs.size() == 0); regNumber exReg = REG_NA; - m_exRegs.resize(m_compiler->compFuncInfoCount, REG_NA); for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) { @@ -207,7 +211,7 @@ void WasmRegAlloc::AllocateExceptionPointer() exReg = AllocateVirtualRegister(TYP_REF); } - m_exRegs[i] = exReg; + m_perRegionData[i]->m_exReg = exReg; } } } @@ -574,7 +578,7 @@ void WasmRegAlloc::CollectReferencesForLclVar(GenTreeLclVar* lclVar) if (lclVar->GetLclNum() == m_compiler->lvaWasmSpArg) { lclVar->ChangeOper(GT_PHYSREG); - lclVar->AsPhysReg()->gtSrcReg = m_spRegs[m_currentFunclet]; + lclVar->AsPhysReg()->gtSrcReg = m_perRegionData[m_currentFunclet]->m_spReg; CollectReference(lclVar); } } @@ -590,7 +594,7 @@ void WasmRegAlloc::CollectReferencesForLclVar(GenTreeLclVar* lclVar) void WasmRegAlloc::CollectReferencesForCatchArg(GenTree* catchArg) { catchArg->ChangeOper(GT_PHYSREG); - catchArg->AsPhysReg()->gtSrcReg = m_exRegs[m_currentFunclet]; + catchArg->AsPhysReg()->gtSrcReg = m_perRegionData[m_currentFunclet]->m_exReg; CollectReference(catchArg); } @@ -664,22 +668,23 @@ void WasmRegAlloc::RewriteLocalStackStore(GenTreeLclVarCommon* lclNode) // void 
WasmRegAlloc::CollectReference(GenTree* node) { - VirtualRegReferences* refs = m_virtualRegRefs; + PerRegionData* const data = m_perRegionData[m_currentFunclet]; + VirtualRegReferences* refs = data->m_virtualRegRefs; if (refs == nullptr) { - refs = new (m_compiler->getAllocator(CMK_LSRA_RefPosition)) VirtualRegReferences(); - m_virtualRegRefs = refs; + refs = new (m_compiler->getAllocator(CMK_LSRA_RefPosition)) VirtualRegReferences(); + data->m_virtualRegRefs = refs; } - else if (m_lastVirtualRegRefsCount == ARRAY_SIZE(refs->Nodes)) + else if (data->m_lastVirtualRegRefsCount == ARRAY_SIZE(refs->Nodes)) { - refs = new (m_compiler->getAllocator(CMK_LSRA_RefPosition)) VirtualRegReferences(); - refs->Prev = m_virtualRegRefs; - m_virtualRegRefs = refs; - m_lastVirtualRegRefsCount = 0; + refs = new (m_compiler->getAllocator(CMK_LSRA_RefPosition)) VirtualRegReferences(); + refs->Prev = data->m_virtualRegRefs; + data->m_virtualRegRefs = refs; + data->m_lastVirtualRegRefsCount = 0; } - assert(m_lastVirtualRegRefsCount < ARRAY_SIZE(refs->Nodes)); - refs->Nodes[m_lastVirtualRegRefsCount++] = node; + assert(data->m_lastVirtualRegRefsCount < ARRAY_SIZE(refs->Nodes)); + refs->Nodes[data->m_lastVirtualRegRefsCount++] = node; } //------------------------------------------------------------------------ @@ -816,9 +821,10 @@ void WasmRegAlloc::ResolveReferences() physRegs.DeclaredCount = virtRegs.Count(); } - unsigned indexBase = 0; - const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[m_currentFunclet]; - const bool inFunclet = funcInfo.funKind != FuncKind::FUNC_ROOT; + unsigned indexBase = 0; + const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[m_currentFunclet]; + const bool inFunclet = funcInfo.funKind != FuncKind::FUNC_ROOT; + PerRegionData* const data = m_perRegionData[m_currentFunclet]; switch (funcInfo.funKind) { @@ -837,7 +843,7 @@ void WasmRegAlloc::ResolveReferences() indexBase = max(indexBase, argIndex + 1); LclVarDsc* argVarDsc = 
m_compiler->lvaGetDesc(argLclNum); - if ((argVarDsc->GetRegNum() == argReg) || (m_spRegs[0] == argReg)) + if ((argVarDsc->GetRegNum() == argReg) || (data->m_spReg == argReg)) { assert(abiInfo.HasExactlyOneRegisterSegment()); virtToPhysRegMap[static_cast(argType)].DeclaredCount--; @@ -943,24 +949,22 @@ void WasmRegAlloc::ResolveReferences() // Allocate all our virtual registers to physical ones. // - regNumber spVirtReg = m_spRegs[m_currentFunclet]; - regNumber exVirtReg = m_exRegs[m_currentFunclet]; + regNumber spVirtReg = data->m_spReg; + regNumber exVirtReg = data->m_exReg; if (!inFunclet) { if (spVirtReg != REG_NA) { - m_spRegs[m_currentFunclet] = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); + data->m_spReg = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); } - if (m_fpRegs[m_currentFunclet] != REG_NA) + if (data->m_fpReg != REG_NA) { - m_fpRegs[m_currentFunclet] = (spVirtReg == m_fpRegs[m_currentFunclet]) - ? m_spRegs[m_currentFunclet] - : allocPhysReg(m_fpRegs[m_currentFunclet], nullptr); + data->m_fpReg = (spVirtReg == data->m_fpReg) ? data->m_spReg : allocPhysReg(data->m_fpReg, nullptr); } - assert(m_exRegs[m_currentFunclet] == REG_NA); + assert(data->m_exReg == REG_NA); } else { @@ -968,16 +972,16 @@ void WasmRegAlloc::ResolveReferences() // We do not have lclVars for funclet params. // SP is the same physreg as in the main method. 
// - assert(m_spRegs[m_currentFunclet] != REG_NA); - m_spRegs[m_currentFunclet] = MakeWasmReg(0, TypeToWasmValueType(TYP_I_IMPL)); - assert(m_spRegs[0] == m_spRegs[m_currentFunclet]); + assert(data->m_spReg != REG_NA); + data->m_spReg = MakeWasmReg(0, TypeToWasmValueType(TYP_I_IMPL)); + assert(data->m_spReg == m_perRegionData[0]->m_spReg); - assert(m_fpRegs[m_currentFunclet] != REG_NA); - m_fpRegs[m_currentFunclet] = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); + assert(data->m_fpReg != REG_NA); + data->m_fpReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); - if (m_exRegs[m_currentFunclet] != REG_NA) + if (data->m_exReg != REG_NA) { - m_exRegs[m_currentFunclet] = MakeWasmReg(2, TypeToWasmValueType(TYP_REF)); + data->m_exReg = MakeWasmReg(2, TypeToWasmValueType(TYP_REF)); } } @@ -1006,8 +1010,8 @@ void WasmRegAlloc::ResolveReferences() } // Now remap all remaining virtual register references. - unsigned refsCount = m_lastVirtualRegRefsCount; - for (VirtualRegReferences* refs = m_virtualRegRefs; refs != nullptr; refs = refs->Prev) + unsigned refsCount = data->m_lastVirtualRegRefsCount; + for (VirtualRegReferences* refs = data->m_virtualRegRefs; refs != nullptr; refs = refs->Prev) { for (size_t i = 0; i < refsCount; i++) { @@ -1018,12 +1022,12 @@ void WasmRegAlloc::ResolveReferences() { // Former CATCH_ARG -- should only see in funclets assert(inFunclet); - node->AsPhysReg()->gtSrcReg = m_exRegs[m_currentFunclet]; + node->AsPhysReg()->gtSrcReg = data->m_exReg; } else { assert(node->AsPhysReg()->gtSrcReg == spVirtReg); - node->AsPhysReg()->gtSrcReg = m_spRegs[m_currentFunclet]; + node->AsPhysReg()->gtSrcReg = data->m_spReg; } assert(!genIsValidReg(node->GetRegNum())); // Currently we do not need to multi-use any PHYSREGs. 
@@ -1090,47 +1094,39 @@ void WasmRegAlloc::ResolveReferences() // void WasmRegAlloc::PublishAllocationResults() { -#ifdef DEBUG - for (unsigned i = 0; i < m_spRegs.size(); i++) - { - if (i == 0) - { - JITDUMP("Allocated function SP into: %s\n", getRegName(m_spRegs[i])); - } - else - { - JITDUMP("Allocated Funclet %u SP into %s\n", i, getRegName(m_spRegs[i])); - } - } + bool usesFramePointer = false; - for (unsigned i = 0; i < m_fpRegs.size(); i++) + for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) { + PerRegionData* const data = m_perRegionData[i]; + +#ifdef DEBUG if (i == 0) { - JITDUMP("Allocated function FP into: %s\n", getRegName(m_fpRegs[i])); + JITDUMP("Allocated function SP into: %s\n", getRegName(data->m_spReg)); + + if (data->m_fpReg != REG_NA) + { + JITDUMP("Allocated function FP into: %s\n", getRegName(data->m_fpReg)); + } } else { - JITDUMP("Allocated Funclet %u FP into %s\n", i, getRegName(m_fpRegs[i])); + JITDUMP("Allocated funclet %u SP into %s\n", i, getRegName(data->m_spReg)); + JITDUMP("Allocated funclet %u FP into %s\n", i, getRegName(data->m_fpReg)); } - } - #endif // DEBUG - m_codeGen->SetStackPointerRegs(m_spRegs); - - bool fpUsed = false; - - if (m_fpRegs.size() != 0) - { - m_codeGen->SetFramePointerRegs(m_fpRegs); + m_codeGen->SetStackPointerReg(i, data->m_spReg); - // We can just check the method region to see if SP and FP differ - // - fpUsed = (m_fpRegs[0] != m_spRegs[0]); + if (data->m_fpReg != REG_NA) + { + m_codeGen->SetFramePointerReg(i, data->m_fpReg); + usesFramePointer |= (data->m_fpReg != data->m_spReg); + } } - m_codeGen->setFramePointerUsed(fpUsed); + m_codeGen->setFramePointerUsed(usesFramePointer); // We don't need to publish the exRegs for codegen; all references were in codegen // and all were converted to GT_PHYSREG. 
diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 934734fb6cf594..1738957d80e06a 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -75,25 +75,45 @@ struct VirtualRegReferences class WasmRegAlloc : public RegAllocInterface { - Compiler* m_compiler; - CodeGenInterface* m_codeGen; - BasicBlock* m_currentBlock; - unsigned m_currentFunclet; - VirtualRegStack m_virtualRegs[static_cast(WasmValueType::Count)]; - unsigned m_lastVirtualRegRefsCount = 0; - VirtualRegReferences* m_virtualRegRefs = nullptr; - TemporaryRegStack m_temporaryRegs[static_cast(WasmValueType::Count)]; - - // The meaning of these fields is borrowed (partially) from the C ABI for WASM. We define "the SP" to be the local - // which is used to make calls - the stack on entry to callees. We term "the FP" to be the local which is used to - // access the fixed potion of the frame. For fixed-size frames (no localloc), these will be the same. + Compiler* m_compiler; + CodeGenInterface* m_codeGen; + BasicBlock* m_currentBlock; + unsigned m_currentFunclet; + VirtualRegStack m_virtualRegs[static_cast(WasmValueType::Count)]; + TemporaryRegStack m_temporaryRegs[static_cast(WasmValueType::Count)]; + + // We need to allocate per funclet. This struct holds the per-funclet state. // - // These values are per funclet region. In funclets FP will differ from SP, and will likely differ from FP in the - // main function body. - // - jitstd::vector m_spRegs; - jitstd::vector m_fpRegs; - jitstd::vector m_exRegs; + struct PerRegionData + { + PerRegionData() + : m_spReg(REG_NA) + , m_fpReg(REG_NA) + , m_exReg(REG_NA) + , m_lastVirtualRegRefsCount(0) + , m_virtualRegRefs(nullptr) + { + } + + // The meaning of these fields is borrowed (partially) from the C ABI for WASM. We define "the SP" to be the + // local which is used to make calls - the stack on entry to callees. We term "the FP" to be the local which is + // used to access the fixed potion of the frame. 
For fixed-size frames (no localloc), these will be the same. + // + // In funclets FP will refer to the fixed portion of the parent frame. It will likely be in a different Wasm + // local than the FP in the main function body. EX will refer to the exception object local for filter and catch + // funclets. + // + regNumber m_spReg; + regNumber m_fpReg; + regNumber m_exReg; + + // Chunked list of virtual reg references in this region. + // + unsigned m_lastVirtualRegRefsCount; + VirtualRegReferences* m_virtualRegRefs; + }; + + jitstd::vector m_perRegionData; public: WasmRegAlloc(Compiler* compiler); From 3645ec5c8b8e41dddcb94bd1e5b4152103c184a2 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 08:57:19 -0700 Subject: [PATCH 05/26] review feedback --- src/coreclr/jit/codegencommon.cpp | 3 +-- src/coreclr/jit/codegeninterface.h | 6 ++++++ src/coreclr/jit/regallocwasm.cpp | 6 +++--- src/coreclr/jit/regallocwasm.h | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2e2c711c8c6a11..a07290d20ea3bc 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -214,8 +214,7 @@ NodeInternalRegistersTable* NodeInternalRegisters::GetOrCreateTable(unsigned fun } //------------------------------------------------------------------------ -// GetTable: get the internal register table for nodes in this funclet region -// or create it if it does not yet exist. +// GetTable: get the internal register table for nodes in this funclet region. 
// // Parameters: // funcletIndex - Index of the funclet diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index e373691eb49fc8..30f15af5c8aada 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -286,18 +286,24 @@ class CodeGenInterface public: void SetStackPointerReg(unsigned funcletIndex, regNumber reg) { + assert(funcletIndex < m_spRegs.size()); + assert(reg != REG_NA); m_spRegs[funcletIndex] = reg; } void SetFramePointerReg(unsigned funcletIndex, regNumber reg) { + assert(funcletIndex < m_fpRegs.size()); + assert(reg != REG_NA); m_fpRegs[funcletIndex] = reg; } regNumber GetStackPointerReg(unsigned funcletIndex) const { + assert(funcletIndex < m_spRegs.size()); return m_spRegs[funcletIndex]; } regNumber GetFramePointerReg(unsigned funcletIndex) const { + assert(funcletIndex < m_fpRegs.size()); return m_fpRegs[funcletIndex]; } #else // HAS_FIXED_REGISTER_SET diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 16b34809a4126c..bc02179bc9d816 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -94,7 +94,7 @@ void WasmRegAlloc::IdentifyCandidates() } } - if (anyFrameLocals || m_compiler->compLocallocUsed) + if (anyFrameLocals || m_compiler->compLocallocUsed || (m_compiler->compFuncInfoCount > 1)) { AllocateFramePointer(); } @@ -586,7 +586,7 @@ void WasmRegAlloc::CollectReferencesForLclVar(GenTreeLclVar* lclVar) //------------------------------------------------------------------------ // CollectReferencesForCatchArg: Collect virtual register references for a CATCH_ARG node. // -// Rewrites SP references into PHYS_REGs. +// Rewrites the CATCH_ARG node into a PHYS_REG that refers to the exception object. // // Arguments: // catchArg - The CATCH_ARG node @@ -867,7 +867,7 @@ void WasmRegAlloc::ResolveReferences() { // TODO: add ABI information for funclets? // - // All funclets have two intial arguments sp and fp. 
+ // All funclets have two initial arguments: sp and fp. // WasmValueType argType = TypeToWasmValueType(TYP_I_IMPL); diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 1738957d80e06a..8d9cdbac17b657 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -97,7 +97,7 @@ class WasmRegAlloc : public RegAllocInterface // The meaning of these fields is borrowed (partially) from the C ABI for WASM. We define "the SP" to be the // local which is used to make calls - the stack on entry to callees. We term "the FP" to be the local which is - // used to access the fixed potion of the frame. For fixed-size frames (no localloc), these will be the same. + // used to access the fixed portion of the frame. For fixed-size frames (no localloc), these will be the same. // // In funclets FP will refer to the fixed portion of the parent frame. It will likely be in a different Wasm // local than the FP in the main function body. EX will refer to the exception object local for filter and catch From 1b3cb9e7b61b9cdef4d04b572743381af3202874 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 09:52:56 -0700 Subject: [PATCH 06/26] move per-funclet state to FuncInfoDsc --- src/coreclr/jit/codegencommon.cpp | 33 +++++++++++++++++++---- src/coreclr/jit/codegeninterface.h | 42 +++--------------------------- src/coreclr/jit/codegenwasm.cpp | 13 ++++++--- src/coreclr/jit/compiler.h | 16 ++++++++++++ src/coreclr/jit/flowgraph.cpp | 12 +++++++++ src/coreclr/jit/regallocwasm.cpp | 9 ++++--- 6 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index a07290d20ea3bc..b8d2c374ab2925 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -58,6 +58,34 @@ void CodeGenInterface::setFramePointerRequiredEH(bool value) #endif // JIT32_GCENCODER } +#if !HAS_FIXED_REGISTER_SET +void CodeGenInterface::SetStackPointerReg(unsigned 
funcletIndex, regNumber reg) +{ + assert(funcletIndex < m_compiler->compFuncInfoCount); + assert(reg != REG_NA); + m_compiler->compFuncInfos[funcletIndex].funStackPointerReg = reg; +} + +void CodeGenInterface::SetFramePointerReg(unsigned funcletIndex, regNumber reg) +{ + assert(funcletIndex < m_compiler->compFuncInfoCount); + assert(reg != REG_NA); + m_compiler->compFuncInfos[funcletIndex].funFramePointerReg = reg; +} + +regNumber CodeGenInterface::GetStackPointerReg(unsigned funcletIndex) const +{ + assert(funcletIndex < m_compiler->compFuncInfoCount); + return m_compiler->compFuncInfos[funcletIndex].funStackPointerReg; +} + +regNumber CodeGenInterface::GetFramePointerReg(unsigned funcletIndex) const +{ + assert(funcletIndex < m_compiler->compFuncInfoCount); + return m_compiler->compFuncInfos[funcletIndex].funFramePointerReg; +} +#endif // !HAS_FIXED_REGISTER_SET + CodeGenInterface* getCodeGenerator(Compiler* comp) { return new (comp, CMK_Codegen) CodeGen(comp); @@ -406,11 +434,6 @@ CodeGenInterface::CodeGenInterface(Compiler* theCompiler) , internalRegisters(theCompiler) , m_compiler(theCompiler) , treeLifeUpdater(nullptr) -#ifdef TARGET_WASM - , m_spRegs(theCompiler->compFuncInfoCount, REG_NA, theCompiler->getAllocator(CMK_Codegen)) - , m_fpRegs(theCompiler->compFuncInfoCount, REG_NA, theCompiler->getAllocator(CMK_Codegen)) - , WasmLocalsDecls(theCompiler->compFuncInfoCount, nullptr, theCompiler->getAllocator(CMK_Codegen)) -#endif { } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 30f15af5c8aada..106a08cbbb69c0 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -279,33 +279,11 @@ class CodeGenInterface #if !HAS_FIXED_REGISTER_SET -private: - jitstd::vector m_spRegs; - jitstd::vector m_fpRegs; - public: - void SetStackPointerReg(unsigned funcletIndex, regNumber reg) - { - assert(funcletIndex < m_spRegs.size()); - assert(reg != REG_NA); - m_spRegs[funcletIndex] = reg; - } - void 
SetFramePointerReg(unsigned funcletIndex, regNumber reg) - { - assert(funcletIndex < m_fpRegs.size()); - assert(reg != REG_NA); - m_fpRegs[funcletIndex] = reg; - } - regNumber GetStackPointerReg(unsigned funcletIndex) const - { - assert(funcletIndex < m_spRegs.size()); - return m_spRegs[funcletIndex]; - } - regNumber GetFramePointerReg(unsigned funcletIndex) const - { - assert(funcletIndex < m_fpRegs.size()); - return m_fpRegs[funcletIndex]; - } + void SetStackPointerReg(unsigned funcletIndex, regNumber reg); + void SetFramePointerReg(unsigned funcletIndex, regNumber reg); + regNumber GetStackPointerReg(unsigned funcletIndex) const; + regNumber GetFramePointerReg(unsigned funcletIndex) const; #else // HAS_FIXED_REGISTER_SET regNumber GetStackPointerReg() const { @@ -411,18 +389,6 @@ class CodeGenInterface #endif // !DOUBLE_ALIGN -#ifdef TARGET_WASM - struct WasmLocalsDecl - { - WasmValueType Type; - unsigned Count; - }; - - // Per-funclet vectors of local declarations - // - jitstd::vector*> WasmLocalsDecls; -#endif - #ifdef DEBUG // The following is used to make sure the value of 'GetInterruptible()' isn't // changed after it's been used by any logic that depends on its value. 
diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 9050cb4492f00e..42d6f57c5d95a9 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -49,10 +49,13 @@ void CodeGen::genMarkLabelsForCodegen() // void CodeGen::genBeginFnProlog() { + FuncInfoDsc* const func = m_compiler->funGetFunc(0); + assert(func->funWasmLocalDecls != nullptr); + unsigned localsCount = 0; assert(m_compiler->compCurrFuncIdx == 0); - GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, WasmLocalsDecls[0]->size()); - for (WasmLocalsDecl& decl : *WasmLocalsDecls[0]) + GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, func->funWasmLocalDecls->size()); + for (FuncInfoDsc::WasmLocalsDecl& decl : *func->funWasmLocalDecls) { GetEmitter()->emitIns_I_Ty(INS_local_decl, decl.Count, decl.Type, localsCount); localsCount += decl.Count; @@ -272,9 +275,11 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // unsigned localsCount = 0; unsigned funcletIndex = m_compiler->compCurrFuncIdx; + FuncInfoDsc* const func = m_compiler->funGetFunc(funcletIndex); assert(funcletIndex > 0); - GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, WasmLocalsDecls[funcletIndex]->size()); - for (WasmLocalsDecl& decl : *WasmLocalsDecls[funcletIndex]) + assert(func->funWasmLocalDecls != nullptr); + GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, func->funWasmLocalDecls->size()); + for (FuncInfoDsc::WasmLocalsDecl& decl : *func->funWasmLocalDecls) { GetEmitter()->emitIns_I_Ty(INS_local_decl, decl.Count, decl.Type, localsCount); localsCount += decl.Count; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 25cbd89b7e73cf..9236f2b4fbce5e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -90,6 +90,7 @@ class PerLoopInfo; // defined in inductionvariableopts.cpp class RangeCheck; // defined in rangecheck.h #ifdef TARGET_WASM class WasmInterval; // defined in fgwasm.h +enum class WasmValueType : unsigned; #endif #ifdef DEBUG struct 
IndentStack; @@ -1705,6 +1706,21 @@ struct FuncInfoDsc // funclet. It is only valid if funKind field indicates this is a // EH-related funclet: FUNC_HANDLER or FUNC_FILTER +#if !HAS_FIXED_REGISTER_SET + regNumber funStackPointerReg; + regNumber funFramePointerReg; +#endif + +#ifdef TARGET_WASM + struct WasmLocalsDecl + { + WasmValueType Type; + unsigned Count; + }; + + jitstd::vector* funWasmLocalDecls; +#endif + #if defined(TARGET_AMD64) // TODO-AMD64-Throughput: make the AMD64 info more like the ARM info to avoid having this large static array. diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index fac08f153bb73c..9fc3641aeb33cf 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -3154,6 +3154,18 @@ PhaseStatus Compiler::fgCreateFunclets() // Setup the root FuncInfoDsc and prepare to start associating // FuncInfoDsc's with their corresponding EH region memset((void*)funcInfo, 0, funcCnt * sizeof(FuncInfoDsc)); +#if !HAS_FIXED_REGISTER_SET || defined(TARGET_WASM) + for (unsigned i = 0; i < funcCnt; i++) + { +#if !HAS_FIXED_REGISTER_SET + funcInfo[i].funStackPointerReg = REG_NA; + funcInfo[i].funFramePointerReg = REG_NA; +#endif +#ifdef TARGET_WASM + funcInfo[i].funWasmLocalDecls = nullptr; +#endif + } +#endif assert(funcInfo[0].funKind == FUNC_ROOT); funcIdx = 1; diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index bc02179bc9d816..853292fe31d38c 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -1070,11 +1070,12 @@ void WasmRegAlloc::ResolveReferences() } } - assert(m_compiler->codeGen->WasmLocalsDecls[m_currentFunclet] == nullptr); + FuncInfoDsc* const currentFunc = m_compiler->funGetFunc(m_currentFunclet); + assert(currentFunc->funWasmLocalDecls == nullptr); - jitstd::vector* decls = new (m_compiler->getAllocator(CMK_Codegen)) - jitstd::vector(m_compiler->getAllocator(CMK_Codegen)); - 
m_compiler->codeGen->WasmLocalsDecls[m_currentFunclet] = decls; + jitstd::vector* decls = new (m_compiler->getAllocator(CMK_Codegen)) + jitstd::vector(m_compiler->getAllocator(CMK_Codegen)); + currentFunc->funWasmLocalDecls = decls; for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) { From 7b96ad7333fbe9f8c72e23ca0ce215352220feca Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 11:01:41 -0700 Subject: [PATCH 07/26] handle catch_arg more directly; iterate funclets backwards; fix allocation for abi locals in funclets --- src/coreclr/jit/codegenwasm.cpp | 5 + src/coreclr/jit/regallocwasm.cpp | 165 ++++++++++++------------------- src/coreclr/jit/regallocwasm.h | 2 - 3 files changed, 68 insertions(+), 104 deletions(-) diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 42d6f57c5d95a9..208d6b99b55594 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -742,6 +742,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) GetEmitter()->emitIns(INS_unreachable); break; + case GT_CATCH_ARG: + // Catch arg is always in wasm local 3. + GetEmitter()->emitIns_I(INS_local_get, EA_GCREF, 3); + break; + default: #ifdef DEBUG if (JitConfig.JitWasmNyiToR2RUnsupported()) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 853292fe31d38c..9d7f5f200a3f0d 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -98,11 +98,6 @@ void WasmRegAlloc::IdentifyCandidates() { AllocateFramePointer(); } - - if (m_compiler->compFuncInfoCount > 1) - { - AllocateExceptionPointer(); - } } //------------------------------------------------------------------------ @@ -186,36 +181,6 @@ void WasmRegAlloc::AllocateFramePointer() } } -//------------------------------------------------------------------------ -// AllocateExceptionPointer: Allocate a virtual register for the exception pointer. 
-// -void WasmRegAlloc::AllocateExceptionPointer() -{ - regNumber exReg = REG_NA; - - for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) - { - const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[i]; - - if (funcInfo.funKind == FuncKind::FUNC_ROOT) - { - continue; - } - - EHblkDsc* const ehDsc = m_compiler->ehGetDsc(funcInfo.funEHIndex); - - if (ehDsc->HasCatchHandler()) - { - if (exReg == REG_NA) - { - exReg = AllocateVirtualRegister(TYP_REF); - } - - m_perRegionData[i]->m_exReg = exReg; - } - } -} - //------------------------------------------------------------------------ // AllocateVirtualRegister: Allocate a new virtual register. // @@ -409,10 +374,6 @@ void WasmRegAlloc::CollectReferencesForNode(GenTree* node) CollectReferencesForIndexAddr(node->AsIndexAddr()); break; - case GT_CATCH_ARG: - CollectReferencesForCatchArg(node); - break; - default: assert(!node->OperIsLocalStore()); break; @@ -583,21 +544,6 @@ void WasmRegAlloc::CollectReferencesForLclVar(GenTreeLclVar* lclVar) } } -//------------------------------------------------------------------------ -// CollectReferencesForCatchArg: Collect virtual register references for a CATCH_ARG node. -// -// Rewrites the CATCH_ARG node into a PHYS_REG that refers to the exception object. -// -// Arguments: -// catchArg - The CATCH_ARG node -// -void WasmRegAlloc::CollectReferencesForCatchArg(GenTree* catchArg) -{ - catchArg->ChangeOper(GT_PHYSREG); - catchArg->AsPhysReg()->gtSrcReg = m_perRegionData[m_currentFunclet]->m_exReg; - CollectReference(catchArg); -} - //------------------------------------------------------------------------ // RewriteLocalStackStore: rewrite a store to the stack to STOREIND(LCL_ADDR, ...). // @@ -809,10 +755,12 @@ void WasmRegAlloc::ResolveReferences() unsigned Index; }; - // Process funclet by funclet + // Resolve funclet by funclet, in reverse order, so that we process the main method region last. 
// - for (m_currentFunclet = 0; m_currentFunclet < m_compiler->compFuncInfoCount; m_currentFunclet++) + for (int i = static_cast(m_compiler->compFuncInfoCount) - 1; i >= 0; i--) { + m_currentFunclet = static_cast(i); + PhysicalRegBank virtToPhysRegMap[static_cast(WasmValueType::Count)]; for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) { @@ -901,45 +849,77 @@ void WasmRegAlloc::ResolveReferences() indexBase += physRegs.DeclaredCount; } + // Allocate all our virtual registers to physical ones. + // + regNumber spVirtReg = data->m_spReg; + regNumber fpVirtReg = data->m_fpReg; + auto allocPhysReg = [&](regNumber virtReg, LclVarDsc* varDsc) { - regNumber physReg; - if ((varDsc != nullptr) && varDsc->lvIsRegArg && !varDsc->lvIsStructField) + regNumber physReg = REG_NA; + + if (!inFunclet) { - unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); - const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(lclNum); - assert(abiInfo.HasExactlyOneRegisterSegment()); - physReg = abiInfo.Segment(0).GetRegister(); + // ABI registers in the main method + // + if ((varDsc != nullptr) && varDsc->lvIsRegArg && !varDsc->lvIsStructField) + { + unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); + const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(lclNum); + assert(abiInfo.HasExactlyOneRegisterSegment()); + physReg = abiInfo.Segment(0).GetRegister(); + } + else if ((varDsc != nullptr) && varDsc->lvIsParamRegTarget) + { + unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); + const ParameterRegisterLocalMapping* mapping = + m_compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); + assert(mapping != nullptr); + physReg = mapping->RegisterSegment->GetRegister(); + } } - else if ((varDsc != nullptr) && varDsc->lvIsParamRegTarget) + else { - unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); - const ParameterRegisterLocalMapping* mapping = - m_compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); - 
assert(mapping != nullptr); - physReg = mapping->RegisterSegment->GetRegister(); + // ABI registers in the funclets + if (virtReg == spVirtReg) + { + physReg = MakeWasmReg(0, TypeToWasmValueType(TYP_I_IMPL)); + } + else if (virtReg == fpVirtReg) + { + physReg = physReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); + } } - else + + if (physReg == REG_NA) { - WasmValueType type = WasmRegToType(virtReg); + WasmValueType type = WasmRegToType(virtReg); unsigned physRegIndex = virtToPhysRegMap[static_cast(type)].Index++; - physReg = MakeWasmReg(physRegIndex, type); + physReg = MakeWasmReg(physRegIndex, type); } assert(genIsValidReg(physReg)); if ((varDsc != nullptr) && varDsc->lvIsRegCandidate()) { - if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget) + if (!inFunclet && (varDsc->lvIsParam || varDsc->lvIsParamRegTarget)) { // This is the register codegen will move the local from its ABI location in prolog. varDsc->SetArgInitReg(physReg); } - // This is the location for the "first" def. In our case all defs share the same register. + // This is the location for this local in this funclet. Since we process the main method region + // last, its assignment (if any, see below) will be the one that persists after RA. + // + // Funclets may well use different local numbers. Note that any local that is live across a + // funclet boundary should not be a register allocation candidate, so there is no need for funclet + // and main method assignments to match. // - // TODO-WASM: this may no longer hold with funclets. If a local has disjoint lifetimes that do not - // cross funclet boundaries, it may need to be allocated to different registers in different funclets. + // TODO-WASM: this will lead to incorrect debug info in funclets. We may need to track per funclet + // assignments somewhere. + // + // TODO-WASM: we should reset this info as we switch funclets, so if there's a local only live + // in a funclet we don't report it as living in a reg in the main method. 
+ // Seems like this entails walking the full set of tracked locals. // - // assert(!varDsc->lvRegister); varDsc->SetRegNum(physReg); varDsc->lvRegister = true; varDsc->lvOnFrame = false; @@ -947,11 +927,8 @@ void WasmRegAlloc::ResolveReferences() return physReg; }; - // Allocate all our virtual registers to physical ones. + // Map SP and FP to physical registers. // - regNumber spVirtReg = data->m_spReg; - regNumber exVirtReg = data->m_exReg; - if (!inFunclet) { if (spVirtReg != REG_NA) @@ -959,16 +936,15 @@ void WasmRegAlloc::ResolveReferences() data->m_spReg = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); } - if (data->m_fpReg != REG_NA) + if (fpVirtReg != REG_NA) { - data->m_fpReg = (spVirtReg == data->m_fpReg) ? data->m_spReg : allocPhysReg(data->m_fpReg, nullptr); + data->m_fpReg = (spVirtReg == fpVirtReg) ? data->m_spReg : allocPhysReg(fpVirtReg, nullptr); } - - assert(data->m_exReg == REG_NA); } else { - // Funclets always have SP and FP, and maybe EX. + // Funclets always have SP and FP. + // // We do not have lclVars for funclet params. // SP is the same physreg as in the main method. 
// @@ -978,11 +954,6 @@ void WasmRegAlloc::ResolveReferences() assert(data->m_fpReg != REG_NA); data->m_fpReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); - - if (data->m_exReg != REG_NA) - { - data->m_exReg = MakeWasmReg(2, TypeToWasmValueType(TYP_REF)); - } } for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) @@ -1018,18 +989,8 @@ void WasmRegAlloc::ResolveReferences() GenTree* node = refs->Nodes[i]; if (node->OperIs(GT_PHYSREG)) { - if (node->AsPhysReg()->gtSrcReg == exVirtReg) - { - // Former CATCH_ARG -- should only see in funclets - assert(inFunclet); - node->AsPhysReg()->gtSrcReg = data->m_exReg; - } - else - { - assert(node->AsPhysReg()->gtSrcReg == spVirtReg); - node->AsPhysReg()->gtSrcReg = data->m_spReg; - } - + assert(node->AsPhysReg()->gtSrcReg == spVirtReg); + node->AsPhysReg()->gtSrcReg = data->m_spReg; assert(!genIsValidReg(node->GetRegNum())); // Currently we do not need to multi-use any PHYSREGs. continue; } diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 8d9cdbac17b657..40c9972661bc11 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -89,7 +89,6 @@ class WasmRegAlloc : public RegAllocInterface PerRegionData() : m_spReg(REG_NA) , m_fpReg(REG_NA) - , m_exReg(REG_NA) , m_lastVirtualRegRefsCount(0) , m_virtualRegRefs(nullptr) { @@ -105,7 +104,6 @@ class WasmRegAlloc : public RegAllocInterface // regNumber m_spReg; regNumber m_fpReg; - regNumber m_exReg; // Chunked list of virtual reg references in this region. 
// From 90066eb1d38f2a4d91dd676a94344f01f5369294 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 11:30:29 -0700 Subject: [PATCH 08/26] reset lvRegister; format --- src/coreclr/jit/codegenwasm.cpp | 6 +++--- src/coreclr/jit/regallocwasm.cpp | 27 +++++++++++++++++++-------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 208d6b99b55594..ec675127ccdb0e 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -273,9 +273,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // Local sig for the funclet // - unsigned localsCount = 0; - unsigned funcletIndex = m_compiler->compCurrFuncIdx; - FuncInfoDsc* const func = m_compiler->funGetFunc(funcletIndex); + unsigned localsCount = 0; + unsigned funcletIndex = m_compiler->compCurrFuncIdx; + FuncInfoDsc* const func = m_compiler->funGetFunc(funcletIndex); assert(funcletIndex > 0); assert(func->funWasmLocalDecls != nullptr); GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, func->funWasmLocalDecls->size()); diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 9d7f5f200a3f0d..ce9a0090be7a51 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -863,7 +863,7 @@ void WasmRegAlloc::ResolveReferences() // if ((varDsc != nullptr) && varDsc->lvIsRegArg && !varDsc->lvIsStructField) { - unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); + unsigned lclNum = m_compiler->lvaGetLclNum(varDsc); const ABIPassingInformation& abiInfo = m_compiler->lvaGetParameterABIInfo(lclNum); assert(abiInfo.HasExactlyOneRegisterSegment()); physReg = abiInfo.Segment(0).GetRegister(); @@ -892,9 +892,9 @@ void WasmRegAlloc::ResolveReferences() if (physReg == REG_NA) { - WasmValueType type = WasmRegToType(virtReg); + WasmValueType type = WasmRegToType(virtReg); unsigned physRegIndex = virtToPhysRegMap[static_cast(type)].Index++; - physReg = 
MakeWasmReg(physRegIndex, type); + physReg = MakeWasmReg(physRegIndex, type); } assert(genIsValidReg(physReg)); @@ -916,10 +916,6 @@ void WasmRegAlloc::ResolveReferences() // TODO-WASM: this will lead to incorrect debug info in funclets. We may need to track per funclet // assignments somewhere. // - // TODO-WASM: we should reset this info as we switch funclets, so if there's a local only live - // in a funclet we don't report it as living in a reg in the main method. - // Seems like this entails walking the full set of tracked locals. - // varDsc->SetRegNum(physReg); varDsc->lvRegister = true; varDsc->lvOnFrame = false; @@ -944,7 +940,7 @@ void WasmRegAlloc::ResolveReferences() else { // Funclets always have SP and FP. - // + // // We do not have lclVars for funclet params. // SP is the same physreg as in the main method. // @@ -1046,6 +1042,21 @@ void WasmRegAlloc::ResolveReferences() decls->push_back({type, physRegs.DeclaredCount}); } } + + // If this is not the main method region, remove any local var assignments. 
+ // + if (inFunclet) + { + for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) + { + unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); + LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); + if (varDsc->lvIsRegCandidate()) + { + varDsc->lvRegister = false; + } + } + } } } From 54b6959b3ce6d15faca6afb1626930e95099c10a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 12:28:17 -0700 Subject: [PATCH 09/26] revert internal reg codegen visible changes; handle in allocation instead --- src/coreclr/jit/codegencommon.cpp | 77 +++--------------------------- src/coreclr/jit/codegeninterface.h | 13 ++--- src/coreclr/jit/codegenwasm.cpp | 4 +- src/coreclr/jit/regallocwasm.cpp | 33 +++++++------ 4 files changed, 28 insertions(+), 99 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index b8d2c374ab2925..da9810817231e3 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -91,7 +91,6 @@ CodeGenInterface* getCodeGenerator(Compiler* comp) return new (comp, CMK_Codegen) CodeGen(comp); } -#if HAS_FIXED_REGISTER_SET //------------------------------------------------------------------------ // NodeInternalRegisters::NodeInternalRegisters: construct the // internal registers tracking data @@ -104,6 +103,7 @@ NodeInternalRegisters::NodeInternalRegisters(Compiler* comp) { } +#if HAS_FIXED_REGISTER_SET //------------------------------------------------------------------------ // Add: Add internal allocated registers for the specified node. // @@ -205,58 +205,6 @@ unsigned NodeInternalRegisters::Count(GenTree* tree, regMaskTP mask) return m_table.Lookup(tree, ®s) ? 
genCountBits(regs & mask) : 0; } #else // !HAS_FIXED_REGISTER_SET - -//------------------------------------------------------------------------ -// NodeInternalRegisters: construct the -// internal registers tracking data -// -// Arguments: -// comp -- compiler instance -// -NodeInternalRegisters::NodeInternalRegisters(Compiler* comp) - : m_compiler(comp) - , m_tables(comp->compFuncInfoCount, nullptr, comp->getAllocator(CMK_LSRA)) -{ -} - -//------------------------------------------------------------------------ -// GetOrCreateTable: get the internal register table for nodes in this funclet region -// or create it if it does not yet exist. -// -// Parameters: -// funcletIndex - Index of the funclet -// -// Returns: -// Pointer to the internal register table. -// -NodeInternalRegistersTable* NodeInternalRegisters::GetOrCreateTable(unsigned funcletIndex) -{ - assert(funcletIndex < m_tables.size()); - NodeInternalRegistersTable* table = m_tables[funcletIndex]; - if (table == nullptr) - { - table = new (m_compiler->getAllocator(CMK_LSRA)) NodeInternalRegistersTable(m_compiler->getAllocator(CMK_LSRA)); - m_tables[funcletIndex] = table; - } - return table; -} - -//------------------------------------------------------------------------ -// GetTable: get the internal register table for nodes in this funclet region. -// -// Parameters: -// funcletIndex - Index of the funclet -// -// Returns: -// Pointer to the internal register table. -// -NodeInternalRegistersTable* NodeInternalRegisters::GetTable(unsigned funcletIndex) -{ - assert(funcletIndex < m_tables.size()); - NodeInternalRegistersTable* table = m_tables[funcletIndex]; - return table; -} - //------------------------------------------------------------------------ // InternalRegs: construct an empty 'InternalRegs' instance. // @@ -363,14 +311,12 @@ regNumber InternalRegs::Extract() // Add: Add a register to the set of ones internally allocated for this node. 
// // Parameters: -// funcletIndex -- index of the funclet region for the tree // tree - IR node to add the internal allocated register to // reg - The register to add // -void NodeInternalRegisters::Add(unsigned funcletIndex, GenTree* tree, regNumber reg) +void NodeInternalRegisters::Add(GenTree* tree, regNumber reg) { - NodeInternalRegistersTable* const table = GetOrCreateTable(funcletIndex); - InternalRegs* const regs = table->LookupPointerOrAdd(tree, InternalRegs{}); + InternalRegs* regs = m_table.LookupPointerOrAdd(tree, InternalRegs{}); regs->Add(reg); } @@ -378,20 +324,14 @@ void NodeInternalRegisters::Add(unsigned funcletIndex, GenTree* tree, regNumber // GetAll: Get the internally allocated registers for the specified node. // // Parameters: -// funcletIndex -- index of the funclet region for the tree // tree - IR node to get the registers for // // Returns: // Pointer to the registers, nullptr if there are none. // -InternalRegs* NodeInternalRegisters::GetAll(unsigned funcletIndex, GenTree* tree) +InternalRegs* NodeInternalRegisters::GetAll(GenTree* tree) { - NodeInternalRegistersTable* const table = GetTable(funcletIndex); - if (table == nullptr) - { - return nullptr; - } - InternalRegs* const regs = table->LookupPointer(tree); + InternalRegs* regs = m_table.LookupPointer(tree); assert((regs == nullptr) || !regs->IsEmpty()); return regs; } @@ -399,15 +339,12 @@ InternalRegs* NodeInternalRegisters::GetAll(unsigned funcletIndex, GenTree* tree //------------------------------------------------------------------------ // Iterate: Get the iterator for the internal register table. // -// Parameters: -// table -- pointer to the internal register table for a codegen region. -// // Returns: // A 'for'-loop compatible iterator of the table entries. 
// -NodeInternalRegistersTable::KeyValueIteration NodeInternalRegisters::Iterate(NodeInternalRegistersTable* table) +NodeInternalRegistersTable::KeyValueIteration NodeInternalRegisters::Iterate() { - return NodeInternalRegistersTable::KeyValueIteration(table); + return NodeInternalRegistersTable::KeyValueIteration(&m_table); } #endif // !HAS_FIXED_REGISTER_SET diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 106a08cbbb69c0..ece71b2d4a9cb9 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -61,12 +61,7 @@ class InternalRegs using NodeInternalRegistersTable = JitHashTable, InternalRegs>; class NodeInternalRegisters { -#if HAS_FIXED_REGISTER_SET NodeInternalRegistersTable m_table; -#else // !HAS_FIXED_REGISTER_SET - Compiler* m_compiler; - jitstd::vector m_tables; -#endif // !HAS_FIXED_REGISTER_SET public: NodeInternalRegisters(Compiler* comp); @@ -78,11 +73,9 @@ class NodeInternalRegisters regMaskTP GetAll(GenTree* tree); unsigned Count(GenTree* tree, regMaskTP mask = static_cast(-1)); #else // !HAS_FIXED_REGISTER_SET - NodeInternalRegistersTable* GetOrCreateTable(unsigned funcletIndex); - NodeInternalRegistersTable* GetTable(unsigned funcletIndex); - void Add(unsigned funcletIndex, GenTree* tree, regNumber reg); - InternalRegs* GetAll(unsigned funcletIndex, GenTree* tree); - NodeInternalRegistersTable::KeyValueIteration Iterate(NodeInternalRegistersTable* table); + void Add(GenTree* tree, regNumber reg); + InternalRegs* GetAll(GenTree* tree); + NodeInternalRegistersTable::KeyValueIteration Iterate(); #endif // !HAS_FIXED_REGISTER_SET }; diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index ec675127ccdb0e..902d8dc36f184a 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -1295,7 +1295,7 @@ void CodeGen::genCodeForBinaryOverflow(GenTreeOp* treeNode) genConsumeOperands(treeNode); const bool is64BitOp = 
treeNode->TypeIs(TYP_LONG); - InternalRegs* regs = internalRegisters.GetAll(m_compiler->compCurrFuncIdx, treeNode); + InternalRegs* regs = internalRegisters.GetAll(treeNode); regNumber op1Reg = GetMultiUseOperandReg(treeNode->gtGetOp1()); regNumber op2Reg = GetMultiUseOperandReg(treeNode->gtGetOp2()); @@ -2835,7 +2835,7 @@ void CodeGen::genLclHeap(GenTree* tree) } // Fetch the internal register we reserved during RA - InternalRegs* regs = internalRegisters.GetAll(m_compiler->compCurrFuncIdx, tree); + InternalRegs* regs = internalRegisters.GetAll(tree); assert(regs->Count() == 1); regNumber sizeReg = regs->Extract(); assert(WasmRegToType(sizeReg) == TypeToWasmValueType(TYP_I_IMPL)); diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index ce9a0090be7a51..6acc3f06107a92 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -711,7 +711,8 @@ void WasmRegAlloc::ConsumeTemporaryRegForOperand(GenTree* operand DEBUGARG(const regNumber WasmRegAlloc::RequestInternalRegister(GenTree* node, var_types type) { regNumber reg = AllocateTemporaryRegister(type); - m_codeGen->internalRegisters.Add(m_currentFunclet, node, reg); + m_codeGen->internalRegisters.Add(node, reg); + CollectReference(node); return reg; } @@ -1005,26 +1006,24 @@ void WasmRegAlloc::ResolveReferences() } node->SetRegNum(physReg); - } - refsCount = ARRAY_SIZE(refs->Nodes); - } - NodeInternalRegistersTable* const internalRegTable = m_codeGen->internalRegisters.GetTable(m_currentFunclet); - if (internalRegTable != nullptr) - { - for (NodeInternalRegistersTable::Node* nodeWithInternalRegs : - m_codeGen->internalRegisters.Iterate(internalRegTable)) - { - InternalRegs* regs = &nodeWithInternalRegs->GetValueRef(); - unsigned count = regs->Count(); - for (unsigned i = 0; i < count; i++) + // If there are internal registers associated with this node, allocate them now. 
+ // + InternalRegs* const internalRegs = m_codeGen->internalRegisters.GetAll(node); + + if (internalRegs != nullptr) { - WasmValueType type; - unsigned index = UnpackWasmReg(regs->GetAt(i), &type); - regNumber physReg = temporaryRegMap[static_cast(type)].Regs[index]; - regs->SetAt(i, physReg); + unsigned count = internalRegs->Count(); + for (unsigned i = 0; i < count; i++) + { + WasmValueType type; + unsigned index = UnpackWasmReg(internalRegs->GetAt(i), &type); + regNumber physReg = temporaryRegMap[static_cast(type)].Regs[index]; + internalRegs->SetAt(i, physReg); + } } } + refsCount = ARRAY_SIZE(refs->Nodes); } FuncInfoDsc* const currentFunc = m_compiler->funGetFunc(m_currentFunclet); From 1dc057ffae9046fa397a483b3a5b930f68bbdab3 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 15:02:04 -0700 Subject: [PATCH 10/26] review feedback --- src/coreclr/jit/codegen.h | 2 + src/coreclr/jit/codegencommon.cpp | 11 ++- src/coreclr/jit/codegeninterface.h | 18 ++-- src/coreclr/jit/codegenwasm.cpp | 85 ++++++++++--------- src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/lclvars.cpp | 8 +- src/coreclr/jit/regallocwasm.cpp | 129 ++++++++++++++--------------- src/coreclr/jit/regallocwasm.h | 7 +- 8 files changed, 132 insertions(+), 130 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 6b2ca6af8484d4..59b1606a6609f0 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -217,6 +217,8 @@ class CodeGen final : public CodeGenInterface void WasmProduceReg(GenTree* node); regNumber GetMultiUseOperandReg(GenTree* operand); void genEmitNullCheck(regNumber reg); + unsigned GetStackPointerRegIndex() const; + unsigned GetFramePointerRegIndex() const; #endif void genEmitStartBlock(BasicBlock* block); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index da9810817231e3..ae1dda142df60e 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -58,7 +58,16 @@ 
void CodeGenInterface::setFramePointerRequiredEH(bool value) #endif // JIT32_GCENCODER } -#if !HAS_FIXED_REGISTER_SET +#if HAS_FIXED_REGISTER_SET +regNumber CodeGenInterface::GetStackPointerReg(unsigned) const +{ + return REG_SPBASE; +} +regNumber CodeGenInterface::GetFramePointerReg(unsigned) const +{ + return REG_FPBASE; +} +#else void CodeGenInterface::SetStackPointerReg(unsigned funcletIndex, regNumber reg) { assert(funcletIndex < m_compiler->compFuncInfoCount); diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index ece71b2d4a9cb9..5e116a80cdd8cd 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -272,21 +272,13 @@ class CodeGenInterface #if !HAS_FIXED_REGISTER_SET -public: - void SetStackPointerReg(unsigned funcletIndex, regNumber reg); - void SetFramePointerReg(unsigned funcletIndex, regNumber reg); + void SetStackPointerReg(unsigned funcletIndex, regNumber reg); + void SetFramePointerReg(unsigned funcletIndex, regNumber reg); + +#endif // !HAS_FIXED_REGISTER_SET + regNumber GetStackPointerReg(unsigned funcletIndex) const; regNumber GetFramePointerReg(unsigned funcletIndex) const; -#else // HAS_FIXED_REGISTER_SET - regNumber GetStackPointerReg() const - { - return REG_SPBASE; - } - regNumber GetFramePointerReg() const - { - return REG_FPBASE; - } -#endif // HAS_FIXED_REGISTER_SET public: int genCallerSPtoFPdelta() const; diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 902d8dc36f184a..bc63477a87fa07 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -38,6 +38,26 @@ static const instruction INS_I_ge_u = INS_i32_ge_u; static const instruction INS_I_gt_u = INS_i32_gt_u; #endif // !TARGET_64BIT +//------------------------------------------------------------------------ +// GetStackPointerRegIndex: get the Wasm local index for the stack pointer +// +unsigned CodeGen::GetStackPointerRegIndex() const +{ + regNumber spReg 
= GetStackPointerReg(m_compiler->funCurrentFuncIdx()); + assert(spReg != REG_NA); + return WasmRegToIndex(spReg); +} + +//------------------------------------------------------------------------ +// GetFramePointerRegIndex: get the Wasm local index for the frame pointer +// +unsigned CodeGen::GetFramePointerRegIndex() const +{ + regNumber fpReg = GetFramePointerReg(m_compiler->funCurrentFuncIdx()); + assert(fpReg != REG_NA); + return WasmRegToIndex(fpReg); +} + void CodeGen::genMarkLabelsForCodegen() { // No work needed here for now. @@ -49,11 +69,11 @@ void CodeGen::genMarkLabelsForCodegen() // void CodeGen::genBeginFnProlog() { - FuncInfoDsc* const func = m_compiler->funGetFunc(0); + FuncInfoDsc* const func = m_compiler->funGetFunc(ROOT_FUNC_IDX); assert(func->funWasmLocalDecls != nullptr); unsigned localsCount = 0; - assert(m_compiler->compCurrFuncIdx == 0); + assert(m_compiler->funCurrentFuncIdx() == 0); GetEmitter()->emitIns_I(INS_local_cnt, EA_8BYTE, func->funWasmLocalDecls->size()); for (FuncInfoDsc::WasmLocalsDecl& decl : *func->funWasmLocalDecls) { @@ -81,7 +101,7 @@ void CodeGen::genPushCalleeSavedRegisters() void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) { assert(m_compiler->compGeneratingProlog); - regNumber spReg = GetStackPointerReg(m_compiler->compCurrFuncIdx); + regNumber spReg = GetStackPointerReg(m_compiler->funCurrentFuncIdx()); if (spReg == REG_NA) { assert(!isFramePointerUsed()); @@ -106,7 +126,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni GetEmitter()->emitIns(INS_I_sub); GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, spLclIndex); } - regNumber fpReg = GetFramePointerReg(m_compiler->compCurrFuncIdx); + regNumber fpReg = GetFramePointerReg(m_compiler->funCurrentFuncIdx()); if ((fpReg != REG_NA) && (fpReg != spReg)) { GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, spLclIndex); @@ -172,8 +192,7 @@ void 
CodeGen::genHomeRegisterParamsOutsideProlog() storeType = genActualType(varDsc); } - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); GetEmitter()->emitIns_I(INS_local_get, emitActualTypeSize(storeType), WasmRegToIndex(segment.GetRegister())); GetEmitter()->emitIns_S(ins_Store(storeType), emitActualTypeSize(storeType), lclNum, offset); @@ -274,7 +293,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // Local sig for the funclet // unsigned localsCount = 0; - unsigned funcletIndex = m_compiler->compCurrFuncIdx; + unsigned funcletIndex = m_compiler->funCurrentFuncIdx(); FuncInfoDsc* const func = m_compiler->funGetFunc(funcletIndex); assert(funcletIndex > 0); assert(func->funWasmLocalDecls != nullptr); @@ -1777,8 +1796,7 @@ void CodeGen::genJumpToThrowHlpBlk(SpecialCodeKind codeKind) { GetEmitter()->emitIns_BlockTy(INS_if); // Throw helpers are managed so we need to push the stack pointer before genEmitHelperCall. 
- GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); genEmitHelperCall(m_compiler->acdHelper(codeKind), 0, EA_UNKNOWN); GetEmitter()->emitIns(INS_end); } @@ -2125,7 +2143,7 @@ void CodeGen::genCodeForLclAddr(GenTreeLclFld* lclAddrNode) unsigned lclNum = lclAddrNode->GetLclNum(); unsigned lclOffset = lclAddrNode->GetLclOffs(); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); if ((lclOffset != 0) || (m_compiler->lvaFrameAddress(lclNum, &FPBased) != 0)) { GetEmitter()->emitIns_S(INS_I_const, EA_PTRSIZE, lclNum, lclOffset); @@ -2145,7 +2163,7 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) assert(tree->OperIs(GT_LCL_FLD)); LclVarDsc* varDsc = m_compiler->lvaGetDesc(tree); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); GetEmitter()->emitIns_S(ins_Load(tree->TypeGet()), emitTypeSize(tree), tree->GetLclNum(), tree->GetLclOffs()); WasmProduceReg(tree); } @@ -2168,8 +2186,7 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) if (!varDsc->lvIsRegCandidate()) { var_types type = varDsc->GetRegisterType(tree); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); GetEmitter()->emitIns_S(ins_Load(type), emitTypeSize(type), tree->GetLclNum(), 0); WasmProduceReg(tree); } @@ -2784,19 +2801,16 @@ void CodeGen::genLclHeap(GenTree* tree) // Decrease the stack pointer by amount // - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - 
WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, amount); GetEmitter()->emitIns(INS_I_sub); - GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, GetStackPointerRegIndex()); // Zero the newly allocated space if needed // if (needsZeroing) { - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, 0); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, amount); @@ -2808,16 +2822,13 @@ void CodeGen::genLclHeap(GenTree* tree) // SP now points at the reserved space just below the allocation. // Save the frame pointer at sp[0]. // - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); GetEmitter()->emitIns_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, 0); // Leave the base address of the allocated region on the stack. 
// - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, reservedSpace); GetEmitter()->emitIns(INS_I_add); } @@ -2854,8 +2865,7 @@ void CodeGen::genLclHeap(GenTree* tree) GetEmitter()->emitIns(INS_else); { // Prepare to subtract from SP - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); // Add reserved space and round up request size to a multiple of STACK_ALIGN GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(sizeReg)); @@ -2872,13 +2882,11 @@ void CodeGen::genLclHeap(GenTree* tree) // Subtract rounded-up size from SP value, and save back to SP GetEmitter()->emitIns(INS_I_sub); - GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_set, EA_PTRSIZE, GetStackPointerRegIndex()); if (needsZeroing) { - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, 0); GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(sizeReg)); // TODO-WASM-CQ: possibly do small fills directly @@ -2886,15 +2894,12 @@ void CodeGen::genLclHeap(GenTree* tree) } // Re-establish unwind invariant: store FP at SP[0] - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); + 
GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); GetEmitter()->emitIns_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, 0); // Return value - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, - WasmRegToIndex(GetStackPointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetStackPointerRegIndex()); GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, reservedSpace); GetEmitter()->emitIns(INS_I_add); } @@ -2974,7 +2979,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD)); GenTreeLclVarCommon* lclVar = src->AsLclVarCommon(); bool fpBased; - srcReg = GetFramePointerReg(m_compiler->compCurrFuncIdx); + srcReg = GetFramePointerReg(m_compiler->funCurrentFuncIdx()); srcOffset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs(); assert(fpBased); } @@ -2983,7 +2988,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) { GenTreeLclVarCommon* lclVar = dest->AsLclVarCommon(); bool fpBased; - destReg = GetFramePointerReg(m_compiler->compCurrFuncIdx); + destReg = GetFramePointerReg(m_compiler->funCurrentFuncIdx()); destOffset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs(); assert(fpBased); } @@ -3194,7 +3199,7 @@ void CodeGen::genLoadLocalIntoReg(regNumber targetReg, unsigned lclNum) { LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); var_types type = varDsc->GetRegisterType(); - GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetFramePointerReg(m_compiler->compCurrFuncIdx))); + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); GetEmitter()->emitIns_S(ins_Load(type), emitTypeSize(type), lclNum, 0); GetEmitter()->emitIns_I(INS_local_set, emitTypeSize(type), WasmRegToIndex(targetReg)); } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9236f2b4fbce5e..7d115de8d8810e 100644 --- a/src/coreclr/jit/compiler.h +++ 
b/src/coreclr/jit/compiler.h @@ -1696,6 +1696,8 @@ enum FuncKind : BYTE FUNC_COUNT }; +constexpr unsigned ROOT_FUNC_IDX = 0; + class emitLocation; struct FuncInfoDsc diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index e4874070f7471d..c53e8a69078cc4 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -6031,12 +6031,8 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength) bool EBPbased; offset = lvaFrameAddress(lclNum, &EBPbased); -#if HAS_FIXED_REGISTER_SET - baseReg = EBPbased ? codeGen->GetFramePointerReg() : codeGen->GetStackPointerReg(); -#else - // Just use the sp/fp from the function region - baseReg = EBPbased ? codeGen->GetFramePointerReg(0) : codeGen->GetStackPointerReg(0); -#endif + // Use the sp/fp from the function region + baseReg = EBPbased ? codeGen->GetFramePointerReg(ROOT_FUNC_IDX) : codeGen->GetStackPointerReg(ROOT_FUNC_IDX); #endif // TARGET_ARM int printed = diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 6acc3f06107a92..b6ac135926d1e7 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -19,12 +19,12 @@ WasmRegAlloc::WasmRegAlloc(Compiler* compiler) : m_compiler(compiler) , m_codeGen(compiler->codeGen) , m_currentBlock(nullptr) - , m_currentFunclet(0) - , m_perRegionData(compiler->compFuncInfoCount, nullptr, compiler->getAllocator(CMK_LSRA)) + , m_currentFunclet(ROOT_FUNC_IDX) + , m_perFuncletData(compiler->compFuncCount(), nullptr, compiler->getAllocator(CMK_LSRA)) { - for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) + for (unsigned i = 0; i < m_compiler->compFuncCount(); i++) { - m_perRegionData[i] = new (compiler->getAllocator(CMK_LSRA)) PerRegionData(); + m_perFuncletData[i] = new (compiler->getAllocator(CMK_LSRA)) PerFuncletData(); } } @@ -94,7 +94,7 @@ void WasmRegAlloc::IdentifyCandidates() } } - if (anyFrameLocals || m_compiler->compLocallocUsed || (m_compiler->compFuncInfoCount > 1)) + 
if (anyFrameLocals || m_compiler->compLocallocUsed || (m_compiler->compFuncCount() > 1)) { AllocateFramePointer(); } @@ -147,13 +147,13 @@ void WasmRegAlloc::AllocateStackPointer() { // This is the same virtual register in all regions // - if (m_perRegionData[0]->m_spReg == REG_NA) + if (m_perFuncletData[ROOT_FUNC_IDX]->m_spReg == REG_NA) { regNumber spReg = AllocateVirtualRegister(TYP_I_IMPL); - for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) + for (unsigned i = 0; i < m_compiler->compFuncCount(); i++) { - m_perRegionData[i]->m_spReg = spReg; + m_perFuncletData[i]->m_spReg = spReg; } } } @@ -166,18 +166,27 @@ void WasmRegAlloc::AllocateFramePointer() // FP is initialized with SP in the prolog, so ensure the latter is allocated. AllocateStackPointer(); - regNumber const spReg = m_perRegionData[0]->m_spReg; - regNumber const fpReg = AllocateVirtualRegister(TYP_I_IMPL); + regNumber const spReg = m_perFuncletData[ROOT_FUNC_IDX]->m_spReg; + + bool const needFpReg = m_compiler->compLocallocUsed || (m_compiler->compFuncCount() > 1); + regNumber const fpReg = needFpReg ? AllocateVirtualRegister(TYP_I_IMPL) : REG_NA; // Main method can use SP for frame access, if there is no localloc. // - m_perRegionData[0]->m_fpReg = m_compiler->compLocallocUsed ? 
fpReg : spReg; + if (m_compiler->compLocallocUsed) + { + m_perFuncletData[ROOT_FUNC_IDX]->m_fpReg = fpReg; + } + else + { + m_perFuncletData[ROOT_FUNC_IDX]->m_fpReg = spReg; + } - // Funclets must always use FP for frame access + // Funclets must always use a distinct FP for frame access // - for (unsigned i = 1; i < m_compiler->compFuncInfoCount; i++) + for (unsigned i = 1; i < m_compiler->compFuncCount(); i++) { - m_perRegionData[i]->m_fpReg = fpReg; + m_perFuncletData[i]->m_fpReg = fpReg; } } @@ -539,7 +548,7 @@ void WasmRegAlloc::CollectReferencesForLclVar(GenTreeLclVar* lclVar) if (lclVar->GetLclNum() == m_compiler->lvaWasmSpArg) { lclVar->ChangeOper(GT_PHYSREG); - lclVar->AsPhysReg()->gtSrcReg = m_perRegionData[m_currentFunclet]->m_spReg; + lclVar->AsPhysReg()->gtSrcReg = m_perFuncletData[m_currentFunclet]->m_spReg; CollectReference(lclVar); } } @@ -614,7 +623,7 @@ void WasmRegAlloc::RewriteLocalStackStore(GenTreeLclVarCommon* lclNode) // void WasmRegAlloc::CollectReference(GenTree* node) { - PerRegionData* const data = m_perRegionData[m_currentFunclet]; + PerFuncletData* const data = m_perFuncletData[m_currentFunclet]; VirtualRegReferences* refs = data->m_virtualRegRefs; if (refs == nullptr) { @@ -758,7 +767,7 @@ void WasmRegAlloc::ResolveReferences() // Resolve funclet by funclet, in reverse order, so that we process the main method region last. 
// - for (int i = static_cast(m_compiler->compFuncInfoCount) - 1; i >= 0; i--) + for (int i = static_cast(m_compiler->compFuncCount()) - 1; i >= 0; i--) { m_currentFunclet = static_cast(i); @@ -770,10 +779,12 @@ void WasmRegAlloc::ResolveReferences() physRegs.DeclaredCount = virtRegs.Count(); } - unsigned indexBase = 0; - const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[m_currentFunclet]; - const bool inFunclet = funcInfo.funKind != FuncKind::FUNC_ROOT; - PerRegionData* const data = m_perRegionData[m_currentFunclet]; + unsigned indexBase = 0; + const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[m_currentFunclet]; + const bool inFunclet = funcInfo.funKind != FuncKind::FUNC_ROOT; + PerFuncletData* const data = m_perFuncletData[m_currentFunclet]; + regNumber const spVirtReg = data->m_spReg; + regNumber const fpVirtReg = data->m_fpReg; switch (funcInfo.funKind) { @@ -814,32 +825,28 @@ void WasmRegAlloc::ResolveReferences() case FuncKind::FUNC_HANDLER: case FuncKind::FUNC_FILTER: { - // TODO: add ABI information for funclets? - // - // All funclets have two initial arguments: sp and fp. + // TODO-WASM: add ABI information for funclets? // WasmValueType argType = TypeToWasmValueType(TYP_I_IMPL); - virtToPhysRegMap[static_cast(argType)].DeclaredCount--; - virtToPhysRegMap[static_cast(argType)].DeclaredCount--; - indexBase += 2; + if (spVirtReg != REG_NA) + { + virtToPhysRegMap[static_cast(argType)].DeclaredCount--; + } - // Filter and catch have a third argument - the exception object. - // - EHblkDsc* const eh = m_compiler->ehGetDsc(funcInfo.funEHIndex); - if ((funcInfo.funKind == FuncKind::FUNC_FILTER) || eh->HasCatchHandler()) + if (fpVirtReg != REG_NA && fpVirtReg != spVirtReg) { - argType = TypeToWasmValueType(TYP_REF); virtToPhysRegMap[static_cast(argType)].DeclaredCount--; - indexBase += 1; } + + EHblkDsc* const ehDsc = m_compiler->ehGetDsc(funcInfo.funEHIndex); + indexBase = ehDsc->HasCatchHandler() ? 
3 : 2; } break; default: - assert(!"Unexpected funclet kind"); - break; + unreached(); } for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) @@ -852,9 +859,6 @@ void WasmRegAlloc::ResolveReferences() // Allocate all our virtual registers to physical ones. // - regNumber spVirtReg = data->m_spReg; - regNumber fpVirtReg = data->m_fpReg; - auto allocPhysReg = [&](regNumber virtReg, LclVarDsc* varDsc) { regNumber physReg = REG_NA; @@ -926,32 +930,22 @@ void WasmRegAlloc::ResolveReferences() // Map SP and FP to physical registers. // - if (!inFunclet) + if (spVirtReg != REG_NA) { - if (spVirtReg != REG_NA) + data->m_spReg = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); + } + + if (fpVirtReg != REG_NA) + { + if (inFunclet) { - data->m_spReg = allocPhysReg(spVirtReg, m_compiler->lvaGetDesc(m_compiler->lvaWasmSpArg)); + data->m_fpReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); } - - if (fpVirtReg != REG_NA) + else { data->m_fpReg = (spVirtReg == fpVirtReg) ? data->m_spReg : allocPhysReg(fpVirtReg, nullptr); } } - else - { - // Funclets always have SP and FP. - // - // We do not have lclVars for funclet params. - // SP is the same physreg as in the main method. 
- // - assert(data->m_spReg != REG_NA); - data->m_spReg = MakeWasmReg(0, TypeToWasmValueType(TYP_I_IMPL)); - assert(data->m_spReg == m_perRegionData[0]->m_spReg); - - assert(data->m_fpReg != REG_NA); - data->m_fpReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); - } for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) { @@ -1068,14 +1062,17 @@ void WasmRegAlloc::PublishAllocationResults() { bool usesFramePointer = false; - for (unsigned i = 0; i < m_compiler->compFuncInfoCount; i++) + for (unsigned i = 0; i < m_compiler->compFuncCount(); i++) { - PerRegionData* const data = m_perRegionData[i]; + PerFuncletData* const data = m_perFuncletData[i]; #ifdef DEBUG - if (i == 0) + if (i == ROOT_FUNC_IDX) { - JITDUMP("Allocated function SP into: %s\n", getRegName(data->m_spReg)); + if (data->m_spReg != REG_NA) + { + JITDUMP("Allocated function SP into: %s\n", getRegName(data->m_spReg)); + } if (data->m_fpReg != REG_NA) { @@ -1089,20 +1086,20 @@ void WasmRegAlloc::PublishAllocationResults() } #endif // DEBUG - m_codeGen->SetStackPointerReg(i, data->m_spReg); + if (data->m_fpReg != REG_NA) + { + m_codeGen->SetStackPointerReg(i, data->m_spReg); + } if (data->m_fpReg != REG_NA) { m_codeGen->SetFramePointerReg(i, data->m_fpReg); - usesFramePointer |= (data->m_fpReg != data->m_spReg); + usesFramePointer = true; } } m_codeGen->setFramePointerUsed(usesFramePointer); - // We don't need to publish the exRegs for codegen; all references were in codegen - // and all were converted to GT_PHYSREG. - m_compiler->raMarkStkVars(); m_compiler->compRegAllocDone = true; } diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 40c9972661bc11..28272398d8b516 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -84,9 +84,9 @@ class WasmRegAlloc : public RegAllocInterface // We need to allocate per funclet. This struct holds the per-funclet state. 
// - struct PerRegionData + struct PerFuncletData { - PerRegionData() + PerFuncletData() : m_spReg(REG_NA) , m_fpReg(REG_NA) , m_lastVirtualRegRefsCount(0) @@ -111,7 +111,7 @@ class WasmRegAlloc : public RegAllocInterface VirtualRegReferences* m_virtualRegRefs; }; - jitstd::vector m_perRegionData; + jitstd::vector m_perFuncletData; public: WasmRegAlloc(Compiler* compiler); @@ -154,7 +154,6 @@ class WasmRegAlloc : public RegAllocInterface void CollectReferencesForBlockStore(GenTreeBlk* node); void CollectReferencesForLclVar(GenTreeLclVar* lclVar); void CollectReferencesForIndexAddr(GenTreeIndexAddr* indexAddrNode); - void CollectReferencesForCatchArg(GenTree* node); void RewriteLocalStackStore(GenTreeLclVarCommon* node); void CollectReference(GenTree* node); void RequestTemporaryRegisterForMultiplyUsedNode(GenTree* node); From 5ab018e768ec19c6584676edfc3fa46dab1033be Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 15:26:17 -0700 Subject: [PATCH 11/26] review feedback --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenwasm.cpp | 17 +++++++++++++++-- src/coreclr/jit/regallocwasm.h | 1 - 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 59b1606a6609f0..31dbe07107177c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -775,6 +775,7 @@ class CodeGen final : public CodeGenInterface #if defined(TARGET_WASM) void genCodeForConstant(GenTree* treeNode); + void genCatchArg(GenTree* treeNode); #endif #if defined(TARGET_X86) diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index bc63477a87fa07..55e911e2d26332 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -762,8 +762,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; case GT_CATCH_ARG: - // Catch arg is always in wasm local 3. 
- GetEmitter()->emitIns_I(INS_local_get, EA_GCREF, 3); + genCatchArg(treeNode); break; default: @@ -855,6 +854,20 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) } } +//------------------------------------------------------------------------ +// genCatchArg: emit code for GT_CATCH_ARG +// +// Arguments: +// treeNode - catch arg node +// +void CodeGen::genCatchArg(GenTree* treeNode) +{ + assert(treeNode->OperIs(GT_CATCH_ARG)); + // The catch arg is passed as the 3rd parameter, so has Wasm local index 2. + GetEmitter()->emitIns_I(INS_local_get, EA_GCREF, 2); + WasmProduceReg(treeNode); +} + //------------------------------------------------------------------------ // PackOperAndType: Pack a genTreeOps and var_types into a uint32_t // diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 28272398d8b516..172e52d9c121cc 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -135,7 +135,6 @@ class WasmRegAlloc : public RegAllocInterface void InitializeStackPointer(); void AllocateStackPointer(); void AllocateFramePointer(); - void AllocateExceptionPointer(); regNumber AllocateVirtualRegister(var_types type); regNumber AllocateVirtualRegister(WasmValueType type); regNumber AllocateTemporaryRegister(var_types type); From 3509cd246330a2c4fa5ef7cc057ee5b1f76c955d Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 15:33:25 -0700 Subject: [PATCH 12/26] review feedback --- src/coreclr/jit/regallocwasm.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index b6ac135926d1e7..718986ad8542e6 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -986,12 +986,15 @@ void WasmRegAlloc::ResolveReferences() continue; } - regNumber physReg; + // Since we now collect references for nodes with internal registers, we may see + // cases where the node itself does not have a valid 
reg. + // + regNumber physReg = REG_NA; if (node->OperIs(GT_STORE_LCL_VAR)) { physReg = m_compiler->lvaGetDesc(node->AsLclVarCommon())->GetRegNum(); } - else + else if (genIsValidReg(node->GetRegNum())) { assert(!node->OperIsLocal() || !m_compiler->lvaGetDesc(node->AsLclVarCommon())->lvIsRegCandidate()); WasmValueType type; @@ -999,7 +1002,10 @@ void WasmRegAlloc::ResolveReferences() physReg = temporaryRegMap[static_cast(type)].Regs[index]; } - node->SetRegNum(physReg); + if (physReg != REG_NA) + { + node->SetRegNum(physReg); + } // If there are internal registers associated with this node, allocate them now. // From 692a79bf65f111ef505ed47b3e5e32ec5403650a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 16:15:30 -0700 Subject: [PATCH 13/26] fix duplicated assignment --- src/coreclr/jit/regallocwasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 718986ad8542e6..755fa910d7b42b 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -891,7 +891,7 @@ void WasmRegAlloc::ResolveReferences() } else if (virtReg == fpVirtReg) { - physReg = physReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); + physReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); } } From 876bb11484921cd8c31b90c919579bfc1add9952 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 2 Apr 2026 18:20:21 -0700 Subject: [PATCH 14/26] make sure each node is only on the collect references list once --- src/coreclr/jit/lir.h | 7 ++++--- src/coreclr/jit/regallocwasm.cpp | 11 +++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index b212ee69974fbd..dfb854cb265daf 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -42,9 +42,10 @@ class LIR final // require a register (i.e. it can be used from memory). 
#ifdef TARGET_WASM - MultiplyUsed = 0x08, // Set by lowering on nodes that the RA should allocate into - // a dedicated register (WASM local), for multiple uses. -#endif // TARGET_WASM + MultiplyUsed = 0x08, // Set by lowering on nodes that the RA should allocate into + // a dedicated register (WASM local), for multiple uses. + VirtualRefsCollected = 0x10, // Set on nodes that have had their virtual register references collected. +#endif // TARGET_WASM }; }; diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 755fa910d7b42b..9b39985825cf7b 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -623,6 +623,13 @@ void WasmRegAlloc::RewriteLocalStackStore(GenTreeLclVarCommon* lclNode) // void WasmRegAlloc::CollectReference(GenTree* node) { + if ((node->gtLIRFlags & LIR::Flags::VirtualRefsCollected) != LIR::Flags::None) + { + return; + } + + node->gtLIRFlags |= LIR::Flags::VirtualRefsCollected; + PerFuncletData* const data = m_perFuncletData[m_currentFunclet]; VirtualRegReferences* refs = data->m_virtualRegRefs; if (refs == nullptr) @@ -978,6 +985,10 @@ void WasmRegAlloc::ResolveReferences() for (size_t i = 0; i < refsCount; i++) { GenTree* node = refs->Nodes[i]; + + assert((node->gtLIRFlags & LIR::Flags::VirtualRefsCollected) != LIR::Flags::None); + node->gtLIRFlags &= ~LIR::Flags::VirtualRefsCollected; + if (node->OperIs(GT_PHYSREG)) { assert(node->AsPhysReg()->gtSrcReg == spVirtReg); From 839781a9d0c9d39d0345eaf02d6ca6d09ce44205 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Apr 2026 07:36:32 -0700 Subject: [PATCH 15/26] fix publish of SP --- src/coreclr/jit/regallocwasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 9b39985825cf7b..ebd8bbc0f8a1df 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -1103,7 +1103,7 @@ void 
WasmRegAlloc::PublishAllocationResults() } #endif // DEBUG - if (data->m_fpReg != REG_NA) + if (data->m_spReg != REG_NA) { m_codeGen->SetStackPointerReg(i, data->m_spReg); } From 98641695d2ef3616f80a124715a3b7c228929714 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Apr 2026 07:49:22 -0700 Subject: [PATCH 16/26] remove redundant checks for mapping fp reg to physreg --- src/coreclr/jit/regallocwasm.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index ebd8bbc0f8a1df..c1438ef661bbf8 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -944,14 +944,7 @@ void WasmRegAlloc::ResolveReferences() if (fpVirtReg != REG_NA) { - if (inFunclet) - { - data->m_fpReg = MakeWasmReg(1, TypeToWasmValueType(TYP_I_IMPL)); - } - else - { - data->m_fpReg = (spVirtReg == fpVirtReg) ? data->m_spReg : allocPhysReg(fpVirtReg, nullptr); - } + data->m_fpReg = (spVirtReg == fpVirtReg) ? 
data->m_spReg : allocPhysReg(fpVirtReg, nullptr); } for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) From b2129f80ed7236fc55931f4f4e852b038d81ff35 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Apr 2026 08:33:57 -0700 Subject: [PATCH 17/26] use funclet reverse iterator during resolution --- src/coreclr/jit/compiler.h | 9 +++++++++ src/coreclr/jit/regallocwasm.cpp | 15 ++++++++------- src/coreclr/jit/regallocwasm.h | 3 +-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 22c40a9b62b14c..e9159678839dd9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1728,6 +1728,7 @@ struct FuncInfoDsc BasicBlock* GetStartBlock(Compiler* comp) const; BasicBlock* GetLastBlock(Compiler* comp) const; BasicBlockRangeList Blocks(Compiler* comp) const; + unsigned GetFuncletIdx(Compiler* comp) const; #if defined(TARGET_AMD64) @@ -12936,6 +12937,14 @@ inline BasicBlockRangeList FuncInfoDsc::Blocks(Compiler* comp) const return BasicBlockRangeList(GetStartBlock(comp), GetLastBlock(comp)); } +inline unsigned FuncInfoDsc::GetFuncletIdx(Compiler* comp) const +{ + assert((comp->compFuncInfos <= this) && (this < (comp->compFuncInfos + comp->compFuncInfoCount))); + unsigned funcletIdx = (unsigned)(this - comp->compFuncInfos); + assert(this == &comp->compFuncInfos[funcletIdx]); + return funcletIdx; +} + // FuncInfoRange: adapter class for forward or reverse iteration of a contiguous range of function/funclet // descriptors using range-based `for`, e.g.: // for (FuncInfoDsc* const func : compiler->Funcs()) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index c1438ef661bbf8..51a1eff9ec8f55 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -774,9 +774,11 @@ void WasmRegAlloc::ResolveReferences() // Resolve funclet by funclet, in reverse order, so that we process the main method region last. 
// - for (int i = static_cast(m_compiler->compFuncCount()) - 1; i >= 0; i--) + for (FuncInfoDsc* const funcInfo : m_compiler->Funcs().Reverse()) { - m_currentFunclet = static_cast(i); + // Make the funclet index available globally + // + m_currentFunclet = funcInfo->GetFuncletIdx(m_compiler); PhysicalRegBank virtToPhysRegMap[static_cast(WasmValueType::Count)]; for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) @@ -787,13 +789,12 @@ void WasmRegAlloc::ResolveReferences() } unsigned indexBase = 0; - const FuncInfoDsc& funcInfo = m_compiler->compFuncInfos[m_currentFunclet]; - const bool inFunclet = funcInfo.funKind != FuncKind::FUNC_ROOT; + const bool inFunclet = funcInfo->funKind != FuncKind::FUNC_ROOT; PerFuncletData* const data = m_perFuncletData[m_currentFunclet]; regNumber const spVirtReg = data->m_spReg; regNumber const fpVirtReg = data->m_fpReg; - switch (funcInfo.funKind) + switch (funcInfo->funKind) { case FuncKind::FUNC_ROOT: { @@ -841,12 +842,12 @@ void WasmRegAlloc::ResolveReferences() virtToPhysRegMap[static_cast(argType)].DeclaredCount--; } - if (fpVirtReg != REG_NA && fpVirtReg != spVirtReg) + if ((fpVirtReg != REG_NA) && (fpVirtReg != spVirtReg)) { virtToPhysRegMap[static_cast(argType)].DeclaredCount--; } - EHblkDsc* const ehDsc = m_compiler->ehGetDsc(funcInfo.funEHIndex); + EHblkDsc* const ehDsc = funcInfo->GetEHDesc(m_compiler); indexBase = ehDsc->HasCatchHandler() ? 3 : 2; } diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 172e52d9c121cc..8a092520770861 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -99,8 +99,7 @@ class WasmRegAlloc : public RegAllocInterface // used to access the fixed portion of the frame. For fixed-size frames (no localloc), these will be the same. // // In funclets FP will refer to the fixed portion of the parent frame. It will likely be in a different Wasm - // local than the FP in the main function body. 
EX will refer to the exception object local for filter and catch - // funclets. + // local than the FP in the main function body. // regNumber m_spReg; regNumber m_fpReg; From c00657b6116df61a1ecd47761df7f4ca2bc5797d Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Apr 2026 15:06:28 -0700 Subject: [PATCH 18/26] implement recordVarLocationsAtStartOfBB --- src/coreclr/jit/regallocwasm.cpp | 96 +++++++++++++++++++++----------- src/coreclr/jit/regallocwasm.h | 20 ++++--- 2 files changed, 78 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 51a1eff9ec8f55..563f9171c083b8 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -20,11 +20,12 @@ WasmRegAlloc::WasmRegAlloc(Compiler* compiler) , m_codeGen(compiler->codeGen) , m_currentBlock(nullptr) , m_currentFunclet(ROOT_FUNC_IDX) + , m_virtualRegAssignments(compiler->lvaTrackedCount, REG_STK, compiler->getAllocator(CMK_LSRA)) , m_perFuncletData(compiler->compFuncCount(), nullptr, compiler->getAllocator(CMK_LSRA)) { for (unsigned i = 0; i < m_compiler->compFuncCount(); i++) { - m_perFuncletData[i] = new (compiler->getAllocator(CMK_LSRA)) PerFuncletData(); + m_perFuncletData[i] = new (compiler->getAllocator(CMK_LSRA)) PerFuncletData(compiler); } } @@ -38,8 +39,47 @@ PhaseStatus WasmRegAlloc::doRegisterAllocation() return PhaseStatus::MODIFIED_EVERYTHING; } +//------------------------------------------------------------------------ +// recordVarLocationsAtStartOfBB: update enregistered local vars to +// reflect the current register assignment +// +// Arguments: +// bb - the basic block whose start is being processed +// void WasmRegAlloc::recordVarLocationsAtStartOfBB(BasicBlock* bb) { + // Register assignments only change at funclet boundaries + // + bool const isFuncletEntry = m_compiler->bbIsFuncletBeg(bb); + bool const isFuncEntry = m_compiler->fgFirstBB == bb; + + if (!isFuncletEntry && !isFuncEntry) + { + return; + 
} + + JITDUMP("Recording Var Locations at start of " FMT_BB "\n", bb->bbNum); + + unsigned const funcIdx = isFuncEntry ? ROOT_FUNC_IDX : m_compiler->funGetFuncIdx(bb); + PerFuncletData* const funcData = m_perFuncletData[funcIdx]; + const jitstd::vector& assignments = funcData->m_physicalRegAssignments; + bool hasAssignment = false; + + for (unsigned varIdx = 0; varIdx < assignments.size(); varIdx++) + { + unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIdx); + LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); + regNumber const reg = assignments[varIdx]; + varDsc->SetRegNum(reg); + + if (reg != REG_STK) + { + JITDUMP(" V%02u(%s)", lclNum, getRegName(reg)); + hasAssignment = true; + } + } + + JITDUMP("%s\n", hasAssignment ? "" : " "); } bool WasmRegAlloc::willEnregisterLocalVars() const @@ -113,6 +153,8 @@ void WasmRegAlloc::InitializeCandidate(LclVarDsc* varDsc) regNumber reg = AllocateVirtualRegister(varDsc->GetRegisterType()); varDsc->SetRegNum(reg); varDsc->lvLRACandidate = true; + + m_virtualRegAssignments[varDsc->lvVarIndex] = reg; } //------------------------------------------------------------------------ @@ -168,8 +210,8 @@ void WasmRegAlloc::AllocateFramePointer() regNumber const spReg = m_perFuncletData[ROOT_FUNC_IDX]->m_spReg; - bool const needFpReg = m_compiler->compLocallocUsed || (m_compiler->compFuncCount() > 1); - regNumber const fpReg = needFpReg ? AllocateVirtualRegister(TYP_I_IMPL) : REG_NA; + bool const needUniqueFpReg = m_compiler->compLocallocUsed || (m_compiler->compFuncCount() > 1); + regNumber const fpReg = needUniqueFpReg ? AllocateVirtualRegister(TYP_I_IMPL) : REG_NA; // Main method can use SP for frame access, if there is no localloc. // @@ -919,15 +961,7 @@ void WasmRegAlloc::ResolveReferences() varDsc->SetArgInitReg(physReg); } - // This is the location for this local in this funclet. Since we process the main method region - // last, its assignment (if any, see below) will be the one that persists after RA. 
- // - // Funclets may well use different local numbers. Note that any local that is live across a - // funclet boundary should not be a register allocation candidate, so there is no need for funclet - // and main method assignments to match. - // - // TODO-WASM: this will lead to incorrect debug info in funclets. We may need to track per funclet - // assignments somewhere. + // This is the location for this local in this funclet. // varDsc->SetRegNum(physReg); varDsc->lvRegister = true; @@ -950,17 +984,23 @@ void WasmRegAlloc::ResolveReferences() for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) { - unsigned lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); + unsigned lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + if (lclNum == m_compiler->lvaWasmSpArg) { - continue; // Handled above. + // Allocation was handled above. } - - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - if (varDsc->lvIsRegCandidate()) + else if (!varDsc->lvIsRegCandidate()) + { + continue; + } + else { allocPhysReg(varDsc->GetRegNum(), varDsc); } + + data->m_physicalRegAssignments[varIndex] = varDsc->GetRegNum(); } for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) @@ -1031,12 +1071,13 @@ void WasmRegAlloc::ResolveReferences() refsCount = ARRAY_SIZE(refs->Nodes); } - FuncInfoDsc* const currentFunc = m_compiler->funGetFunc(m_currentFunclet); - assert(currentFunc->funWasmLocalDecls == nullptr); + // Set up the per-funclet local info Wasm needs + // + assert(funcInfo->funWasmLocalDecls == nullptr); jitstd::vector* decls = new (m_compiler->getAllocator(CMK_Codegen)) jitstd::vector(m_compiler->getAllocator(CMK_Codegen)); - currentFunc->funWasmLocalDecls = decls; + funcInfo->funWasmLocalDecls = decls; for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) { @@ -1047,19 +1088,12 @@ void WasmRegAlloc::ResolveReferences() } } - // If 
this is not the main method region, remove any local var assignments. + // Reset all lcl var assignments back to their virtual registers. // - if (inFunclet) + for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) { - for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) - { - unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); - LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); - if (varDsc->lvIsRegCandidate()) - { - varDsc->lvRegister = false; - } - } + LclVarDsc* varDsc = m_compiler->lvaGetDescByTrackedIndex(varIndex); + varDsc->SetRegNum(m_virtualRegAssignments[varIndex]); } } } diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 8a092520770861..71ba1d987e3e4a 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -75,22 +75,24 @@ struct VirtualRegReferences class WasmRegAlloc : public RegAllocInterface { - Compiler* m_compiler; - CodeGenInterface* m_codeGen; - BasicBlock* m_currentBlock; - unsigned m_currentFunclet; - VirtualRegStack m_virtualRegs[static_cast(WasmValueType::Count)]; - TemporaryRegStack m_temporaryRegs[static_cast(WasmValueType::Count)]; + Compiler* m_compiler; + CodeGenInterface* m_codeGen; + BasicBlock* m_currentBlock; + unsigned m_currentFunclet; + VirtualRegStack m_virtualRegs[static_cast(WasmValueType::Count)]; + TemporaryRegStack m_temporaryRegs[static_cast(WasmValueType::Count)]; + jitstd::vector m_virtualRegAssignments; // We need to allocate per funclet. This struct holds the per-funclet state. 
// struct PerFuncletData { - PerFuncletData() + PerFuncletData(Compiler* comp) : m_spReg(REG_NA) , m_fpReg(REG_NA) , m_lastVirtualRegRefsCount(0) , m_virtualRegRefs(nullptr) + , m_physicalRegAssignments(comp->lvaTrackedCount, REG_STK, comp->getAllocator(CMK_LSRA)) { } @@ -108,6 +110,10 @@ class WasmRegAlloc : public RegAllocInterface // unsigned m_lastVirtualRegRefsCount; VirtualRegReferences* m_virtualRegRefs; + + // Map from local tracked index to phys reg for that local, in this funclet. + // + jitstd::vector m_physicalRegAssignments; }; jitstd::vector m_perFuncletData; From 2095f3bcc58a2d532d8347e85cb553dea1968657 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Apr 2026 15:21:57 -0700 Subject: [PATCH 19/26] no eh write thru yet --- src/coreclr/jit/regalloc.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 957d02d0778231..315bedffc55a4a 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -412,7 +412,19 @@ bool RegAllocImpl::isRegCandidate(LclVarDsc* varDsc) if (compiler->opts.MinOpts() && compiler->compHndBBtabCount > 0) { compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LiveInOutOfHandler)); + return false; + } + +#if defined(TARGET_WASM) + // Wasm RA currently does not support EH write-thru, so any local live in or out + // of a handler must be located only on the stack. 
+ // + if (varDsc->lvLiveInOutOfHndlr) + { + compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LiveInOutOfHandler)); + return false; } +#endif // defined(TARGET_WASM) if (varDsc->lvDoNotEnregister) { From 0c2ec0345d878d143e7cc9c8ccf625ee5bcde5d9 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Apr 2026 15:59:59 -0700 Subject: [PATCH 20/26] defer setting sizes until the RA phase runs --- src/coreclr/jit/regallocwasm.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 563f9171c083b8..afe69f2a51753c 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -20,17 +20,20 @@ WasmRegAlloc::WasmRegAlloc(Compiler* compiler) , m_codeGen(compiler->codeGen) , m_currentBlock(nullptr) , m_currentFunclet(ROOT_FUNC_IDX) - , m_virtualRegAssignments(compiler->lvaTrackedCount, REG_STK, compiler->getAllocator(CMK_LSRA)) + , m_virtualRegAssignments(compiler->getAllocator(CMK_LSRA)) , m_perFuncletData(compiler->compFuncCount(), nullptr, compiler->getAllocator(CMK_LSRA)) { - for (unsigned i = 0; i < m_compiler->compFuncCount(); i++) - { - m_perFuncletData[i] = new (compiler->getAllocator(CMK_LSRA)) PerFuncletData(compiler); - } } PhaseStatus WasmRegAlloc::doRegisterAllocation() { + m_virtualRegAssignments.resize(m_compiler->lvaTrackedCount, REG_STK); + + for (unsigned i = 0; i < m_compiler->compFuncCount(); i++) + { + m_perFuncletData[i] = new (m_compiler->getAllocator(CMK_LSRA)) PerFuncletData(m_compiler); + } + IdentifyCandidates(); CollectReferences(); ResolveReferences(); From 18cf566b61806e72bb2505b17ea036ef6f73cd26 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Apr 2026 18:32:29 -0700 Subject: [PATCH 21/26] part-way through deferring setting the physregs until the end of resolution --- src/coreclr/jit/regallocwasm.cpp | 85 +++++++++++++++++++++++--------- src/coreclr/jit/regallocwasm.h | 20 ++++---- 2 files 
changed, 72 insertions(+), 33 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index afe69f2a51753c..d282aa11abcc5c 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -20,15 +20,12 @@ WasmRegAlloc::WasmRegAlloc(Compiler* compiler) , m_codeGen(compiler->codeGen) , m_currentBlock(nullptr) , m_currentFunclet(ROOT_FUNC_IDX) - , m_virtualRegAssignments(compiler->getAllocator(CMK_LSRA)) , m_perFuncletData(compiler->compFuncCount(), nullptr, compiler->getAllocator(CMK_LSRA)) { } PhaseStatus WasmRegAlloc::doRegisterAllocation() { - m_virtualRegAssignments.resize(m_compiler->lvaTrackedCount, REG_STK); - for (unsigned i = 0; i < m_compiler->compFuncCount(); i++) { m_perFuncletData[i] = new (m_compiler->getAllocator(CMK_LSRA)) PerFuncletData(m_compiler); @@ -51,23 +48,57 @@ PhaseStatus WasmRegAlloc::doRegisterAllocation() // void WasmRegAlloc::recordVarLocationsAtStartOfBB(BasicBlock* bb) { + // We expect that RA has left the main method physical register assignments + // in the local vars (eg that RA processes the main method last and codegen + // processes it first). Verify this. + // + bool const isFuncEntry = m_compiler->fgFirstBB == bb; + + if (isFuncEntry) + { +#ifdef DEBUG + JITDUMP("Recording Var Locations at start of method entry " FMT_BB "\n", bb->bbNum); + + PerFuncletData* const rootFuncData = m_perFuncletData[ROOT_FUNC_IDX]; + const jitstd::vector& assignments = rootFuncData->m_physicalRegAssignments; + bool hasAssignment = false; + + for (unsigned varIdx = 0; varIdx < assignments.size(); varIdx++) + { + unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIdx); + LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); + regNumber const reg = assignments[varIdx]; + assert(varDsc->GetRegNum() == reg); + + if (reg != REG_STK) + { + JITDUMP(" V%02u(%s)", lclNum, getRegName(reg)); + hasAssignment = true; + } + } + + JITDUMP("%s\n", hasAssignment ? 
"" : " "); +#endif // DEBUG + + return; + } + // Register assignments only change at funclet boundaries // bool const isFuncletEntry = m_compiler->bbIsFuncletBeg(bb); - bool const isFuncEntry = m_compiler->fgFirstBB == bb; - if (!isFuncletEntry && !isFuncEntry) + if (!isFuncletEntry) { return; } - JITDUMP("Recording Var Locations at start of " FMT_BB "\n", bb->bbNum); - - unsigned const funcIdx = isFuncEntry ? ROOT_FUNC_IDX : m_compiler->funGetFuncIdx(bb); + unsigned const funcIdx = m_compiler->funGetFuncIdx(bb); PerFuncletData* const funcData = m_perFuncletData[funcIdx]; const jitstd::vector& assignments = funcData->m_physicalRegAssignments; bool hasAssignment = false; + JITDUMP("Recording Var Locations at start of funclet %u entry" FMT_BB "\n", funcIdx, bb->bbNum); + for (unsigned varIdx = 0; varIdx < assignments.size(); varIdx++) { unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIdx); @@ -156,8 +187,6 @@ void WasmRegAlloc::InitializeCandidate(LclVarDsc* varDsc) regNumber reg = AllocateVirtualRegister(varDsc->GetRegisterType()); varDsc->SetRegNum(reg); varDsc->lvLRACandidate = true; - - m_virtualRegAssignments[varDsc->lvVarIndex] = reg; } //------------------------------------------------------------------------ @@ -964,11 +993,7 @@ void WasmRegAlloc::ResolveReferences() varDsc->SetArgInitReg(physReg); } - // This is the location for this local in this funclet. - // - varDsc->SetRegNum(physReg); - varDsc->lvRegister = true; - varDsc->lvOnFrame = false; + data->m_physicalRegAssignments[varDsc->lvVarIndex] = physReg; } return physReg; }; @@ -985,6 +1010,9 @@ void WasmRegAlloc::ResolveReferences() data->m_fpReg = (spVirtReg == fpVirtReg) ? data->m_spReg : allocPhysReg(fpVirtReg, nullptr); } + // Do likewise for the enregisgtered locals. + // Note we do not update the LclVarDsc here. 
+ // for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) { unsigned lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); @@ -1002,8 +1030,6 @@ void WasmRegAlloc::ResolveReferences() { allocPhysReg(varDsc->GetRegNum(), varDsc); } - - data->m_physicalRegAssignments[varIndex] = varDsc->GetRegNum(); } for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) @@ -1040,7 +1066,10 @@ void WasmRegAlloc::ResolveReferences() regNumber physReg = REG_NA; if (node->OperIs(GT_STORE_LCL_VAR)) { - physReg = m_compiler->lvaGetDesc(node->AsLclVarCommon())->GetRegNum(); + LclVarDsc* const varDsc = m_compiler->lvaGetDesc(node->AsLclVarCommon()); + WasmValueType type; + unsigned index = UnpackWasmReg(varDsc->GetRegNum(), &type); + physReg = data->m_physicalRegAssignments[varDsc->lvVarIndex]; } else if (genIsValidReg(node->GetRegNum())) { @@ -1090,13 +1119,23 @@ void WasmRegAlloc::ResolveReferences() decls->push_back({type, physRegs.DeclaredCount}); } } + } - // Reset all lcl var assignments back to their virtual registers. - // - for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) + // Set all lcl var assignments to the main method's allocations. 
+ // + const jitstd::vector& mainFuncAssignment = m_perFuncletData[ROOT_FUNC_IDX]->m_physicalRegAssignments; + for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) + { + LclVarDsc* const varDsc = m_compiler->lvaGetDescByTrackedIndex(varIndex); + regNumber const assignedReg = mainFuncAssignment[varIndex]; + + if (genIsValidReg(assignedReg)) { - LclVarDsc* varDsc = m_compiler->lvaGetDescByTrackedIndex(varIndex); - varDsc->SetRegNum(m_virtualRegAssignments[varIndex]); + varDsc->SetRegNum(mainFuncAssignment[varIndex]); + + // may want to set these earlier + varDsc->lvRegister = true; + varDsc->lvOnFrame = false; } } } diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 71ba1d987e3e4a..1f1d19114874cc 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -75,15 +75,15 @@ struct VirtualRegReferences class WasmRegAlloc : public RegAllocInterface { - Compiler* m_compiler; - CodeGenInterface* m_codeGen; - BasicBlock* m_currentBlock; - unsigned m_currentFunclet; - VirtualRegStack m_virtualRegs[static_cast(WasmValueType::Count)]; - TemporaryRegStack m_temporaryRegs[static_cast(WasmValueType::Count)]; - jitstd::vector m_virtualRegAssignments; - - // We need to allocate per funclet. This struct holds the per-funclet state. + Compiler* m_compiler; + CodeGenInterface* m_codeGen; + BasicBlock* m_currentBlock; + unsigned m_currentFunclet; + VirtualRegStack m_virtualRegs[static_cast(WasmValueType::Count)]; + TemporaryRegStack m_temporaryRegs[static_cast(WasmValueType::Count)]; + + // We allocate per funclet. This struct holds the per-funclet state. + // (we treat the main fuction body as a funclet). // struct PerFuncletData { @@ -106,7 +106,7 @@ class WasmRegAlloc : public RegAllocInterface regNumber m_spReg; regNumber m_fpReg; - // Chunked list of virtual reg references in this region. + // Chunked list of virtual reg references in this funclet. 
// unsigned m_lastVirtualRegRefsCount; VirtualRegReferences* m_virtualRegRefs; From c7f5207cc6cee4d2387e330dc66597abea20c1c4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 4 Apr 2026 08:20:02 -0700 Subject: [PATCH 22/26] make LclVar reg updates aware of the current state, avoid work in some cases --- src/coreclr/jit/regallocwasm.cpp | 87 ++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index d282aa11abcc5c..3850c974287748 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -46,29 +46,55 @@ PhaseStatus WasmRegAlloc::doRegisterAllocation() // Arguments: // bb - the basic block whose start is being processed // +// Notes: +// This relies on m_currentFunclet to try and avoid work in some cases. +// void WasmRegAlloc::recordVarLocationsAtStartOfBB(BasicBlock* bb) { - // We expect that RA has left the main method physical register assignments - // in the local vars (eg that RA processes the main method last and codegen - // processes it first). Verify this. + // Register assignments only change at funclet boundaries // - bool const isFuncEntry = m_compiler->fgFirstBB == bb; + bool const isFuncEntry = m_compiler->fgFirstBB == bb; + bool const isFuncletEntry = m_compiler->bbIsFuncletBeg(bb); - if (isFuncEntry) + if (!isFuncletEntry && !isFuncEntry) { -#ifdef DEBUG - JITDUMP("Recording Var Locations at start of method entry " FMT_BB "\n", bb->bbNum); + return; + } + + unsigned const funcIdx = isFuncEntry ? ROOT_FUNC_IDX : m_compiler->funGetFuncIdx(bb); - PerFuncletData* const rootFuncData = m_perFuncletData[ROOT_FUNC_IDX]; - const jitstd::vector& assignments = rootFuncData->m_physicalRegAssignments; + // Walk all the assignments for this funclet, and update or verify the LclVarDscs accordingly. 
+ // + auto updateOrVerifyAssignments = [=](bool verify = false) { + PerFuncletData* const funcData = m_perFuncletData[funcIdx]; + const jitstd::vector& assignments = funcData->m_physicalRegAssignments; bool hasAssignment = false; + if (isFuncletEntry) + { + JITDUMP("%s Var Locations to start of funclet %u entry " FMT_BB "\n", verify ? "Reporting" : "Updating", + funcIdx, bb->bbNum); + } + else + { + JITDUMP("%s Var Locations to start of method entry " FMT_BB "\n", verify ? "Reporting" : "Updating", + bb->bbNum); + } + for (unsigned varIdx = 0; varIdx < assignments.size(); varIdx++) { unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIdx); LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); regNumber const reg = assignments[varIdx]; - assert(varDsc->GetRegNum() == reg); + + if (verify) + { + assert(varDsc->GetRegNum() == reg); + } + else + { + varDsc->SetRegNum(reg); + } if (reg != REG_STK) { @@ -78,42 +104,25 @@ void WasmRegAlloc::recordVarLocationsAtStartOfBB(BasicBlock* bb) } JITDUMP("%s\n", hasAssignment ? "" : " "); -#endif // DEBUG - - return; - } + }; - // Register assignments only change at funclet boundaries + // The current assignments may alread hold the desired state. 
// - bool const isFuncletEntry = m_compiler->bbIsFuncletBeg(bb); - - if (!isFuncletEntry) + if (m_currentFunclet == funcIdx) { +#ifdef DEBUG + // No work required, just verify/dump the current state + updateOrVerifyAssignments(/* verify */ true); +#endif // DEBUG + return; } - unsigned const funcIdx = m_compiler->funGetFuncIdx(bb); - PerFuncletData* const funcData = m_perFuncletData[funcIdx]; - const jitstd::vector& assignments = funcData->m_physicalRegAssignments; - bool hasAssignment = false; - - JITDUMP("Recording Var Locations at start of funclet %u entry" FMT_BB "\n", funcIdx, bb->bbNum); - - for (unsigned varIdx = 0; varIdx < assignments.size(); varIdx++) - { - unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIdx); - LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); - regNumber const reg = assignments[varIdx]; - varDsc->SetRegNum(reg); - - if (reg != REG_STK) - { - JITDUMP(" V%02u(%s)", lclNum, getRegName(reg)); - hasAssignment = true; - } - } + updateOrVerifyAssignments(); - JITDUMP("%s\n", hasAssignment ? "" : " "); + // Record what the LclVarDsc assignments hold. + // + m_currentFunclet = funcIdx; } bool WasmRegAlloc::willEnregisterLocalVars() const From 2620d36f32b4cae9dd5ba97712ae74c668a12a11 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 4 Apr 2026 08:43:12 -0700 Subject: [PATCH 23/26] simpler multi-collection avoidance --- src/coreclr/jit/lir.h | 7 +++---- src/coreclr/jit/regallocwasm.cpp | 16 ++++++++-------- src/coreclr/jit/regallocwasm.h | 2 ++ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index dfb854cb265daf..b212ee69974fbd 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -42,10 +42,9 @@ class LIR final // require a register (i.e. it can be used from memory). #ifdef TARGET_WASM - MultiplyUsed = 0x08, // Set by lowering on nodes that the RA should allocate into - // a dedicated register (WASM local), for multiple uses. 
- VirtualRefsCollected = 0x10, // Set on nodes that have had their virtual register references collected. -#endif // TARGET_WASM + MultiplyUsed = 0x08, // Set by lowering on nodes that the RA should allocate into + // a dedicated register (WASM local), for multiple uses. +#endif // TARGET_WASM }; }; diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 3850c974287748..10f17c7dbd2eba 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -706,15 +706,17 @@ void WasmRegAlloc::RewriteLocalStackStore(GenTreeLclVarCommon* lclNode) // void WasmRegAlloc::CollectReference(GenTree* node) { - if ((node->gtLIRFlags & LIR::Flags::VirtualRefsCollected) != LIR::Flags::None) + PerFuncletData* const data = m_perFuncletData[m_currentFunclet]; + + // We may make multiple consecutive collection calls for the same node, but we only need to record it once. + // + if (node == data->m_lastCollectedNode) { return; } - node->gtLIRFlags |= LIR::Flags::VirtualRefsCollected; - - PerFuncletData* const data = m_perFuncletData[m_currentFunclet]; VirtualRegReferences* refs = data->m_virtualRegRefs; + if (refs == nullptr) { refs = new (m_compiler->getAllocator(CMK_LSRA_RefPosition)) VirtualRegReferences(); @@ -730,6 +732,7 @@ void WasmRegAlloc::CollectReference(GenTree* node) assert(data->m_lastVirtualRegRefsCount < ARRAY_SIZE(refs->Nodes)); refs->Nodes[data->m_lastVirtualRegRefsCount++] = node; + data->m_lastCollectedNode = node; } //------------------------------------------------------------------------ @@ -1056,10 +1059,7 @@ void WasmRegAlloc::ResolveReferences() { for (size_t i = 0; i < refsCount; i++) { - GenTree* node = refs->Nodes[i]; - - assert((node->gtLIRFlags & LIR::Flags::VirtualRefsCollected) != LIR::Flags::None); - node->gtLIRFlags &= ~LIR::Flags::VirtualRefsCollected; + GenTree* const node = refs->Nodes[i]; if (node->OperIs(GT_PHYSREG)) { diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h 
index 1f1d19114874cc..6bfa364233dc0a 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -91,6 +91,7 @@ class WasmRegAlloc : public RegAllocInterface : m_spReg(REG_NA) , m_fpReg(REG_NA) , m_lastVirtualRegRefsCount(0) + , m_lastCollectedNode(nullptr) , m_virtualRegRefs(nullptr) , m_physicalRegAssignments(comp->lvaTrackedCount, REG_STK, comp->getAllocator(CMK_LSRA)) { @@ -109,6 +110,7 @@ class WasmRegAlloc : public RegAllocInterface // Chunked list of virtual reg references in this funclet. // unsigned m_lastVirtualRegRefsCount; + GenTree* m_lastCollectedNode; VirtualRegReferences* m_virtualRegRefs; // Map from local tracked index to phys reg for that local, in this funclet. From ffdee6a599917ff4424c6b3d22b5bb20b67f3af4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 4 Apr 2026 09:10:58 -0700 Subject: [PATCH 24/26] add note about debug liveness tracking --- src/coreclr/jit/regallocwasm.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 10f17c7dbd2eba..d1850664b9c1a9 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -81,11 +81,11 @@ void WasmRegAlloc::recordVarLocationsAtStartOfBB(BasicBlock* bb) bb->bbNum); } - for (unsigned varIdx = 0; varIdx < assignments.size(); varIdx++) + for (unsigned varIndex = 0; varIndex < assignments.size(); varIndex++) { - unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIdx); + unsigned const lclNum = m_compiler->lvaTrackedIndexToLclNum(varIndex); LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); - regNumber const reg = assignments[varIdx]; + regNumber const reg = assignments[varIndex]; if (verify) { @@ -94,6 +94,10 @@ void WasmRegAlloc::recordVarLocationsAtStartOfBB(BasicBlock* bb) else { varDsc->SetRegNum(reg); + + // Unlike LSRA, we do not change assignments within a funclet. + // And no locals are live across a funclet boundary. 
So there + // is no need for any debug liveness update here. } if (reg != REG_STK) From 3d2ec5003750f33fe6085b320865e5b2a3c8efdd Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 4 Apr 2026 17:41:43 -0700 Subject: [PATCH 25/26] fix typos, remove unused computation --- src/coreclr/jit/codegenwasm.cpp | 2 +- src/coreclr/jit/regallocwasm.cpp | 6 ++---- src/coreclr/jit/regallocwasm.h | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 83f50a61470fc4..b795bc95f0546b 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -309,7 +309,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) } //------------------------------------------------------------------------ -// genFuncletEpilog: codegen for funclet eplogs. +// genFuncletEpilog: codegen for funclet epilogs. // // For Wasm, funclet epilogs are empty // diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index d1850664b9c1a9..606e531510c2e3 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -110,7 +110,7 @@ void WasmRegAlloc::recordVarLocationsAtStartOfBB(BasicBlock* bb) JITDUMP("%s\n", hasAssignment ? "" : " "); }; - // The current assignments may alread hold the desired state. + // The current assignments may already hold the desired state. // if (m_currentFunclet == funcIdx) { @@ -1026,7 +1026,7 @@ void WasmRegAlloc::ResolveReferences() data->m_fpReg = (spVirtReg == fpVirtReg) ? data->m_spReg : allocPhysReg(fpVirtReg, nullptr); } - // Do likewise for the enregisgtered locals. + // Do likewise for the enregistered locals. // Note we do not update the LclVarDsc here. 
// for (unsigned varIndex = 0; varIndex < m_compiler->lvaTrackedCount; varIndex++) @@ -1080,8 +1080,6 @@ void WasmRegAlloc::ResolveReferences() if (node->OperIs(GT_STORE_LCL_VAR)) { LclVarDsc* const varDsc = m_compiler->lvaGetDesc(node->AsLclVarCommon()); - WasmValueType type; - unsigned index = UnpackWasmReg(varDsc->GetRegNum(), &type); physReg = data->m_physicalRegAssignments[varDsc->lvVarIndex]; } else if (genIsValidReg(node->GetRegNum())) diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index 6bfa364233dc0a..cdc1ea9f7ff2ef 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -83,7 +83,7 @@ class WasmRegAlloc : public RegAllocInterface TemporaryRegStack m_temporaryRegs[static_cast(WasmValueType::Count)]; // We allocate per funclet. This struct holds the per-funclet state. - // (we treat the main fuction body as a funclet). + // (we treat the main function body as a funclet). // struct PerFuncletData { From 675e0e3ffc289e5b7391973dd7e724aa6ef7ff48 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 4 Apr 2026 17:54:38 -0700 Subject: [PATCH 26/26] review feedback --- src/coreclr/jit/regallocwasm.cpp | 37 +++++++++++++++++++------------- src/coreclr/jit/regallocwasm.h | 2 -- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index 606e531510c2e3..ad184e96919a8d 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -711,16 +711,20 @@ void WasmRegAlloc::RewriteLocalStackStore(GenTreeLclVarCommon* lclNode) void WasmRegAlloc::CollectReference(GenTree* node) { PerFuncletData* const data = m_perFuncletData[m_currentFunclet]; + VirtualRegReferences* refs = data->m_virtualRegRefs; - // We may make multiple consecutive collection calls for the same node, but we only need to record it once. + // We may make multiple consecutive collection calls for the same node. + // We only want to collect it once. 
// - if (node == data->m_lastCollectedNode) + if (data->m_lastVirtualRegRefsCount > 0) { - return; + assert(refs != nullptr); + if (node == refs->Nodes[data->m_lastVirtualRegRefsCount - 1]) + { + return; + } } - VirtualRegReferences* refs = data->m_virtualRegRefs; - if (refs == nullptr) { refs = new (m_compiler->getAllocator(CMK_LSRA_RefPosition)) VirtualRegReferences(); @@ -736,7 +740,6 @@ void WasmRegAlloc::CollectReference(GenTree* node) assert(data->m_lastVirtualRegRefsCount < ARRAY_SIZE(refs->Nodes)); refs->Nodes[data->m_lastVirtualRegRefsCount++] = node; - data->m_lastCollectedNode = node; } //------------------------------------------------------------------------ @@ -1003,12 +1006,6 @@ void WasmRegAlloc::ResolveReferences() assert(genIsValidReg(physReg)); if ((varDsc != nullptr) && varDsc->lvIsRegCandidate()) { - if (!inFunclet && (varDsc->lvIsParam || varDsc->lvIsParamRegTarget)) - { - // This is the register codegen will move the local from its ABI location in prolog. - varDsc->SetArgInitReg(physReg); - } - data->m_physicalRegAssignments[varDsc->lvVarIndex] = physReg; } return physReg; @@ -1080,7 +1077,7 @@ void WasmRegAlloc::ResolveReferences() if (node->OperIs(GT_STORE_LCL_VAR)) { LclVarDsc* const varDsc = m_compiler->lvaGetDesc(node->AsLclVarCommon()); - physReg = data->m_physicalRegAssignments[varDsc->lvVarIndex]; + physReg = data->m_physicalRegAssignments[varDsc->lvVarIndex]; } else if (genIsValidReg(node->GetRegNum())) { @@ -1144,9 +1141,19 @@ void WasmRegAlloc::ResolveReferences() { varDsc->SetRegNum(mainFuncAssignment[varIndex]); - // may want to set these earlier - varDsc->lvRegister = true; + // While register allocations are fixed per-funclet, they are unlikely + // to agree across all funclets. 
+ // + varDsc->lvRegister = (m_compiler->compFuncCount() == 1); varDsc->lvOnFrame = false; + + if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget) + { + // This is the register codegen will move the local to from its + // ABI location in main function's prolog. + // + varDsc->SetArgInitReg(assignedReg); + } } } } diff --git a/src/coreclr/jit/regallocwasm.h b/src/coreclr/jit/regallocwasm.h index cdc1ea9f7ff2ef..f4cba0b3316819 100644 --- a/src/coreclr/jit/regallocwasm.h +++ b/src/coreclr/jit/regallocwasm.h @@ -91,7 +91,6 @@ class WasmRegAlloc : public RegAllocInterface : m_spReg(REG_NA) , m_fpReg(REG_NA) , m_lastVirtualRegRefsCount(0) - , m_lastCollectedNode(nullptr) , m_virtualRegRefs(nullptr) , m_physicalRegAssignments(comp->lvaTrackedCount, REG_STK, comp->getAllocator(CMK_LSRA)) { @@ -110,7 +109,6 @@ class WasmRegAlloc : public RegAllocInterface // Chunked list of virtual reg references in this funclet. // unsigned m_lastVirtualRegRefsCount; - GenTree* m_lastCollectedNode; VirtualRegReferences* m_virtualRegRefs; // Map from local tracked index to phys reg for that local, in this funclet.