From 3fd1122724a1140b6268dd85fae3dc4122076c70 Mon Sep 17 00:00:00 2001 From: Stanley Gambarin Date: Thu, 8 Dec 2022 07:01:41 -0800 Subject: [PATCH] [PATCH 10/79] [Backport to 15] Relax OpenCL extended instruction restrictions (#1755) * [Backport to 15] Relax OpenCL extended instruction restrictions (#1724) When VectorAnyINTEL capability is enabled, OpenCL extended instructions are able to work on vector types with any number of components greater than or equal to 2. * [Backport to 15] Translate LLVM intrinsics into native_* OpenCL instructions (#1729) When an intrinsic function is called with `afn` flag, it's allowed to substitute an approximate calculation. So the translator can emit native versions of OpenCL extended instructions. Co-authored-by: Victor Mustya Gbp-Pq: Name 0010-Backport-to-15-Relax-OpenCL-extended-instruction-res.patch --- lib/SPIRV/SPIRVUtil.cpp | 6 +- lib/SPIRV/SPIRVWriter.cpp | 77 +++++++++++++------ test/llvm-intrinsics/sqrt.ll | 5 +- ...wer-non-standard-vec-intrinsic-with-ext.ll | 42 ++++++++++ 4 files changed, 105 insertions(+), 25 deletions(-) create mode 100644 test/lower-non-standard-vec-intrinsic-with-ext.ll diff --git a/lib/SPIRV/SPIRVUtil.cpp b/lib/SPIRV/SPIRVUtil.cpp index 9ff96c3..72e9eb5 100644 --- a/lib/SPIRV/SPIRVUtil.cpp +++ b/lib/SPIRV/SPIRVUtil.cpp @@ -1829,7 +1829,8 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { Ty = VecTy->getElementType(); } if ((!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy()) || - ((NumElems > 4) && (NumElems != 8) && (NumElems != 16))) { + (!BM->hasCapability(CapabilityVectorAnyINTEL) && + ((NumElems > 4) && (NumElems != 8) && (NumElems != 16)))) { BM->SPIRVCK( false, InvalidFunctionCall, II->getCalledOperand()->getName().str()); return false; @@ -1844,7 +1845,8 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { Ty = VecTy->getElementType(); } if ((!Ty->isIntegerTy()) || - ((NumElems > 4) && (NumElems != 8) && (NumElems != 
16))) { + (!BM->hasCapability(CapabilityVectorAnyINTEL) && + ((NumElems > 4) && (NumElems != 8) && (NumElems != 16)))) { BM->SPIRVCK( false, InvalidFunctionCall, II->getCalledOperand()->getName().str()); } diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp index b4d7f33..f219fcc 100644 --- a/lib/SPIRV/SPIRVWriter.cpp +++ b/lib/SPIRV/SPIRVWriter.cpp @@ -1613,7 +1613,7 @@ void transAliasingMemAccess(SPIRVModule *BM, MDNode *AliasingListMD, std::vector &MemoryAccess, SPIRVWord MemAccessMask) { if (!BM->isAllowedToUseExtension( - ExtensionID::SPV_INTEL_memory_access_aliasing)) + ExtensionID::SPV_INTEL_memory_access_aliasing)) return; auto *MemAliasList = addMemAliasingINTELInstructions(BM, AliasingListMD); if (!MemAliasList) @@ -2543,20 +2543,17 @@ bool LLVMToSPIRVBase::transAlign(Value *V, SPIRVValue *BV) { void LLVMToSPIRVBase::transMemAliasingINTELDecorations(Instruction *Inst, SPIRVValue *BV) { if (!BM->isAllowedToUseExtension( - ExtensionID::SPV_INTEL_memory_access_aliasing)) + ExtensionID::SPV_INTEL_memory_access_aliasing)) return; - if (MDNode *AliasingListMD = - Inst->getMetadata(LLVMContext::MD_alias_scope)) { - auto *MemAliasList = - addMemAliasingINTELInstructions(BM, AliasingListMD); + if (MDNode *AliasingListMD = Inst->getMetadata(LLVMContext::MD_alias_scope)) { + auto *MemAliasList = addMemAliasingINTELInstructions(BM, AliasingListMD); if (!MemAliasList) return; BV->addDecorate(new SPIRVDecorateId(DecorationAliasScopeINTEL, BV, MemAliasList->getId())); } if (MDNode *AliasingListMD = Inst->getMetadata(LLVMContext::MD_noalias)) { - auto *MemAliasList = - addMemAliasingINTELInstructions(BM, AliasingListMD); + auto *MemAliasList = addMemAliasingINTELInstructions(BM, AliasingListMD); if (!MemAliasList) return; BV->addDecorate( @@ -3318,6 +3315,38 @@ static SPIRVWord getBuiltinIdForIntrinsic(Intrinsic::ID IID) { } } +static SPIRVWord getNativeBuiltinIdForIntrinsic(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::cos: + return 
OpenCLLIB::Native_cos; + case Intrinsic::exp: + return OpenCLLIB::Native_exp; + case Intrinsic::exp2: + return OpenCLLIB::Native_exp2; + case Intrinsic::log: + return OpenCLLIB::Native_log; + case Intrinsic::log10: + return OpenCLLIB::Native_log10; + case Intrinsic::log2: + return OpenCLLIB::Native_log2; + case Intrinsic::sin: + return OpenCLLIB::Native_sin; + case Intrinsic::sqrt: + return OpenCLLIB::Native_sqrt; + default: + return getBuiltinIdForIntrinsic(IID); + } +} + +static bool allowsApproxFunction(IntrinsicInst *II) { + auto *Ty = II->getType(); + // OpenCL native_* built-ins only support single precision data type + return II->hasApproxFunc() && + (Ty->isFloatTy() || + (Ty->isVectorTy() && + cast(Ty)->getElementType()->isFloatTy())); +} + SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II, SPIRVBasicBlock *BB) { auto GetMemoryAccess = [](MemIntrinsic *MI) -> std::vector { @@ -3342,7 +3371,8 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II, // LLVM intrinsics with known translation to SPIR-V are handled here. They // also must be registered at isKnownIntrinsic function in order to make // -spirv-allow-unknown-intrinsics work correctly. - switch (II->getIntrinsicID()) { + auto IID = II->getIntrinsicID(); + switch (IID) { case Intrinsic::assume: { // llvm.assume translation is currently supported only within // SPV_KHR_expect_assume extension, ignore it otherwise, since it's @@ -3379,7 +3409,9 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II, case Intrinsic::trunc: { if (!checkTypeForSPIRVExtendedInstLowering(II, BM)) break; - SPIRVWord ExtOp = getBuiltinIdForIntrinsic(II->getIntrinsicID()); + const SPIRVWord ExtOp = allowsApproxFunction(II) + ? 
getNativeBuiltinIdForIntrinsic(IID) + : getBuiltinIdForIntrinsic(IID); SPIRVType *STy = transType(II->getType()); std::vector Ops(1, transValue(II->getArgOperand(0), BB)); return BM->addExtInst(STy, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp, Ops, @@ -3395,7 +3427,9 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II, case Intrinsic::minnum: { if (!checkTypeForSPIRVExtendedInstLowering(II, BM)) break; - SPIRVWord ExtOp = getBuiltinIdForIntrinsic(II->getIntrinsicID()); + const SPIRVWord ExtOp = allowsApproxFunction(II) + ? getNativeBuiltinIdForIntrinsic(IID) + : getBuiltinIdForIntrinsic(IID); SPIRVType *STy = transType(II->getType()); std::vector Ops{transValue(II->getArgOperand(0), BB), transValue(II->getArgOperand(1), BB)}; @@ -3410,13 +3444,12 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II, SPIRVValue *FirstArgVal = transValue(II->getArgOperand(0), BB); SPIRVValue *SecondArgVal = transValue(II->getArgOperand(1), BB); - Op OC = (II->getIntrinsicID() == Intrinsic::smin) - ? OpSLessThan - : ((II->getIntrinsicID() == Intrinsic::smax) - ? OpSGreaterThan - : ((II->getIntrinsicID() == Intrinsic::umin) - ? OpULessThan - : OpUGreaterThan)); + const Op OC = + (IID == Intrinsic::smin) + ? OpSLessThan + : ((IID == Intrinsic::smax) + ? OpSGreaterThan + : ((IID == Intrinsic::umin) ? OpULessThan : OpUGreaterThan)); if (auto *VecTy = dyn_cast(II->getArgOperand(0)->getType())) BoolTy = VectorType::get(BoolTy, VecTy->getElementCount()); SPIRVValue *Cmp = @@ -3451,8 +3484,8 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II, } case Intrinsic::ctlz: case Intrinsic::cttz: { - SPIRVWord ExtOp = II->getIntrinsicID() == Intrinsic::ctlz ? OpenCLLIB::Clz - : OpenCLLIB::Ctz; + const SPIRVWord ExtOp = + IID == Intrinsic::ctlz ? 
OpenCLLIB::Clz : OpenCLLIB::Ctz; SPIRVType *Ty = transType(II->getType()); std::vector Ops(1, transValue(II->getArgOperand(0), BB)); return BM->addExtInst(Ty, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp, Ops, @@ -3904,8 +3937,8 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II, else // Other LLVM intrinsics shouldn't get to SPIRV, because they // can't be represented in SPIRV or aren't implemented yet. - BM->SPIRVCK( - false, InvalidFunctionCall, II->getCalledOperand()->getName().str()); + BM->SPIRVCK(false, InvalidFunctionCall, + II->getCalledOperand()->getName().str()); } return nullptr; } diff --git a/test/llvm-intrinsics/sqrt.ll b/test/llvm-intrinsics/sqrt.ll index 09170ea..68e9092 100644 --- a/test/llvm-intrinsics/sqrt.ll +++ b/test/llvm-intrinsics/sqrt.ll @@ -20,9 +20,13 @@ entry: %0 = call float @llvm.sqrt.f32(float 0x40091EB860000000) %1 = call double @llvm.sqrt.f64(double 2.710000e+00) %2 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> ) + %3 = call afn float @llvm.sqrt.f32(float 0x40091EB860000000) + %4 = call afn double @llvm.sqrt.f64(double 2.710000e+00) ; CHECK: ExtInst [[Float]] {{[0-9]+}} [[ExtInstSetId]] sqrt [[FloatArg]] ; CHECK: ExtInst [[Double]] {{[0-9]+}} [[ExtInstSetId]] sqrt [[DoubleArg]] ; CHECK: ExtInst [[Double4]] {{[0-9]+}} [[ExtInstSetId]] sqrt [[Double4Arg]] +; CHECK: ExtInst [[Float]] {{[0-9]+}} [[ExtInstSetId]] native_sqrt [[FloatArg]] +; CHECK: ExtInst [[Double]] {{[0-9]+}} [[ExtInstSetId]] sqrt [[DoubleArg]] ret void } @@ -43,4 +47,3 @@ attributes #1 = { nounwind readnone speculatable willreturn } !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git b89131cdda5871731a9139664aef2b70c6d72bbd)"} - diff --git a/test/lower-non-standard-vec-intrinsic-with-ext.ll b/test/lower-non-standard-vec-intrinsic-with-ext.ll new file mode 100644 index 0000000..c38065f --- /dev/null +++ b/test/lower-non-standard-vec-intrinsic-with-ext.ll @@ -0,0 +1,42 @@ +; RUN: llvm-as %s -o 
%t.bc +; RUN: not llvm-spirv -s %t.bc +; RUN: llvm-spirv --spirv-ext=+SPV_INTEL_vector_compute %t.bc +; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s + +; CHECK: ExtInstImport [[ExtInstSetId:[0-9]+]] "OpenCL.std" +; CHECK: TypeFloat [[Float:[0-9]+]] 32 +; CHECK: TypeVector [[Float5:[0-9]+]] [[Float]] 5 + +; ModuleID = 'lower-non-standard-vec-with-ext' +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +@Id = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +declare <5 x float> @llvm.sqrt.f32(<5 x float> %x) + +; Function Attrs: convergent norecurse +define dso_local spir_func <5 x float> @test_sqrt(<5 x float> %src) local_unnamed_addr #0 !sycl_explicit_simd !4 !intel_reqd_sub_group_size !6 { +entry: + %res = call <5 x float> @llvm.sqrt.f32(<5 x float> %src) +; CHECK: ExtInst [[Float5]] {{[0-9]+}} [[ExtInstSetId]] sqrt + ret <5 x float> %res +} + +attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="lower-external-funcs-with-z.cpp" } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!opencl.used.extensions = !{!4} +!opencl.used.optional.core.features = !{!4} +!opencl.compiler.options = !{!4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 0, i32 100000} +!4 = !{} +!5 = !{!"Compiler"} +!6 = !{i32 1} -- 2.30.2