Skip to content
Snippets Groups Projects
Commit 2ae98f51 authored by Evangelos Foutras's avatar Evangelos Foutras :smiley_cat:
Browse files

Add upstream patches needed for intel-opencl-clang and intel-compute-runtime.

parent a8ec89a0
No related branches found
Tags 8.0.0-2
No related merge requests found
From 2f6eec18566fb235097bb2f0b00852e858330dc5 Mon Sep 17 00:00:00 2001
From: Alexey Bader <alexey.bader@intel.com>
Date: Tue, 19 Feb 2019 15:19:06 +0000
Subject: [PATCH 1/3] [OpenCL] Change type of block pointer for OpenCL
Summary:
For some reason OpenCL blocks in LLVM IR are represented as function pointers.
These pointers do not point to any real function and never get called. Actually
they point to some structure, which in turn contains pointer to the real block
invoke function.
This patch changes represntation of OpenCL blocks in LLVM IR from function
pointers to pointers to `%struct.__block_literal_generic`.
Such representation allows to avoid unnecessary bitcasts and simplifies
further processing (e.g. translation to SPIR-V ) of the module for targets
which do not support function pointers.
Patch by: Alexey Sotkin.
Reviewers: Anastasia, yaxunl, svenvh
Reviewed By: Anastasia
Subscribers: alexbatashev, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D58277
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/CodeGen/CodeGenTypes.cpp | 4 +++-
test/CodeGenOpenCL/blocks.cl | 18 ++++++++----------
test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++---------
3 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 2acf1ac161..93b3ebf5c2 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case Type::BlockPointer: {
const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType();
- llvm::Type *PointeeType = ConvertTypeForMem(FTy);
+ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL
+ ? CGM.getGenericBlockLiteralType()
+ : ConvertTypeForMem(FTy);
unsigned AS = Context.getTargetAddressSpace(FTy);
ResultType = llvm::PointerType::get(PointeeType, AS);
break;
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
index 675240c6f0..19aacc3f0d 100644
--- a/test/CodeGenOpenCL/blocks.cl
+++ b/test/CodeGenOpenCL/blocks.cl
@@ -35,11 +35,10 @@ void foo(){
// SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
// SPIR: %[[i_value:.*]] = load i32, i32* %i
// SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
// SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
// SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
@@ -50,11 +49,10 @@ void foo(){
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
// AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
// AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
// AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
// AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
// AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 473219478a..84450162da 100644
--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t;
// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
// For anonymous blocks without captures, emit block literals as global variable.
// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// Emits global block literal [[BLG8]] and invoke function [[INVG8]].
// The full type of these expressions are long (and repeated elsewhere), so we
// capture it as part of the regex for convenience and clarity.
- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
void (^const block_A)(void) = ^{
return;
};
// Emits global block literal [[BLG9]] and invoke function [[INVG9]].
- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
void (^const block_B)(local void *) = ^(local void *a) {
return;
};
@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
--
2.21.0
From 46c42e19eb6ad426c07f4e33408c7288ddd5d053 Mon Sep 17 00:00:00 2001
From: Andrew Savonichev <andrew.savonichev@intel.com>
Date: Thu, 21 Feb 2019 11:02:10 +0000
Subject: [PATCH 2/3] [OpenCL] Simplify LLVM IR generated for OpenCL blocks
Summary:
Emit direct call of block invoke functions when possible, i.e. in case the
block is not passed as a function argument.
Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()`
Reviewers: Anastasia, yaxunl, svenvh
Reviewed By: Anastasia
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D58388
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/CodeGen/CGBlocks.cpp | 77 +++++++++----------
lib/CodeGen/CGOpenCLRuntime.cpp | 30 ++++++--
lib/CodeGen/CGOpenCLRuntime.h | 4 +
test/CodeGenOpenCL/blocks.cl | 10 +--
.../CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 ++++++--
5 files changed, 91 insertions(+), 64 deletions(-)
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index fa3c3ee861..10a0238d91 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
const BlockPointerType *BPT =
E->getCallee()->getType()->getAs<BlockPointerType>();
-
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
-
- // Get a pointer to the generic block literal.
- // For OpenCL we generate generic AS void ptr to be able to reuse the same
- // block definition for blocks with captures generated as private AS local
- // variables and without captures generated as global AS program scope
- // variables.
- unsigned AddrSpace = 0;
- if (getLangOpts().OpenCL)
- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
-
- llvm::Type *BlockLiteralTy =
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
-
- // Bitcast the callee to a block literal.
- BlockPtr =
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
-
- // Get the function pointer from the literal.
- llvm::Value *FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
- CGM.getLangOpts().OpenCL ? 2 : 3);
-
- // Add the block literal.
+ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
+ llvm::Value *Func = nullptr;
+ QualType FnType = BPT->getPointeeType();
+ ASTContext &Ctx = getContext();
CallArgList Args;
- QualType VoidPtrQualTy = getContext().VoidPtrTy;
- llvm::Type *GenericVoidPtrTy = VoidPtrTy;
if (getLangOpts().OpenCL) {
- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
- VoidPtrQualTy =
- getContext().getPointerType(getContext().getAddrSpaceQualType(
- getContext().VoidTy, LangAS::opencl_generic));
- }
-
- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
- Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
-
- QualType FnType = BPT->getPointeeType();
+ // For OpenCL, BlockPtr is already casted to generic block literal.
+
+ // First argument of a block call is a generic block literal casted to
+ // generic void pointer, i.e. i8 addrspace(4)*
+ llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
+ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
+ QualType VoidPtrQualTy = Ctx.getPointerType(
+ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
+ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
+ // And the rest of the arguments.
+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
+
+ // We *can* call the block directly unless it is a function argument.
+ if (!isa<ParmVarDecl>(E->getCalleeDecl()))
+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
+ else {
+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ }
+ } else {
+ // Bitcast the block literal to a generic block literal.
+ BlockPtr = Builder.CreatePointerCast(
+ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
+ // Get pointer to the block invoke function
+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
- // And the rest of the arguments.
- EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
+ // First argument is a block literal casted to a void pointer
+ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
+ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
+ // And the rest of the arguments.
+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
- // Load the function.
- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ // Load the function.
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ }
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
const CGFunctionInfo &FnInfo =
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 7f6f595dd5..75003e569f 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
+// Get the block literal from an expression derived from the block expression.
+// OpenCL v2.0 s6.12.5:
+// Block variable declarations are implicitly qualified with const. Therefore
+// all block variables must be initialized at declaration time and may not be
+// reassigned.
+static const BlockExpr *getBlockExpr(const Expr *E) {
+ const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop.
+ while(!isa<BlockExpr>(E) && E != Prev) {
+ Prev = E;
+ E = E->IgnoreCasts();
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+ E = cast<VarDecl>(DR->getDecl())->getInit();
+ }
+ }
+ return cast<BlockExpr>(E);
+}
+
/// Record emitted llvm invoke function and llvm block literal for the
/// corresponding block expression.
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
@@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
EnqueuedBlockMap[E].Kernel = nullptr;
}
+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
+}
+
CGOpenCLRuntime::EnqueuedBlockInfo
CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
CGF.EmitScalarExpr(E);
// The block literal may be assigned to a const variable. Chasing down
// to get the block literal.
- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
- E = cast<VarDecl>(DR->getDecl())->getInit();
- }
- E = E->IgnoreImplicit();
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- auto *Block = cast<BlockExpr>(E);
+ const BlockExpr *Block = getBlockExpr(E);
assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
"Block expression not emitted");
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
index 750721f1b8..4effc7eaa8 100644
--- a/lib/CodeGen/CGOpenCLRuntime.h
+++ b/lib/CodeGen/CGOpenCLRuntime.h
@@ -92,6 +92,10 @@ public:
/// \param Block block literal emitted for the block expression.
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
llvm::Value *Block);
+
+ /// \return LLVM block invoke function emitted for an expression derived from
+ /// the block expression.
+ llvm::Function *getInvokeFunction(const Expr *E);
};
}
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
index 19aacc3f0d..ab5a2c643c 100644
--- a/test/CodeGenOpenCL/blocks.cl
+++ b/test/CodeGenOpenCL/blocks.cl
@@ -39,11 +39,8 @@ void foo(){
// SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
// SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
// SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
// AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
// AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
@@ -53,11 +50,8 @@ void foo(){
// AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
// AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
// AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
// AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
int (^ block_B)(void) = ^{
return i;
diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 84450162da..1566912ded 100644
--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
};
// Uses global block literal [[BLG8]] and invoke function [[INVG8]].
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
// Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
unsigned size = get_kernel_work_group_size(block_A);
// Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
+ // Make sure that block invoke function is resolved correctly after sequence of assignements.
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
+ bl_t b1 = block_G;
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
+ bl_t b2 = b1;
+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
+ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
+ b2(0);
+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ size = get_kernel_preferred_work_group_size_multiple(b2);
+
void (^block_C)(void) = ^{
callee(i, a);
};
-
// Emits block literal on stack and block kernel [[INVLK3]].
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
--
2.21.0
From 94a65066cccbe50358dca0a9da7712cf5294912a Mon Sep 17 00:00:00 2001
From: Yaxun Liu <Yaxun.Liu@amd.com>
Date: Tue, 26 Feb 2019 16:20:41 +0000
Subject: [PATCH 3/3] [OpenCL] Fix assertion due to blocks
A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called.
There is code
Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle
BlockExpr and returns nullptr, which causes isa to assert.
This patch fixes that.
Differential Revision: https://reviews.llvm.org/D58658
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/AST/Expr.cpp | 2 ++
test/CodeGenOpenCL/blocks.cl | 6 ++++++
2 files changed, 8 insertions(+)
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 7cdd3b2c2a..50a65e02bf 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -1359,6 +1359,8 @@ Decl *Expr::getReferencedDeclOfCallee() {
return DRE->getDecl();
if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE))
return ME->getMemberDecl();
+ if (auto *BE = dyn_cast<BlockExpr>(CEE))
+ return BE->getBlockDecl();
return nullptr;
}
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
index ab5a2c643c..c3e26855df 100644
--- a/test/CodeGenOpenCL/blocks.cl
+++ b/test/CodeGenOpenCL/blocks.cl
@@ -90,6 +90,12 @@ int get42() {
return blockArgFunc(^{return 42;});
}
+// COMMON-LABEL: define {{.*}}@call_block
+// call {{.*}}@__call_block_block_invoke
+int call_block() {
+ return ^int(int num) { return num; } (11);
+}
+
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
--
2.21.0
......@@ -3,7 +3,7 @@
pkgname=clang
pkgver=8.0.0
pkgrel=1
pkgrel=2
pkgdesc="C language family frontend for LLVM"
arch=('x86_64')
url="https://clang.llvm.org/"
......@@ -18,6 +18,9 @@ replaces=('clang-analyzer' 'clang-tools-extra')
source=(https://releases.llvm.org/$pkgver/cfe-$pkgver.src.tar.xz{,.sig}
https://releases.llvm.org/$pkgver/clang-tools-extra-$pkgver.src.tar.xz{,.sig}
https://releases.llvm.org/$pkgver/llvm-$pkgver.src.tar.xz{,.sig}
0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
0003-OpenCL-Fix-assertion-due-to-blocks.patch
enable-SSP-and-PIE-by-default.patch)
sha256sums=('084c115aab0084e63b23eee8c233abb6739c399e29966eaeccfc6e088e0b736b'
'SKIP'
......@@ -25,6 +28,9 @@ sha256sums=('084c115aab0084e63b23eee8c233abb6739c399e29966eaeccfc6e088e0b736b'
'SKIP'
'8872be1b12c61450cacc82b3d153eab02be2546ef34fa3580ed14137bb26224c'
'SKIP'
'fbb6d3c8135942e458bcad8882605a623bcd09bdec488eb0800e3afee0061e3a'
'91c49139e02c6d29f6201aa394868939b991b39b84215c082392ea96fbd8c334'
'191434810b5298331908ea1e193203e8441693da900238344e946ee6a79dad31'
'58f86da25eb230ed6d423b5b61870cbf3bef88f38103ca676a2c7f34b2372171')
validpgpkeys+=('B6C8F98282B944E3B0D5C2530FC3042E345AD05D') # Hans Wennborg <hans@chromium.org>
validpgpkeys+=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard <tstellar@redhat.com>
......@@ -34,6 +40,11 @@ prepare() {
mkdir build
mv "$srcdir/clang-tools-extra-$pkgver.src" tools/extra
patch -Np1 -i ../enable-SSP-and-PIE-by-default.patch
# Backported patches for intel-opencl-clang and intel-compute-runtime
patch -Np1 -i ../0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
patch -Np1 -i ../0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
patch -Np1 -i ../0003-OpenCL-Fix-assertion-due-to-blocks.patch
}
build() {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment