test_autolocals_in_constexprs test_issue_553 test_issue_577 test_issue_757
test_flatten_barrier_subs test_alignment_with_dynamic_wg
test_alignment_with_dynamic_wg2 test_alignment_with_dynamic_wg3
- test_issue_893 test_builtin_args
+ test_issue_893 test_issue_1435 test_builtin_args
test_workitem_func_outside_kernel
)
add_test_pocl(NAME "regression/test_issue_757" COMMAND "test_issue_757")
+add_test_pocl(NAME "regression/test_issue_1435" COMMAND "test_issue_1435")
+
add_test_pocl(NAME "regression/test_workitem_func_outside_kernel" COMMAND "test_workitem_func_outside_kernel")
if(OPENCL_HEADER_VERSION GREATER 299)
"regression/test_issue_445_${VARIANT}" "regression/test_issue_553_${VARIANT}"
"regression/test_issue_577_${VARIANT}" "regression/test_issue_757_${VARIANT}"
"regression/test_llvm_segfault_issue_889_${VARIANT}"
- "regression/test_issue_893_${VARIANT}"
+ "regression/test_issue_893_${VARIANT}" "regression/test_issue_1435_${VARIANT}"
"regression/test_flatten_barrier_subs_${VARIANT}"
"regression/test_workitem_func_outside_kernel_${VARIANT}"
${OCL_30_TESTS}
--- /dev/null
+/*
+ Github Issue #1435
+*/
+
+#include "pocl_opencl.h"
+
+#define CL_HPP_ENABLE_EXCEPTIONS
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#include <CL/opencl.hpp>
+#include <cassert>
+#include <iostream>
+
+using namespace std;
+
+const char *SOURCE = R"RAW(
+
+__kernel void medfilt2d(__global float *image, // input image
+ __global float *result, // output array
+ __local float4 *l_data,// local storage 4x the number of threads
+ int khs1, // Kernel half-size along dim1 (nb lines)
+ int khs2, // Kernel half-size along dim2 (nb columns)
+ int height, // Image size along dim1 (nb lines)
+ int width) // Image size along dim2 (nb columns)
+{
+ int threadid = get_local_id(0);
+ int x = get_global_id(1);
+
+ if (x < width)
+ {
+ union
+ {
+ float ary[8];
+ float8 vec;
+ } output, input;
+ input.vec = (float8)(MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT);
+ int kfs1 = 2 * khs1 + 1;
+ int kfs2 = 2 * khs2 + 1;
+ int nbands = (kfs1 + 7) / 8;
+ for (int y=0; y<height; y++)
+ {
+ //Select only the active threads, some may remain inactive
+ int nb_threads = (nbands * kfs2);
+ int band_nr = threadid / kfs2;
+ int band_id = threadid % kfs2;
+ int pos_x = clamp((int)(x + band_id - khs2), (int) 0, (int) width-1);
+ int max_vec = clamp(kfs1 - 8 * band_nr, 0, 8);
+ if (y == 0)
+ {
+ for (int i=0; i<max_vec; i++)
+ {
+ if (threadid<nb_threads)
+ {
+ int pos_y = clamp((int)(y + 8 * band_nr + i - khs1), (int) 0, (int) height-1);
+ input.ary[i] = image[pos_x + width * pos_y];
+ }
+ }
+ }
+ else
+ {
+ //store storage.s0 to some shared memory to retrieve it from another thread.
+ l_data[threadid].s0 = input.vec.s0;
+
+ //Offset to the bottom
+ input.vec = (float8)(input.vec.s1,
+ input.vec.s2,
+ input.vec.s3,
+ input.vec.s4,
+ input.vec.s5,
+ input.vec.s6,
+ input.vec.s7,
+ MAXFLOAT);
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int read_from = threadid + kfs2;
+ if (read_from < nb_threads)
+ input.vec.s7 = l_data[read_from].s0;
+ else if (threadid < nb_threads) //we are on the last band
+ {
+ int pos_y = clamp((int)(y + 8 * band_nr + max_vec - 1 - khs1), (int) 0, (int) height-1);
+ input.ary[max_vec - 1] = image[pos_x + width * pos_y];
+ }
+
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ }
+}
+
+)RAW";
+
+#if 0
+
+// the shorter code that should trigger the same issue
+
+const char *SOURCE = R"RAW(
+
+__kernel void testkernel(__local float2 *b) {
+ struct {
+ int c[1];
+ float2 d;
+ } e;
+ for (int f = 0; f < 2; f++) {
+ if (f)
+ for (int g; g < (int)b[0].x; g++)
+ e.c[g] = 0;
+ else if (b)
+ e.d.s0 = b[0].s0;
+ barrier(0);
+ }
+}
+)RAW";
+
+#endif
+
+int main(int argc, char *argv[]) {
+ cl::Device device = cl::Device::getDefault();
+ cl::Program program(SOURCE);
+ program.build("-cl-std=CL1.2");
+
+ // This triggers compilation of dynamic WG binaries.
+ cl::Program::Binaries binaries{};
+ int err = program.getInfo<>(CL_PROGRAM_BINARIES, &binaries);
+ if (err == CL_SUCCESS) {
+ printf("OK\n");
+ return EXIT_SUCCESS;
+ } else {
+ printf("FAIL\n");
+ return EXIT_FAILURE;
+ }
+}