[PATCH] add regression test for issue #1435
author Michal Babej <michal.babej@intel.com>
Thu, 4 Apr 2024 17:27:15 +0000 (20:27 +0300)
committer Andreas Beckmann <anbe@debian.org>
Mon, 29 Apr 2024 20:56:01 +0000 (22:56 +0200)
Gbp-Pq: Name 0003-add-regression-test-for-issue-1435.patch

tests/regression/CMakeLists.txt
tests/regression/test_issue_1435.cpp [new file with mode: 0644]

index 0e23c77d27afa7c69ec1c92159cee805fe2c5f5b..fd82ce6607f9d319d6b49ce604432edae6ef5963 100644 (file)
@@ -48,7 +48,7 @@ set(PROGRAMS_TO_BUILD test_barrier_between_for_loops test_early_return
   test_autolocals_in_constexprs test_issue_553 test_issue_577 test_issue_757
   test_flatten_barrier_subs test_alignment_with_dynamic_wg
   test_alignment_with_dynamic_wg2 test_alignment_with_dynamic_wg3
-  test_issue_893 test_builtin_args
+  test_issue_893 test_issue_1435 test_builtin_args
   test_workitem_func_outside_kernel
 )
 
@@ -83,6 +83,8 @@ add_test_pocl(NAME "regression/test_issue_577" COMMAND "test_issue_577")
 
 add_test_pocl(NAME "regression/test_issue_757" COMMAND "test_issue_757")
 
+add_test_pocl(NAME "regression/test_issue_1435" COMMAND "test_issue_1435")
+
 add_test_pocl(NAME "regression/test_workitem_func_outside_kernel" COMMAND "test_workitem_func_outside_kernel")
 
 if(OPENCL_HEADER_VERSION GREATER 299)
@@ -231,7 +233,7 @@ foreach(VARIANT ${VARIANTS})
     "regression/test_issue_445_${VARIANT}" "regression/test_issue_553_${VARIANT}"
     "regression/test_issue_577_${VARIANT}" "regression/test_issue_757_${VARIANT}"
     "regression/test_llvm_segfault_issue_889_${VARIANT}"
-    "regression/test_issue_893_${VARIANT}"
+    "regression/test_issue_893_${VARIANT}" "regression/test_issue_1435_${VARIANT}"
     "regression/test_flatten_barrier_subs_${VARIANT}"
     "regression/test_workitem_func_outside_kernel_${VARIANT}"
     ${OCL_30_TESTS}
diff --git a/tests/regression/test_issue_1435.cpp b/tests/regression/test_issue_1435.cpp
new file mode 100644 (file)
index 0000000..0e85f1f
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+  Regression test for GitHub issue #1435
+*/
+
+#include "pocl_opencl.h"
+
+#define CL_HPP_ENABLE_EXCEPTIONS
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#include <CL/opencl.hpp>
+#include <cassert>
+#include <iostream>
+
+using namespace std;
+
+const char *SOURCE = R"RAW(
+
+__kernel void medfilt2d(__global float *image,  // input image
+                        __global float *result, // output array
+                        __local  float4 *l_data,// local storage 4x the number of threads
+                                 int khs1,      // Kernel half-size along dim1 (nb lines)
+                                 int khs2,      // Kernel half-size along dim2 (nb columns)
+                                 int height,    // Image size along dim1 (nb lines)
+                                 int width)     // Image size along dim2 (nb columns)
+{
+    int threadid = get_local_id(0);
+    int x = get_global_id(1);
+
+    if (x < width)
+    {
+        union
+        {
+            float  ary[8];
+            float8 vec;
+        } output, input;
+        input.vec = (float8)(MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT, MAXFLOAT);
+        int kfs1 = 2 * khs1 + 1; 
+        int kfs2 = 2 * khs2 + 1;
+        int nbands = (kfs1 + 7) / 8; 
+        for (int y=0; y<height; y++)
+        {
+            //Select only the active threads, some may remain inactive
+            int nb_threads =  (nbands * kfs2);
+            int band_nr = threadid / kfs2;
+            int band_id = threadid % kfs2;
+            int pos_x = clamp((int)(x + band_id - khs2), (int) 0, (int) width-1);
+            int max_vec = clamp(kfs1 - 8 * band_nr, 0, 8);
+            if (y == 0)
+            {
+                for (int i=0; i<max_vec; i++)
+                {
+                    if (threadid<nb_threads)
+                    {
+                        int pos_y = clamp((int)(y + 8 * band_nr + i - khs1), (int) 0, (int) height-1);
+                        input.ary[i] = image[pos_x + width * pos_y];
+                    }
+                }
+            }
+            else
+            {
+                //store storage.s0 to some shared memory to retrieve it from another thread.
+                l_data[threadid].s0 = input.vec.s0;
+
+                //Offset to the bottom
+                input.vec = (float8)(input.vec.s1,
+                        input.vec.s2,
+                        input.vec.s3,
+                        input.vec.s4,
+                        input.vec.s5,
+                        input.vec.s6,
+                        input.vec.s7,
+                        MAXFLOAT);
+
+                barrier(CLK_LOCAL_MEM_FENCE);
+
+                int read_from = threadid + kfs2;
+                if (read_from < nb_threads)
+                    input.vec.s7 = l_data[read_from].s0;
+                else if (threadid < nb_threads) //we are on the last band
+                {
+                    int pos_y = clamp((int)(y + 8 * band_nr + max_vec - 1 - khs1), (int) 0, (int) height-1);
+                    input.ary[max_vec - 1] = image[pos_x + width * pos_y];
+                }
+
+            }
+
+            barrier(CLK_LOCAL_MEM_FENCE);
+        }
+    }
+}
+
+)RAW"; // OpenCL C source passed to cl::Program in main(); it is built there but never enqueued.
+
+#if 0
+
+// A shorter reproducer that should trigger the same issue (disabled; kept for reference).
+
+const char *SOURCE = R"RAW(
+
+__kernel void testkernel(__local float2 *b) {
+  struct {
+    int c[1];
+    float2 d;
+  } e;
+  for (int f = 0; f < 2; f++) {
+    if (f)
+      for (int g; g < (int)b[0].x; g++)
+        e.c[g] = 0;
+    else if (b)
+      e.d.s0 = b[0].s0;
+    barrier(0);
+  }
+}
+)RAW";
+
+#endif
+
+// CL_HPP_ENABLE_EXCEPTIONS is defined, so failing cl:: calls throw cl::Error
+// instead of returning a status code; catch it here to report FAIL cleanly.
+int main(int argc, char *argv[]) try {
+  cl::Device device = cl::Device::getDefault();
+  cl::Program program(SOURCE);
+  program.build("-cl-std=CL1.2");
+  // This triggers compilation of dynamic WG binaries.
+  cl::Program::Binaries binaries{};
+  program.getInfo<>(CL_PROGRAM_BINARIES, &binaries);
+  printf("OK\n");
+  return EXIT_SUCCESS;
+} catch (const cl::Error &err) {
+  std::cerr << err.what() << ": OpenCL error " << err.err() << "\n";
+  printf("FAIL\n");
+  return EXIT_FAILURE;
+}