diff options
Diffstat (limited to 'llvm-3.7-patch-1.patch')
-rw-r--r-- | llvm-3.7-patch-1.patch | 1193 |
1 files changed, 0 insertions, 1193 deletions
diff --git a/llvm-3.7-patch-1.patch b/llvm-3.7-patch-1.patch deleted file mode 100644 index a375eb6393e0..000000000000 --- a/llvm-3.7-patch-1.patch +++ /dev/null @@ -1,1193 +0,0 @@ -llvm 3.7 change to llvm IR, need two copies if still use the llvm IR -to implement llvm.memset and llvm.memcpy. And opencl c is more clearly. - -Signed-off-by: Yang Rong <rong.r.yang at intel.com> ---- - backend/src/libocl/CMakeLists.txt | 5 +- - backend/src/libocl/include/ocl.h | 1 + - backend/src/libocl/include/ocl_memcpy.h | 51 +++ - backend/src/libocl/include/ocl_memset.h | 33 ++ - backend/src/libocl/src/ocl_memcpy.cl | 49 +++ - backend/src/libocl/src/ocl_memcpy.ll | 729 -------------------------------- - backend/src/libocl/src/ocl_memset.cl | 44 ++ - backend/src/libocl/src/ocl_memset.ll | 193 --------- - 8 files changed, 181 insertions(+), 924 deletions(-) - create mode 100644 backend/src/libocl/include/ocl_memcpy.h - create mode 100644 backend/src/libocl/include/ocl_memset.h - create mode 100644 backend/src/libocl/src/ocl_memcpy.cl - delete mode 100644 backend/src/libocl/src/ocl_memcpy.ll - create mode 100644 backend/src/libocl/src/ocl_memset.cl - delete mode 100644 backend/src/libocl/src/ocl_memset.ll - -diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt -index 0cd1eef..0fffd9b 100644 ---- a/backend/src/libocl/CMakeLists.txt -+++ b/backend/src/libocl/CMakeLists.txt -@@ -52,7 +52,8 @@ FOREACH(M ${OCL_COPY_HEADERS}) - COPY_THE_HEADER(${M}) - ENDFOREACH(M) - --SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image) -+SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy -+ ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image) - FOREACH(M ${OCL_COPY_MODULES}) - COPY_THE_HEADER(${M}) - COPY_THE_SOURCE(${M}) -@@ -181,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M) - ) - ENDMACRO(ADD_LL_TO_BC_TARGET) - --SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz) -+SET (OCL_LL_MODULES 
ocl_barrier ocl_clz) - FOREACH(f ${OCL_LL_MODULES}) - COPY_THE_LL(${f}) - ADD_LL_TO_BC_TARGET(${f}) -diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h -index a4af4aa..7897567 100644 ---- a/backend/src/libocl/include/ocl.h -+++ b/backend/src/libocl/include/ocl.h -@@ -30,6 +30,7 @@ - #include "ocl_image.h" - #include "ocl_integer.h" - #include "ocl_math.h" -+#include "ocl_memcpy.h" - #include "ocl_misc.h" - #include "ocl_printf.h" - #include "ocl_relational.h" -diff --git a/backend/src/libocl/include/ocl_memcpy.h b/backend/src/libocl/include/ocl_memcpy.h -new file mode 100644 -index 0000000..2672298 ---- /dev/null -+++ b/backend/src/libocl/include/ocl_memcpy.h -@@ -0,0 +1,51 @@ -+/* -+ * Copyright © 2012 - 2014 Intel Corporation -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
-+ * -+ */ -+#ifndef __OCL_MEMCPY_H__ -+#define __OCL_MEMCPY_H__ -+#include "ocl_types.h" -+ -+///////////////////////////////////////////////////////////////////////////// -+// memcopy functions -+///////////////////////////////////////////////////////////////////////////// -+void __gen_memcpy_gg_align(__global uchar* dst, __global uchar* src, size_t size); -+void __gen_memcpy_gp_align(__global uchar* dst, __private uchar* src, size_t size); -+void __gen_memcpy_gl_align(__global uchar* dst, __local uchar* src, size_t size); -+void __gen_memcpy_gc_align(__global uchar* dst, __constant uchar* src, size_t size); -+void __gen_memcpy_pg_align(__private uchar* dst, __global uchar* src, size_t size); -+void __gen_memcpy_pp_align(__private uchar* dst, __private uchar* src, size_t size); -+void __gen_memcpy_pl_align(__private uchar* dst, __local uchar* src, size_t size); -+void __gen_memcpy_pc_align(__private uchar* dst, __constant uchar* src, size_t size); -+void __gen_memcpy_lg_align(__local uchar* dst, __global uchar* src, size_t size); -+void __gen_memcpy_lp_align(__local uchar* dst, __private uchar* src, size_t size); -+void __gen_memcpy_ll_align(__local uchar* dst, __local uchar* src, size_t size); -+void __gen_memcpy_lc_align(__local uchar* dst, __constant uchar* src, size_t size); -+ -+void __gen_memcpy_gg(__global uchar* dst, __global uchar* src, size_t size); -+void __gen_memcpy_gp(__global uchar* dst, __private uchar* src, size_t size); -+void __gen_memcpy_gl(__global uchar* dst, __local uchar* src, size_t size); -+void __gen_memcpy_gc(__global uchar* dst, __constant uchar* src, size_t size); -+void __gen_memcpy_pg(__private uchar* dst, __global uchar* src, size_t size); -+void __gen_memcpy_pp(__private uchar* dst, __private uchar* src, size_t size); -+void __gen_memcpy_pl(__private uchar* dst, __local uchar* src, size_t size); -+void __gen_memcpy_pc(__private uchar* dst, __constant uchar* src, size_t size); -+void __gen_memcpy_lg(__local uchar* dst, __global 
uchar* src, size_t size); -+void __gen_memcpy_lp(__local uchar* dst, __private uchar* src, size_t size); -+void __gen_memcpy_ll(__local uchar* dst, __local uchar* src, size_t size); -+void __gen_memcpy_lc(__local uchar* dst, __constant uchar* src, size_t size); -+ -+#endif /* __OCL_MEMCPY_H__ */ -diff --git a/backend/src/libocl/include/ocl_memset.h b/backend/src/libocl/include/ocl_memset.h -new file mode 100644 -index 0000000..2d444ad ---- /dev/null -+++ b/backend/src/libocl/include/ocl_memset.h -@@ -0,0 +1,33 @@ -+/* -+ * Copyright © 2012 - 2014 Intel Corporation -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
-+ * -+ */ -+#ifndef __OCL_MEMSET_H__ -+#define __OCL_MEMSET_H__ -+#include "ocl_types.h" -+ -+///////////////////////////////////////////////////////////////////////////// -+// memcopy functions -+///////////////////////////////////////////////////////////////////////////// -+void __gen_memset_g_align(__global uchar* dst, uchar val, size_t size); -+void __gen_memset_p_align(__private uchar* dst, uchar val, size_t size); -+void __gen_memset_l_align(__local uchar* dst, uchar val, size_t size); -+ -+void __gen_memset_g(__global uchar* dst, uchar val, size_t size); -+void __gen_memset_p(__private uchar* dst, uchar val, size_t size); -+void __gen_memset_l(__local uchar* dst, uchar val, size_t size); -+ -+#endif /* __OCL_MEMSET_H__ */ -diff --git a/backend/src/libocl/src/ocl_memcpy.cl b/backend/src/libocl/src/ocl_memcpy.cl -new file mode 100644 -index 0000000..85f490f ---- /dev/null -+++ b/backend/src/libocl/src/ocl_memcpy.cl -@@ -0,0 +1,49 @@ -+/* -+ * Copyright © 2012 - 2014 Intel Corporation -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
-+ * -+ */ -+#include "ocl_memcpy.h" -+ -+#define DECL_TWO_SPACE_MEMCOPY_FN(NAME, DST_SPACE, SRC_SPACE) \ -+void __gen_memcpy_ ##NAME## _align (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \ -+ size_t index = 0; \ -+ while((index + 4) <= size) { \ -+ *((DST_SPACE uint *)(dst + index)) = *((SRC_SPACE uint *)(src + index)); \ -+ index += 4; \ -+ } \ -+ while(index < size) { \ -+ dst[index] = src[index]; \ -+ index++; \ -+ } \ -+} \ -+void __gen_memcpy_ ##NAME (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \ -+ size_t index = 0; \ -+ while(index < size) { \ -+ dst[index] = src[index]; \ -+ index++; \ -+ } \ -+} -+ -+#define DECL_ONE_SPACE_MEMCOPY_FN(NAME, DST_SPACE) \ -+ DECL_TWO_SPACE_MEMCOPY_FN( NAME## g, DST_SPACE, __global) \ -+ DECL_TWO_SPACE_MEMCOPY_FN( NAME## l, DST_SPACE, __local) \ -+ DECL_TWO_SPACE_MEMCOPY_FN( NAME## p, DST_SPACE, __private) \ -+ DECL_TWO_SPACE_MEMCOPY_FN( NAME## c, DST_SPACE, __constant) -+ -+DECL_ONE_SPACE_MEMCOPY_FN(g, __global) -+DECL_ONE_SPACE_MEMCOPY_FN(l, __local) -+DECL_ONE_SPACE_MEMCOPY_FN(p, __private) -+ -diff --git a/backend/src/libocl/src/ocl_memcpy.ll b/backend/src/libocl/src/ocl_memcpy.ll -deleted file mode 100644 -index b3fadb2..0000000 ---- a/backend/src/libocl/src/ocl_memcpy.ll -+++ /dev/null -@@ -1,729 +0,0 @@ --;The memcpy's source code. 
--; INLINE_OVERLOADABLE void __gen_memcpy_align(uchar* dst, uchar* src, size_t size) { --; size_t index = 0; --; while((index + 4) <= size) { --; *((uint *)(dst + index)) = *((uint *)(src + index)); --; index += 4; --; } --; while(index < size) { --; dst[index] = src[index]; --; index++; --; } --; } -- --define void @__gen_memcpy_gg_align(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* -- %1 = load i32 addrspace(1)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* -- store i32 %1, i32 addrspace(1)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 -- %3 = load i8 addrspace(1)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_gp_align(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = 
add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* -- %1 = load i32 addrspace(0)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* -- store i32 %1, i32 addrspace(1)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 -- %3 = load i8 addrspace(0)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_gl_align(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* -- %1 = load i32 addrspace(3)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* -- store i32 %1, i32 addrspace(1)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = 
%while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 -- %3 = load i8 addrspace(3)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_pg_align(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* -- %1 = load i32 addrspace(1)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* -- store i32 %1, i32 addrspace(0)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 -- %3 = load i8 addrspace(1)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = 
%while.cond3 -- ret void --} -- --define void @__gen_memcpy_pp_align(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* -- %1 = load i32 addrspace(0)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* -- store i32 %1, i32 addrspace(0)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 -- %3 = load i8 addrspace(0)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_pl_align(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 
addrspace(3)* -- %1 = load i32 addrspace(3)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* -- store i32 %1, i32 addrspace(0)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 -- %3 = load i8 addrspace(3)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_lg_align(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* -- %1 = load i32 addrspace(1)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* -- store i32 %1, i32 addrspace(3)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 
addrspace(1)* %src, i32 %index.1 -- %3 = load i8 addrspace(1)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_lp_align(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* -- %1 = load i32 addrspace(0)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* -- store i32 %1, i32 addrspace(3)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 -- %3 = load i8 addrspace(0)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_ll_align(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, 
%while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* -- %1 = load i32 addrspace(3)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* -- store i32 %1, i32 addrspace(3)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 -- %3 = load i8 addrspace(3)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --;The memcpy's source code. 
--; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) { --; size_t index = 0; --; while(index < size) { --; dst[index] = src[index]; --; index++; --; } --; } -- --define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(1)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(1)* -- %3 = load i8 addrspace(1)* %2, align 1 -- %4 = ptrtoint i8 addrspace(1)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(1)* -- store i8 %3, i8 addrspace(1)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(0)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(0)* -- %3 = load i8 addrspace(0)* %2, align 1 -- %4 = ptrtoint i8 addrspace(1)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(1)* -- store i8 %3, i8 addrspace(1)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, 
label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(3)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(3)* -- %3 = load i8 addrspace(3)* %2, align 1 -- %4 = ptrtoint i8 addrspace(1)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(1)* -- store i8 %3, i8 addrspace(1)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(1)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(1)* -- %3 = load i8 addrspace(1)* %2, align 1 -- %4 = ptrtoint i8 addrspace(0)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(0)* -- store i8 %3, i8 addrspace(0)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(0)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(0)* -- %3 = load i8 addrspace(0)* %2, align 1 -- %4 = ptrtoint i8 addrspace(0)* %dst to i32 -- %5 
= add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(0)* -- store i8 %3, i8 addrspace(0)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(3)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(3)* -- %3 = load i8 addrspace(3)* %2, align 1 -- %4 = ptrtoint i8 addrspace(0)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(0)* -- store i8 %3, i8 addrspace(0)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(1)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(1)* -- %3 = load i8 addrspace(1)* %2, align 1 -- %4 = ptrtoint i8 addrspace(3)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(3)* -- store i8 %3, i8 addrspace(3)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 
addrspace(0)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(0)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(0)* -- %3 = load i8 addrspace(0)* %2, align 1 -- %4 = ptrtoint i8 addrspace(3)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(3)* -- store i8 %3, i8 addrspace(3)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(3)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(3)* -- %3 = load i8 addrspace(3)* %2, align 1 -- %4 = ptrtoint i8 addrspace(3)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(3)* -- store i8 %3, i8 addrspace(3)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond 
-- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* -- %1 = load i32 addrspace(2)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* -- store i32 %1, i32 addrspace(1)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 -- %3 = load i8 addrspace(2)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* -- %1 = load i32 addrspace(2)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* -- store i32 %1, i32 addrspace(0)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, 
label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 -- %3 = load i8 addrspace(2)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { --entry: -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond3, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 -- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* -- %1 = load i32 addrspace(2)* %0, align 4 -- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 -- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* -- store i32 %1, i32 addrspace(3)* %2, align 4 -- br label %while.cond -- --while.cond3: ; preds = %while.cond, %while.body5 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] -- %cmp4 = icmp ult i32 %index.1, %size -- br i1 %cmp4, label %while.body5, label %while.end7 -- --while.body5: ; preds = %while.cond3 -- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 -- %3 = load i8 addrspace(2)* %arrayidx, align 1 -- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 -- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond3 -- --while.end7: ; preds = %while.cond3 -- ret void --} -- --define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { --entry: -- 
%cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(2)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(2)* -- %3 = load i8 addrspace(2)* %2, align 1 -- %4 = ptrtoint i8 addrspace(0)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(0)* -- store i8 %3, i8 addrspace(0)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(2)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(2)* -- %3 = load i8 addrspace(2)* %2, align 1 -- %4 = ptrtoint i8 addrspace(1)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(1)* -- store i8 %3, i8 addrspace(1)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { --entry: -- %cmp4 = icmp eq i32 %size, 0 -- br i1 %cmp4, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(2)* %src to i32 -- %1 = add i32 %0, %index.05 -- %2 = inttoptr i32 %1 to i8 addrspace(2)* -- %3 = load i8 addrspace(2)* %2, align 1 -- %4 = 
ptrtoint i8 addrspace(3)* %dst to i32 -- %5 = add i32 %4, %index.05 -- %6 = inttoptr i32 %5 to i8 addrspace(3)* -- store i8 %3, i8 addrspace(3)* %6, align 1 -- %inc = add i32 %index.05, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -diff --git a/backend/src/libocl/src/ocl_memset.cl b/backend/src/libocl/src/ocl_memset.cl -new file mode 100644 -index 0000000..b41851a ---- /dev/null -+++ b/backend/src/libocl/src/ocl_memset.cl -@@ -0,0 +1,44 @@ -+/* -+ * Copyright © 2012 - 2014 Intel Corporation -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
-+ * -+ */ -+#include "ocl_memset.h" -+ -+#define DECL_MEMSET_FN(NAME, DST_SPACE) \ -+void __gen_memset_ ##NAME## _align (DST_SPACE uchar* dst, uchar val, size_t size) { \ -+ size_t index = 0; \ -+ uint v = (val << 24) | (val << 16) | (val << 8) | val; \ -+ while((index + 4) <= size) { /* 32-bit stores only while a whole word still fits; matches the replaced IR (icmp ugt add, size -> exit) */ \ -+ *((DST_SPACE uint *)(dst + index)) = v; \ -+ index += 4; \ -+ } \ -+ while(index < size) { \ -+ dst[index] = val; \ -+ index++; \ -+ } \ -+} \ -+void __gen_memset_ ##NAME (DST_SPACE uchar* dst, uchar val, size_t size) { \ -+ size_t index = 0; \ -+ while(index < size) { \ -+ dst[index] = val; \ -+ index++; \ -+ } \ -+} -+ -+DECL_MEMSET_FN(g, __global) -+DECL_MEMSET_FN(l, __local) -+DECL_MEMSET_FN(p, __private) -+ -diff --git a/backend/src/libocl/src/ocl_memset.ll b/backend/src/libocl/src/ocl_memset.ll -deleted file mode 100644 -index 665eac4..0000000 ---- a/backend/src/libocl/src/ocl_memset.ll -+++ /dev/null -@@ -1,193 +0,0 @@ --;The memset's source code. --; INLINE_OVERLOADABLE void __gen_memset_align(uchar* dst, uchar val, size_t size) { --; size_t index = 0; --; uint v = (val << 24) | (val << 16) | (val << 8) | val; --; while((index + 4) >= size) { --; *((uint *)(dst + index)) = v; --; index += 4; --; } --; while(index < size) { --; dst[index] = val; --; index++; --; } --; } -- --define void @__gen_memset_p_align(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { --entry: -- %conv = zext i8 %val to i32 -- %shl = shl nuw i32 %conv, 24 -- %shl2 = shl nuw nsw i32 %conv, 16 -- %or = or i32 %shl, %shl2 -- %shl4 = shl nuw nsw i32 %conv, 8 -- %or5 = or i32 %or, %shl4 -- %or7 = or i32 %or5, %conv -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond10, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0 -- %0 = bitcast i8* %add.ptr to i32*
-- store i32 %or7, i32* %0, align 4 -- br label %while.cond -- --while.cond10: ; preds = %while.cond, %while.body13 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] -- %cmp11 = icmp ult i32 %index.1, %size -- br i1 %cmp11, label %while.body13, label %while.end14 -- --while.body13: ; preds = %while.cond10 -- %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1 -- store i8 %val, i8* %arrayidx, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond10 -- --while.end14: ; preds = %while.cond10 -- ret void --} -- --define void @__gen_memset_g_align(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { --entry: -- %conv = zext i8 %val to i32 -- %shl = shl nuw i32 %conv, 24 -- %shl2 = shl nuw nsw i32 %conv, 16 -- %or = or i32 %shl, %shl2 -- %shl4 = shl nuw nsw i32 %conv, 8 -- %or5 = or i32 %or, %shl4 -- %or7 = or i32 %or5, %conv -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond10, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 -- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* -- store i32 %or7, i32 addrspace(1)* %0, align 4 -- br label %while.cond -- --while.cond10: ; preds = %while.cond, %while.body13 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] -- %cmp11 = icmp ult i32 %index.1, %size -- br i1 %cmp11, label %while.body13, label %while.end14 -- --while.body13: ; preds = %while.cond10 -- %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 -- store i8 %val, i8 addrspace(1)* %arrayidx, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond10 -- --while.end14: ; preds = %while.cond10 -- ret void --} -- --define void @__gen_memset_l_align(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind 
alwaysinline { --entry: -- %conv = zext i8 %val to i32 -- %shl = shl nuw i32 %conv, 24 -- %shl2 = shl nuw nsw i32 %conv, 16 -- %or = or i32 %shl, %shl2 -- %shl4 = shl nuw nsw i32 %conv, 8 -- %or5 = or i32 %or, %shl4 -- %or7 = or i32 %or5, %conv -- br label %while.cond -- --while.cond: ; preds = %while.body, %entry -- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] -- %add = add i32 %index.0, 4 -- %cmp = icmp ugt i32 %add, %size -- br i1 %cmp, label %while.cond10, label %while.body -- --while.body: ; preds = %while.cond -- %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 -- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* -- store i32 %or7, i32 addrspace(3)* %0, align 4 -- br label %while.cond -- --while.cond10: ; preds = %while.cond, %while.body13 -- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] -- %cmp11 = icmp ult i32 %index.1, %size -- br i1 %cmp11, label %while.body13, label %while.end14 -- --while.body13: ; preds = %while.cond10 -- %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 -- store i8 %val, i8 addrspace(3)* %arrayidx, align 1 -- %inc = add i32 %index.1, 1 -- br label %while.cond10 -- --while.end14: ; preds = %while.cond10 -- ret void --} -- --;The memset's source code. 
--; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) { --; size_t index = 0; --; while(index < size) { --; dst[index] = val; --; index++; --; } --; } -- --define void @__gen_memset_p(i8 addrspace(0)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { --entry: -- %cmp3 = icmp eq i32 %size, 0 -- br i1 %cmp3, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(0)* %dst to i32 -- %1 = add i32 %0, %index.04 -- %2 = inttoptr i32 %1 to i8 addrspace(0)* -- store i8 %val, i8 addrspace(0)* %2, align 1 -- %inc = add i32 %index.04, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { --entry: -- %cmp3 = icmp eq i32 %size, 0 -- br i1 %cmp3, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(1)* %dst to i32 -- %1 = add i32 %0, %index.04 -- %2 = inttoptr i32 %1 to i8 addrspace(1)* -- store i8 %val, i8 addrspace(1)* %2, align 1 -- %inc = add i32 %index.04, 1 -- %cmp = icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} -- --define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { --entry: -- %cmp3 = icmp eq i32 %size, 0 -- br i1 %cmp3, label %while.end, label %while.body -- --while.body: ; preds = %entry, %while.body -- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ] -- %0 = ptrtoint i8 addrspace(3)* %dst to i32 -- %1 = add i32 %0, %index.04 -- %2 = inttoptr i32 %1 to i8 addrspace(3)* -- store i8 %val, i8 addrspace(3)* %2, align 1 -- %inc = add i32 %index.04, 1 -- %cmp = 
icmp ult i32 %inc, %size -- br i1 %cmp, label %while.body, label %while.end -- --while.end: ; preds = %while.body, %entry -- ret void --} --- -1.8.3.2 |