diff options
author | Leopold Bloom | 2015-10-10 14:48:16 -0400 |
---|---|---|
committer | Leopold Bloom | 2015-10-10 14:48:16 -0400 |
commit | a657bc1c0cd20f02eafe5785f483df386844ab96 (patch) | |
tree | 8220506a84c638831c4f9eab8d04e436d6d3c5ee | |
parent | 9e22f1dcd290cf46281dc52fcdaed374c38310de (diff) | |
download | aur-a657bc1c0cd20f02eafe5785f483df386844ab96.tar.gz |
update to 1.1.1-2; fix llvm 3.7 support
-rw-r--r-- | PKGBUILD | 6 | ||||
-rw-r--r-- | llvm-3.7-patch-1.patch | 1193 | ||||
-rw-r--r-- | llvm-3.7-patch-2.patch | 1023 | ||||
-rw-r--r-- | llvm-3.7-patch-3.patch | 30 | ||||
-rw-r--r-- | llvm-3.7-patch-4.patch | 35 | ||||
-rw-r--r-- | llvm-3.7-patch-5.patch | 25 |
6 files changed, 2312 insertions, 0 deletions
@@ -16,6 +16,12 @@ source=("https://01.org/sites/default/files/beignet-$pkgver-source.tar.gz") sha256sums=('9bf4c69eb4fbd3c7cc9ef75c1952bca6f05259ffbe753a27e08ed98bb32e1119') build() { + cd "$srcdir/Beignet-$pkgver-Source" + patch -Np1 -i ../llvm-3.7-patch-1.patch + patch -Np1 -i ../llvm-3.7-patch-2.patch + patch -Np1 -i ../llvm-3.7-patch-3.patch + patch -Np1 -i ../llvm-3.7-patch-4.patch + patch -Np1 -i ../llvm-3.7-patch-5.patch mkdir -p "$srcdir/Beignet-$pkgver-Source/build" cd "$srcdir/Beignet-$pkgver-Source/build" cmake .. \ diff --git a/llvm-3.7-patch-1.patch b/llvm-3.7-patch-1.patch new file mode 100644 index 000000000000..a375eb6393e0 --- /dev/null +++ b/llvm-3.7-patch-1.patch @@ -0,0 +1,1193 @@ +llvm 3.7 change to llvm IR, need two copies if still use the llvm IR +to implement llvm.memset and llvm.memcpy. And opencl c is more clearly. + +Signed-off-by: Yang Rong <rong.r.yang at intel.com> +--- + backend/src/libocl/CMakeLists.txt | 5 +- + backend/src/libocl/include/ocl.h | 1 + + backend/src/libocl/include/ocl_memcpy.h | 51 +++ + backend/src/libocl/include/ocl_memset.h | 33 ++ + backend/src/libocl/src/ocl_memcpy.cl | 49 +++ + backend/src/libocl/src/ocl_memcpy.ll | 729 -------------------------------- + backend/src/libocl/src/ocl_memset.cl | 44 ++ + backend/src/libocl/src/ocl_memset.ll | 193 --------- + 8 files changed, 181 insertions(+), 924 deletions(-) + create mode 100644 backend/src/libocl/include/ocl_memcpy.h + create mode 100644 backend/src/libocl/include/ocl_memset.h + create mode 100644 backend/src/libocl/src/ocl_memcpy.cl + delete mode 100644 backend/src/libocl/src/ocl_memcpy.ll + create mode 100644 backend/src/libocl/src/ocl_memset.cl + delete mode 100644 backend/src/libocl/src/ocl_memset.ll + +diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt +index 0cd1eef..0fffd9b 100644 +--- a/backend/src/libocl/CMakeLists.txt ++++ b/backend/src/libocl/CMakeLists.txt +@@ -52,7 +52,8 @@ FOREACH(M ${OCL_COPY_HEADERS}) + 
COPY_THE_HEADER(${M}) + ENDFOREACH(M) + +-SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image) ++SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy ++ ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image) + FOREACH(M ${OCL_COPY_MODULES}) + COPY_THE_HEADER(${M}) + COPY_THE_SOURCE(${M}) +@@ -181,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M) + ) + ENDMACRO(ADD_LL_TO_BC_TARGET) + +-SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz) ++SET (OCL_LL_MODULES ocl_barrier ocl_clz) + FOREACH(f ${OCL_LL_MODULES}) + COPY_THE_LL(${f}) + ADD_LL_TO_BC_TARGET(${f}) +diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h +index a4af4aa..7897567 100644 +--- a/backend/src/libocl/include/ocl.h ++++ b/backend/src/libocl/include/ocl.h +@@ -30,6 +30,7 @@ + #include "ocl_image.h" + #include "ocl_integer.h" + #include "ocl_math.h" ++#include "ocl_memcpy.h" + #include "ocl_misc.h" + #include "ocl_printf.h" + #include "ocl_relational.h" +diff --git a/backend/src/libocl/include/ocl_memcpy.h b/backend/src/libocl/include/ocl_memcpy.h +new file mode 100644 +index 0000000..2672298 +--- /dev/null ++++ b/backend/src/libocl/include/ocl_memcpy.h +@@ -0,0 +1,51 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. 
If not, see <http://www.gnu.org/licenses/>. ++ * ++ */ ++#ifndef __OCL_MEMCPY_H__ ++#define __OCL_MEMCPY_H__ ++#include "ocl_types.h" ++ ++///////////////////////////////////////////////////////////////////////////// ++// memcopy functions ++///////////////////////////////////////////////////////////////////////////// ++void __gen_memcpy_gg_align(__global uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_gp_align(__global uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_gl_align(__global uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_gc_align(__global uchar* dst, __constant uchar* src, size_t size); ++void __gen_memcpy_pg_align(__private uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_pp_align(__private uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_pl_align(__private uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_pc_align(__private uchar* dst, __constant uchar* src, size_t size); ++void __gen_memcpy_lg_align(__local uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_lp_align(__local uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_ll_align(__local uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_lc_align(__local uchar* dst, __constant uchar* src, size_t size); ++ ++void __gen_memcpy_gg(__global uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_gp(__global uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_gl(__global uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_gc(__global uchar* dst, __constant uchar* src, size_t size); ++void __gen_memcpy_pg(__private uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_pp(__private uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_pl(__private uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_pc(__private uchar* dst, __constant uchar* src, size_t size); ++void 
__gen_memcpy_lg(__local uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_lp(__local uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_ll(__local uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_lc(__local uchar* dst, __constant uchar* src, size_t size); ++ ++#endif /* __OCL_MEMCPY_H__ */ +diff --git a/backend/src/libocl/include/ocl_memset.h b/backend/src/libocl/include/ocl_memset.h +new file mode 100644 +index 0000000..2d444ad +--- /dev/null ++++ b/backend/src/libocl/include/ocl_memset.h +@@ -0,0 +1,33 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
++ * ++ */ ++#ifndef __OCL_MEMSET_H__ ++#define __OCL_MEMSET_H__ ++#include "ocl_types.h" ++ ++///////////////////////////////////////////////////////////////////////////// ++// memcopy functions ++///////////////////////////////////////////////////////////////////////////// ++void __gen_memset_g_align(__global uchar* dst, uchar val, size_t size); ++void __gen_memset_p_align(__private uchar* dst, uchar val, size_t size); ++void __gen_memset_l_align(__local uchar* dst, uchar val, size_t size); ++ ++void __gen_memset_g(__global uchar* dst, uchar val, size_t size); ++void __gen_memset_p(__private uchar* dst, uchar val, size_t size); ++void __gen_memset_l(__local uchar* dst, uchar val, size_t size); ++ ++#endif /* __OCL_MEMSET_H__ */ +diff --git a/backend/src/libocl/src/ocl_memcpy.cl b/backend/src/libocl/src/ocl_memcpy.cl +new file mode 100644 +index 0000000..85f490f +--- /dev/null ++++ b/backend/src/libocl/src/ocl_memcpy.cl +@@ -0,0 +1,49 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
++ * ++ */ ++#include "ocl_memcpy.h" ++ ++#define DECL_TWO_SPACE_MEMCOPY_FN(NAME, DST_SPACE, SRC_SPACE) \ ++void __gen_memcpy_ ##NAME## _align (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \ ++ size_t index = 0; \ ++ while((index + 4) <= size) { \ ++ *((DST_SPACE uint *)(dst + index)) = *((SRC_SPACE uint *)(src + index)); \ ++ index += 4; \ ++ } \ ++ while(index < size) { \ ++ dst[index] = src[index]; \ ++ index++; \ ++ } \ ++} \ ++void __gen_memcpy_ ##NAME (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \ ++ size_t index = 0; \ ++ while(index < size) { \ ++ dst[index] = src[index]; \ ++ index++; \ ++ } \ ++} ++ ++#define DECL_ONE_SPACE_MEMCOPY_FN(NAME, DST_SPACE) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## g, DST_SPACE, __global) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## l, DST_SPACE, __local) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## p, DST_SPACE, __private) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## c, DST_SPACE, __constant) ++ ++DECL_ONE_SPACE_MEMCOPY_FN(g, __global) ++DECL_ONE_SPACE_MEMCOPY_FN(l, __local) ++DECL_ONE_SPACE_MEMCOPY_FN(p, __private) ++ +diff --git a/backend/src/libocl/src/ocl_memcpy.ll b/backend/src/libocl/src/ocl_memcpy.ll +deleted file mode 100644 +index b3fadb2..0000000 +--- a/backend/src/libocl/src/ocl_memcpy.ll ++++ /dev/null +@@ -1,729 +0,0 @@ +-;The memcpy's source code. 
+-; INLINE_OVERLOADABLE void __gen_memcpy_align(uchar* dst, uchar* src, size_t size) { +-; size_t index = 0; +-; while((index + 4) <= size) { +-; *((uint *)(dst + index)) = *((uint *)(src + index)); +-; index += 4; +-; } +-; while(index < size) { +-; dst[index] = src[index]; +-; index++; +-; } +-; } +- +-define void @__gen_memcpy_gg_align(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* +- %1 = load i32 addrspace(1)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 +- %3 = load i8 addrspace(1)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_gp_align(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = 
add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* +- %1 = load i32 addrspace(0)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 +- %3 = load i8 addrspace(0)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_gl_align(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* +- %1 = load i32 addrspace(3)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = 
%while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 +- %3 = load i8 addrspace(3)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pg_align(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* +- %1 = load i32 addrspace(1)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 +- %3 = load i8 addrspace(1)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = 
%while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pp_align(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* +- %1 = load i32 addrspace(0)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 +- %3 = load i8 addrspace(0)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pl_align(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 
addrspace(3)* +- %1 = load i32 addrspace(3)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 +- %3 = load i8 addrspace(3)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_lg_align(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* +- %1 = load i32 addrspace(1)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 
addrspace(1)* %src, i32 %index.1 +- %3 = load i8 addrspace(1)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_lp_align(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* +- %1 = load i32 addrspace(0)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 +- %3 = load i8 addrspace(0)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_ll_align(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, 
%while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* +- %1 = load i32 addrspace(3)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 +- %3 = load i8 addrspace(3)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-;The memcpy's source code. 
+-; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) { +-; size_t index = 0; +-; while(index < size) { +-; dst[index] = src[index]; +-; index++; +-; } +-; } +- +-define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(1)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(1)* +- %3 = load i8 addrspace(1)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(0)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(0)* +- %3 = load i8 addrspace(0)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, 
label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(3)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(3)* +- %3 = load i8 addrspace(3)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(1)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(1)* +- %3 = load i8 addrspace(1)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(0)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(0)* +- %3 = load i8 addrspace(0)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 
= add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(3)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(3)* +- %3 = load i8 addrspace(3)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(1)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(1)* +- %3 = load i8 addrspace(1)* %2, align 1 +- %4 = ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 
addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(0)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(0)* +- %3 = load i8 addrspace(0)* %2, align 1 +- %4 = ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(3)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(3)* +- %3 = load i8 addrspace(3)* %2, align 1 +- %4 = ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond 
+- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* +- %1 = load i32 addrspace(2)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 +- %3 = load i8 addrspace(2)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* +- %1 = load i32 addrspace(2)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, 
label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 +- %3 = load i8 addrspace(2)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* +- %1 = load i32 addrspace(2)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 +- %3 = load i8 addrspace(2)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- 
%cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(2)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(2)* +- %3 = load i8 addrspace(2)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(2)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(2)* +- %3 = load i8 addrspace(2)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(2)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(2)* +- %3 = load i8 addrspace(2)* %2, align 1 +- %4 = 
ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +diff --git a/backend/src/libocl/src/ocl_memset.cl b/backend/src/libocl/src/ocl_memset.cl +new file mode 100644 +index 0000000..b41851a +--- /dev/null ++++ b/backend/src/libocl/src/ocl_memset.cl +@@ -0,0 +1,44 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
++ * ++ */ ++#include "ocl_memset.h" ++ ++#define DECL_MEMSET_FN(NAME, DST_SPACE) \ ++void __gen_memset_ ##NAME## _align (DST_SPACE uchar* dst, uchar val, size_t size) { \ ++ size_t index = 0; \ ++ uint v = (val << 24) | (val << 16) | (val << 8) | val; \ ++ while((index + 4) <= size) { /* 4-byte stores only while a whole word still fits in size */ \ ++ *((DST_SPACE uint *)(dst + index)) = v; \ ++ index += 4; \ ++ } \ ++ while(index < size) { \ ++ dst[index] = val; \ ++ index++; \ ++ } \ ++} \ ++void __gen_memset_ ##NAME (DST_SPACE uchar* dst, uchar val, size_t size) { \ ++ size_t index = 0; \ ++ while(index < size) { \ ++ dst[index] = val; \ ++ index++; \ ++ } \ ++} ++ ++DECL_MEMSET_FN(g, __global) ++DECL_MEMSET_FN(l, __local) ++DECL_MEMSET_FN(p, __private) ++ +diff --git a/backend/src/libocl/src/ocl_memset.ll b/backend/src/libocl/src/ocl_memset.ll +deleted file mode 100644 +index 665eac4..0000000 +--- a/backend/src/libocl/src/ocl_memset.ll ++++ /dev/null +@@ -1,193 +0,0 @@ +-;The memset's source code. +-; INLINE_OVERLOADABLE void __gen_memset_align(uchar* dst, uchar val, size_t size) { +-; size_t index = 0; +-; uint v = (val << 24) | (val << 16) | (val << 8) | val; +-; while((index + 4) >= size) { +-; *((uint *)(dst + index)) = v; +-; index += 4; +-; } +-; while(index < size) { +-; dst[index] = val; +-; index++; +-; } +-; } +- +-define void @__gen_memset_p_align(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +-entry: +- %conv = zext i8 %val to i32 +- %shl = shl nuw i32 %conv, 24 +- %shl2 = shl nuw nsw i32 %conv, 16 +- %or = or i32 %shl, %shl2 +- %shl4 = shl nuw nsw i32 %conv, 8 +- %or5 = or i32 %or, %shl4 +- %or7 = or i32 %or5, %conv +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond10, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0 +- %0 = bitcast i8* %add.ptr to i32* 
+- store i32 %or7, i32* %0, align 4 +- br label %while.cond +- +-while.cond10: ; preds = %while.cond, %while.body13 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] +- %cmp11 = icmp ult i32 %index.1, %size +- br i1 %cmp11, label %while.body13, label %while.end14 +- +-while.body13: ; preds = %while.cond10 +- %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1 +- store i8 %val, i8* %arrayidx, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond10 +- +-while.end14: ; preds = %while.cond10 +- ret void +-} +- +-define void @__gen_memset_g_align(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +-entry: +- %conv = zext i8 %val to i32 +- %shl = shl nuw i32 %conv, 24 +- %shl2 = shl nuw nsw i32 %conv, 16 +- %or = or i32 %shl, %shl2 +- %shl4 = shl nuw nsw i32 %conv, 8 +- %or5 = or i32 %or, %shl4 +- %or7 = or i32 %or5, %conv +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond10, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* +- store i32 %or7, i32 addrspace(1)* %0, align 4 +- br label %while.cond +- +-while.cond10: ; preds = %while.cond, %while.body13 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] +- %cmp11 = icmp ult i32 %index.1, %size +- br i1 %cmp11, label %while.body13, label %while.end14 +- +-while.body13: ; preds = %while.cond10 +- %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %val, i8 addrspace(1)* %arrayidx, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond10 +- +-while.end14: ; preds = %while.cond10 +- ret void +-} +- +-define void @__gen_memset_l_align(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind 
alwaysinline { +-entry: +- %conv = zext i8 %val to i32 +- %shl = shl nuw i32 %conv, 24 +- %shl2 = shl nuw nsw i32 %conv, 16 +- %or = or i32 %shl, %shl2 +- %shl4 = shl nuw nsw i32 %conv, 8 +- %or5 = or i32 %or, %shl4 +- %or7 = or i32 %or5, %conv +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond10, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* +- store i32 %or7, i32 addrspace(3)* %0, align 4 +- br label %while.cond +- +-while.cond10: ; preds = %while.cond, %while.body13 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] +- %cmp11 = icmp ult i32 %index.1, %size +- br i1 %cmp11, label %while.body13, label %while.end14 +- +-while.body13: ; preds = %while.cond10 +- %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %val, i8 addrspace(3)* %arrayidx, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond10 +- +-while.end14: ; preds = %while.cond10 +- ret void +-} +- +-;The memset's source code. 
+-; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) { +-; size_t index = 0; +-; while(index < size) { +-; dst[index] = val; +-; index++; +-; } +-; } +- +-define void @__gen_memset_p(i8 addrspace(0)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +-entry: +- %cmp3 = icmp eq i32 %size, 0 +- br i1 %cmp3, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(0)* %dst to i32 +- %1 = add i32 %0, %index.04 +- %2 = inttoptr i32 %1 to i8 addrspace(0)* +- store i8 %val, i8 addrspace(0)* %2, align 1 +- %inc = add i32 %index.04, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +-entry: +- %cmp3 = icmp eq i32 %size, 0 +- br i1 %cmp3, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(1)* %dst to i32 +- %1 = add i32 %0, %index.04 +- %2 = inttoptr i32 %1 to i8 addrspace(1)* +- store i8 %val, i8 addrspace(1)* %2, align 1 +- %inc = add i32 %index.04, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +-entry: +- %cmp3 = icmp eq i32 %size, 0 +- br i1 %cmp3, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(3)* %dst to i32 +- %1 = add i32 %0, %index.04 +- %2 = inttoptr i32 %1 to i8 addrspace(3)* +- store i8 %val, i8 addrspace(3)* %2, align 1 +- %inc = add i32 %index.04, 1 +- %cmp = 
icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +-- +1.8.3.2 diff --git a/llvm-3.7-patch-2.patch b/llvm-3.7-patch-2.patch new file mode 100644 index 000000000000..6fa33bd0c0bc --- /dev/null +++ b/llvm-3.7-patch-2.patch @@ -0,0 +1,1023 @@ +Move all llvm relative includes to llvm_includes.hpp. + +Signed-off-by: Yang Rong <rong.r.yang at intel.com> +--- + backend/src/backend/gen_program.cpp | 4 + + backend/src/llvm/ExpandConstantExpr.cpp | 7 +- + backend/src/llvm/ExpandLargeIntegers.cpp | 21 +--- + backend/src/llvm/ExpandUtils.cpp | 8 +- + backend/src/llvm/PromoteIntegers.cpp | 10 +- + backend/src/llvm/StripAttributes.cpp | 9 +- + backend/src/llvm/llvm_barrier_nodup.cpp | 25 +---- + backend/src/llvm/llvm_bitcode_link.cpp | 20 +--- + backend/src/llvm/llvm_gen_backend.cpp | 93 +++-------------- + backend/src/llvm/llvm_gen_backend.hpp | 4 - + backend/src/llvm/llvm_includes.hpp | 125 +++++++++++++++++++++++ + backend/src/llvm/llvm_intrinsic_lowering.cpp | 24 +---- + backend/src/llvm/llvm_loadstore_optimization.cpp | 36 +------ + backend/src/llvm/llvm_passes.cpp | 70 +------------ + backend/src/llvm/llvm_printf_parser.cpp | 34 +----- + backend/src/llvm/llvm_sampler_fix.cpp | 21 +--- + backend/src/llvm/llvm_scalarize.cpp | 35 +------ + backend/src/llvm/llvm_to_gen.cpp | 78 ++++++-------- + backend/src/llvm/llvm_unroll.cpp | 36 ++----- + 19 files changed, 207 insertions(+), 453 deletions(-) + create mode 100644 backend/src/llvm/llvm_includes.hpp + +diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp +index 3c4983e..73d78f8 100644 +--- a/backend/src/backend/gen_program.cpp ++++ b/backend/src/backend/gen_program.cpp +@@ -402,7 +402,11 @@ namespace gbe { + llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module; + llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module; + ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 
++ if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource_Removed, &errMsg)) { ++#else + if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource, &errMsg)) { ++#endif + if (err != NULL && errSize != NULL && stringSize > 0u) { + strncpy(err, errMsg, stringSize-1); + err[stringSize-1] = '\0'; +diff --git a/backend/src/llvm/ExpandConstantExpr.cpp b/backend/src/llvm/ExpandConstantExpr.cpp +index 5c5934a..c6f57b8 100644 +--- a/backend/src/llvm/ExpandConstantExpr.cpp ++++ b/backend/src/llvm/ExpandConstantExpr.cpp +@@ -77,12 +77,7 @@ + //===----------------------------------------------------------------------===// + + #include <map> +- +-#include "llvm/IR/IRBuilder.h" +-#include "llvm/IR/Constants.h" +-#include "llvm/IR/Function.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/Pass.h" ++#include "llvm_includes.hpp" + #include "llvm_gen_backend.hpp" + + using namespace llvm; +diff --git a/backend/src/llvm/ExpandLargeIntegers.cpp b/backend/src/llvm/ExpandLargeIntegers.cpp +index f7e59a5..20fdda9 100644 +--- a/backend/src/llvm/ExpandLargeIntegers.cpp ++++ b/backend/src/llvm/ExpandLargeIntegers.cpp +@@ -86,24 +86,9 @@ + // 2. OR x, 0 can be optimized as x. And x, 0 can be optimized as 0. 
+ //===----------------------------------------------------------------------===// + +-#include "llvm/ADT/DenseMap.h" +-#include "llvm/ADT/PostOrderIterator.h" +-#include "llvm/ADT/STLExtras.h" +-#include "llvm/ADT/SmallVector.h" +-#if LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/CFG.h" +-#else +-#include "llvm/Support/CFG.h" +-#endif +-#include "llvm/IR/DataLayout.h" +-#include "llvm/IR/DerivedTypes.h" +-#include "llvm/IR/Function.h" +-#include "llvm/IR/IRBuilder.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/Pass.h" +-#include "llvm/Support/Debug.h" +-#include "llvm/Support/MathExtras.h" +-#include "llvm/Support/raw_ostream.h" ++ ++#include "llvm_includes.hpp" ++ + #include "llvm_gen_backend.hpp" + + using namespace llvm; +diff --git a/backend/src/llvm/ExpandUtils.cpp b/backend/src/llvm/ExpandUtils.cpp +index e6dfb52..801f969 100644 +--- a/backend/src/llvm/ExpandUtils.cpp ++++ b/backend/src/llvm/ExpandUtils.cpp +@@ -64,12 +64,8 @@ + // + //===----------------------------------------------------------------------===// + +-#include "llvm/IR/BasicBlock.h" +-#include "llvm/IR/Constants.h" +-#include "llvm/IR/Function.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/Module.h" +-#include "llvm/Support/raw_ostream.h" ++#include "llvm_includes.hpp" ++ + #include "llvm_gen_backend.hpp" + + using namespace llvm; +diff --git a/backend/src/llvm/PromoteIntegers.cpp b/backend/src/llvm/PromoteIntegers.cpp +index aba42b9..b65440f 100644 +--- a/backend/src/llvm/PromoteIntegers.cpp ++++ b/backend/src/llvm/PromoteIntegers.cpp +@@ -84,14 +84,8 @@ + //===----------------------------------------------------------------------===// + + +-#include "llvm/ADT/DenseMap.h" +-#include "llvm/ADT/SmallVector.h" +-#include "llvm/IR/DerivedTypes.h" +-#include "llvm/IR/Function.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IRBuilder.h" +-#include "llvm/Pass.h" +-#include "llvm/Support/raw_ostream.h" ++#include "llvm_includes.hpp" ++ + #include "llvm_gen_backend.hpp" 
+ + using namespace llvm; +diff --git a/backend/src/llvm/StripAttributes.cpp b/backend/src/llvm/StripAttributes.cpp +index 05cac17..e6df312 100644 +--- a/backend/src/llvm/StripAttributes.cpp ++++ b/backend/src/llvm/StripAttributes.cpp +@@ -69,14 +69,7 @@ + // * Calling conventions from functions and function calls. + // + +-#include "llvm/IR/Function.h" +-#include "llvm/Pass.h" +- +-#if LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/CallSite.h" +-#else +-#include "llvm/Support/CallSite.h" +-#endif ++#include "llvm_includes.hpp" + + #include "llvm_gen_backend.hpp" + +diff --git a/backend/src/llvm/llvm_barrier_nodup.cpp b/backend/src/llvm/llvm_barrier_nodup.cpp +index 19deafc..727e6bd 100644 +--- a/backend/src/llvm/llvm_barrier_nodup.cpp ++++ b/backend/src/llvm/llvm_barrier_nodup.cpp +@@ -28,30 +28,7 @@ + * + */ + +-#include "llvm/Config/llvm-config.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/Function.h" +-#include "llvm/InstrTypes.h" +-#include "llvm/Instructions.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/Module.h" +-#else +-#include "llvm/IR/Function.h" +-#include "llvm/IR/InstrTypes.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/Module.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#if LLVM_VERSION_MINOR <= 1 +-#include "llvm/Support/IRBuilder.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include "llvm/IRBuilder.h" +-#else +-#include "llvm/IR/IRBuilder.h" +-#endif /* LLVM_VERSION_MINOR <= 1 */ +-#include "llvm/Support/raw_ostream.h" +-#include "llvm/IR/Attributes.h" ++#include "llvm_includes.hpp" + + #include "llvm/llvm_gen_backend.hpp" + #include "sys/map.hpp" +diff --git a/backend/src/llvm/llvm_bitcode_link.cpp b/backend/src/llvm/llvm_bitcode_link.cpp +index ebf4386..56205bb 100644 +--- a/backend/src/llvm/llvm_bitcode_link.cpp ++++ b/backend/src/llvm/llvm_bitcode_link.cpp +@@ -21,24 +21,11 @@ + #include <iostream> + #include <sstream> + #include <set> +-#include "llvm/IR/Function.h" 
+-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/Module.h" +-#include "llvm/IRReader/IRReader.h" +-#include "llvm/PassManager.h" +-#include "llvm/Pass.h" +-#include "llvm/IR/IRBuilder.h" +-#include "llvm/Support/FileSystem.h" +-#include "llvm/Support/MemoryBuffer.h" +-#include "llvm/Bitcode/ReaderWriter.h" +-#include "llvm/Transforms/IPO.h" +-#include "llvm/Transforms/Utils/Cloning.h" +-#include "llvm/Support/SourceMgr.h" + + #include "sys/cvar.hpp" + #include "src/GBEConfig.h" ++#include "llvm_includes.hpp" + #include "llvm/llvm_gen_backend.hpp" +-#include "llvm-c/Linker.h" + + using namespace llvm; + +@@ -248,8 +235,11 @@ namespace gbe + printf("Fatal Error: link the bitcode error:\n%s\n", errorMsg); + return NULL; + } +- ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 ++ llvm::legacy::PassManager passes; ++#else + llvm::PassManager passes; ++#endif + + passes.add(createInternalizePass(kernels)); + passes.add(createGlobalDCEPass()); +diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp +index 4905415..4f2fe89 100644 +--- a/backend/src/llvm/llvm_gen_backend.cpp ++++ b/backend/src/llvm/llvm_gen_backend.cpp +@@ -71,86 +71,7 @@ + * is intercepted, we just abort + */ + +-#include "llvm/Config/llvm-config.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/CallingConv.h" +-#include "llvm/Constants.h" +-#include "llvm/DerivedTypes.h" +-#include "llvm/Module.h" +-#include "llvm/Instructions.h" +-#else +-#include "llvm/IR/CallingConv.h" +-#include "llvm/IR/Constants.h" +-#include "llvm/IR/DerivedTypes.h" +-#include "llvm/IR/Module.h" +-#include "llvm/IR/Instructions.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#include "llvm/PassManager.h" +-#include "llvm/IR/IRBuilder.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/Intrinsics.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/InlineAsm.h" +-#else +-#include "llvm/IR/Intrinsics.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include 
"llvm/IR/InlineAsm.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/ADT/StringExtras.h" +-#include "llvm/ADT/SmallString.h" +-#include "llvm/ADT/STLExtras.h" +-#include "llvm/Analysis/ConstantsScanner.h" +-#include "llvm/Analysis/LoopInfo.h" +-#include "llvm/Analysis/ValueTracking.h" +-#include "llvm/CodeGen/Passes.h" +-#include "llvm/CodeGen/IntrinsicLowering.h" +- +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5 +-#include "llvm/IR/Mangler.h" +-#else +-#include "llvm/Target/Mangler.h" +-#endif +- +-#include "llvm/ADT/PostOrderIterator.h" +-#include "llvm/Transforms/Scalar.h" +-#include "llvm/MC/MCAsmInfo.h" +-#include "llvm/MC/MCContext.h" +-#include "llvm/MC/MCInstrInfo.h" +-#include "llvm/MC/MCObjectFileInfo.h" +-#include "llvm/MC/MCRegisterInfo.h" +-#include "llvm/MC/MCSubtargetInfo.h" +-#include "llvm/MC/MCSymbol.h" +-#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1) +-#include "llvm/Target/TargetData.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include "llvm/DataLayout.h" +-#else +-#include "llvm/IR/DataLayout.h" +-#endif +- +-#if LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/CallSite.h" +-#include "llvm/IR/CFG.h" +-#else +-#include "llvm/Support/CallSite.h" +-#include "llvm/Support/CFG.h" +-#endif +- +-#include "llvm/Support/ErrorHandling.h" +-#include "llvm/Support/FormattedStream.h" +-#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2) +-#include "llvm/Support/InstVisitor.h" +-#elif LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/InstVisitor.h" +-#else +-#include "llvm/InstVisitor.h" +-#endif +-#include "llvm/Support/MathExtras.h" +-#include "llvm/Support/TargetRegistry.h" +-#include "llvm/Support/Host.h" +-#include "llvm/Support/ToolOutputFile.h" +-#include "llvm/Support/SourceMgr.h" ++#include "llvm_includes.hpp" + + #include "llvm/llvm_gen_backend.hpp" + #include "ir/context.hpp" +@@ -527,14 +448,22 @@ namespace gbe + TheModule(0), + btiBase(BTI_RESERVED_NUM) + { ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 ++ 
initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); ++#else + initializeLoopInfoPass(*PassRegistry::getPassRegistry()); ++#endif + pass = PASS_EMIT_REGISTERS; + } + + virtual const char *getPassName() const { return "Gen Back-End"; } + + void getAnalysisUsage(AnalysisUsage &AU) const { ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 ++ AU.addRequired<LoopInfoWrapperPass>(); ++#else + AU.addRequired<LoopInfo>(); ++#endif + AU.setPreservesAll(); + } + +@@ -564,7 +493,11 @@ namespace gbe + assignBti(F); + analyzePointerOrigin(F); + ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 ++ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); ++#else + LI = &getAnalysis<LoopInfo>(); ++#endif + emitFunction(F); + phiMap.clear(); + globalPointer.clear(); +diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp +index 1f16557..94a377b 100644 +--- a/backend/src/llvm/llvm_gen_backend.hpp ++++ b/backend/src/llvm/llvm_gen_backend.hpp +@@ -30,11 +30,7 @@ + #include "llvm/Config/llvm-config.h" + #include "llvm/Pass.h" + #include "llvm/Analysis/LoopPass.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/Instructions.h" +-#else + #include "llvm/IR/Instructions.h" +-#endif + #include "sys/platform.hpp" + #include "sys/map.hpp" + #include <algorithm> +diff --git a/backend/src/llvm/llvm_includes.hpp b/backend/src/llvm/llvm_includes.hpp +new file mode 100644 +index 0000000..fed3a18 +--- /dev/null ++++ b/backend/src/llvm/llvm_includes.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright © 2012 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see <http://www.gnu.org/licenses/>. ++ * ++ * Author: Yang Rong <rong.r.yang at intel.com> ++ */ ++ ++/** ++ * \file llvm_includes.hpp ++ * \author Yang Rong <rong.r.yang at intel.com> ++ */ ++#ifndef __GBE_IR_LLVM_INCLUDES_HPP__ ++#define __GBE_IR_LLVM_INCLUDES_HPP__ ++ ++#include "llvm/Config/llvm-config.h" ++ ++#include "llvm/IR/BasicBlock.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/IR/Module.h" ++#include "llvm/IR/IRBuilder.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/InstrTypes.h" ++#include "llvm/IR/IntrinsicInst.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/InlineAsm.h" ++#include "llvm/IR/LLVMContext.h" ++ ++#include "llvm_includes.hpp" ++ ++#include "llvm/Pass.h" ++#include "llvm/ADT/DenseMap.h" ++#include "llvm/ADT/PostOrderIterator.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringExtras.h" ++#include "llvm/ADT/SmallString.h" ++ ++#include "llvm/Analysis/ScalarEvolution.h" ++#include "llvm/Analysis/ScalarEvolutionExpressions.h" ++#include "llvm/Analysis/CFGPrinter.h" ++#include "llvm/Analysis/LoopPass.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/Analysis/LoopInfo.h" ++#include "llvm/Analysis/ValueTracking.h" ++#include "llvm/Analysis/Passes.h" ++ ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/FileSystem.h" 
++#include "llvm/Support/MemoryBuffer.h" ++#include "llvm/Support/SourceMgr.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++#include "llvm/Support/TargetRegistry.h" ++#include "llvm/Support/Host.h" ++#include "llvm/Support/ToolOutputFile.h" ++ ++#include "llvm-c/Linker.h" ++#include "llvm/IRReader/IRReader.h" ++#include "llvm/Bitcode/ReaderWriter.h" ++#include "llvm/Transforms/IPO.h" ++#include "llvm/Transforms/Utils/Cloning.h" ++ ++#include "llvm/CodeGen/Passes.h" ++#include "llvm/CodeGen/IntrinsicLowering.h" ++ ++#include "llvm/Transforms/Scalar.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCObjectFileInfo.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++ ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5 ++#include "llvm/IR/Mangler.h" ++#include "llvm/IR/CallSite.h" ++#include "llvm/IR/CFG.h" ++#include "llvm/IR/InstVisitor.h" ++#include "llvm/IR/IRPrintingPasses.h" ++#include "llvm/IR/Verifier.h" ++#include "llvm/IR/InstIterator.h" ++#include "llvm/IR/Dominators.h" ++#else ++#include "llvm/Support/CallSite.h" ++#include "llvm/Support/CFG.h" ++#include "llvm/Support/InstIterator.h" ++#include "llvm/InstVisitor.h" ++#include "llvm/Analysis/Verifier.h" ++#include "llvm/Analysis/Dominators.h" ++#include "llvm/Assembly/PrintModulePass.h" ++#include "llvm/Target/Mangler.h" ++#endif ++ ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 ++#include "llvm/Analysis/TargetLibraryInfo.h" ++#include "llvm/IR/LegacyPassManager.h" ++#else ++#include "llvm/Target/TargetLibraryInfo.h" ++#include "llvm/PassManager.h" ++#endif ++#include "llvm/ADT/Triple.h" ++ ++#include <clang/CodeGen/CodeGenAction.h> ++ ++#endif /* __GBE_IR_LLVM_INCLUDES_HPP__ */ +diff --git a/backend/src/llvm/llvm_intrinsic_lowering.cpp b/backend/src/llvm/llvm_intrinsic_lowering.cpp +index 7d1f8f0..b35d1e6 100644 +--- 
a/backend/src/llvm/llvm_intrinsic_lowering.cpp ++++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp +@@ -20,29 +20,7 @@ + * \author Yang Rong <rong.r.yang at intel.com> + */ + +-#include "llvm/Config/llvm-config.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/Function.h" +-#include "llvm/InstrTypes.h" +-#include "llvm/Instructions.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/Module.h" +-#else +-#include "llvm/IR/Function.h" +-#include "llvm/IR/InstrTypes.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/Module.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#if LLVM_VERSION_MINOR <= 1 +-#include "llvm/Support/IRBuilder.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include "llvm/IRBuilder.h" +-#else +-#include "llvm/IR/IRBuilder.h" +-#endif /* LLVM_VERSION_MINOR <= 1 */ +-#include "llvm/Support/raw_ostream.h" ++#include "llvm_includes.hpp" + + #include "llvm/llvm_gen_backend.hpp" + #include "sys/map.hpp" +diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp +index c6349fa..698fdc2 100644 +--- a/backend/src/llvm/llvm_loadstore_optimization.cpp ++++ b/backend/src/llvm/llvm_loadstore_optimization.cpp +@@ -22,37 +22,7 @@ + * from Vectorize passes in llvm. 
+ */ + +-#include "llvm/IR/Instructions.h" +-#include "llvm/Pass.h" +-#include "llvm/PassManager.h" +- +-#include "llvm/Config/llvm-config.h" +-#include "llvm/ADT/DenseMap.h" +-#include "llvm/ADT/PostOrderIterator.h" +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +-#include "llvm/Function.h" +-#include "llvm/InstrTypes.h" +-#include "llvm/Instructions.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/Module.h" +-#else +-#include "llvm/IR/Function.h" +-#include "llvm/IR/InstrTypes.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/Module.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1 +-#include "llvm/Support/IRBuilder.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include "llvm/IRBuilder.h" +-#else +-#include "llvm/IR/IRBuilder.h" +-#endif /* LLVM_VERSION_MINOR <= 1 */ +-#include "llvm/Support/raw_ostream.h" +-#include "llvm/Analysis/ScalarEvolution.h" +-#include "llvm/Analysis/ScalarEvolutionExpressions.h" ++#include "llvm_includes.hpp" + + using namespace llvm; + namespace gbe { +@@ -72,7 +42,9 @@ namespace gbe { + + virtual bool runOnBasicBlock(BasicBlock &BB) { + SE = &getAnalysis<ScalarEvolution>(); +- #if LLVM_VERSION_MINOR >= 5 ++ #if LLVM_VERSION_MINOR >= 7 ++ TD = &BB.getModule()->getDataLayout(); ++ #elif LLVM_VERSION_MINOR >= 5 + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + TD = DLP ? 
&DLP->getDataLayout() : nullptr; + #else +diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp +index 223f61b..d5d965b 100644 +--- a/backend/src/llvm/llvm_passes.cpp ++++ b/backend/src/llvm/llvm_passes.cpp +@@ -30,75 +30,7 @@ + * Segovia) the right to use another license for it (MIT here) + */ + +-#include "llvm/Config/llvm-config.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/CallingConv.h" +-#include "llvm/Constants.h" +-#include "llvm/DerivedTypes.h" +-#include "llvm/Module.h" +-#include "llvm/Instructions.h" +-#else +-#include "llvm/IR/CallingConv.h" +-#include "llvm/IR/Constants.h" +-#include "llvm/IR/DerivedTypes.h" +-#include "llvm/IR/Module.h" +-#include "llvm/IR/Instructions.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#include "llvm/PassManager.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/Intrinsics.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/InlineAsm.h" +-#else +-#include "llvm/IR/Intrinsics.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/InlineAsm.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/ADT/StringExtras.h" +-#include "llvm/ADT/SmallString.h" +-#include "llvm/ADT/STLExtras.h" +-#include "llvm/Analysis/ConstantsScanner.h" +-#include "llvm/Analysis/LoopInfo.h" +-#include "llvm/Analysis/ValueTracking.h" +-#include "llvm/CodeGen/Passes.h" +-#include "llvm/CodeGen/IntrinsicLowering.h" +- +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5 +-#include "llvm/IR/Mangler.h" +-#else +-#include "llvm/Target/Mangler.h" +-#endif +- +-#include "llvm/Transforms/Scalar.h" +-#include "llvm/MC/MCAsmInfo.h" +-#include "llvm/MC/MCContext.h" +-#include "llvm/MC/MCInstrInfo.h" +-#include "llvm/MC/MCObjectFileInfo.h" +-#include "llvm/MC/MCRegisterInfo.h" +-#include "llvm/MC/MCSubtargetInfo.h" +-#include "llvm/MC/MCSymbol.h" +-#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1) +-#include "llvm/Target/TargetData.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include 
"llvm/DataLayout.h" +-#else +-#include "llvm/IR/DataLayout.h" +-#endif +-#include "llvm/Support/ErrorHandling.h" +-#include "llvm/Support/FormattedStream.h" +-#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2) +-#include "llvm/Support/InstVisitor.h" +-#elif LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/InstVisitor.h" +-#else +-#include "llvm/InstVisitor.h" +-#endif +-#include "llvm/Support/MathExtras.h" +-#include "llvm/Support/TargetRegistry.h" +-#include "llvm/Support/Host.h" +-#include "llvm/Support/ToolOutputFile.h" +-#include "llvm/Support/SourceMgr.h" ++#include "llvm_includes.hpp" + + #include "llvm/llvm_gen_backend.hpp" + #include "ir/unit.hpp" +diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp +index 3d84457..1e8427c 100644 +--- a/backend/src/llvm/llvm_printf_parser.cpp ++++ b/backend/src/llvm/llvm_printf_parser.cpp +@@ -33,39 +33,7 @@ + #include <stdio.h> + #include <stdlib.h> + +-#include "llvm/Config/llvm-config.h" +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/Function.h" +-#include "llvm/InstrTypes.h" +-#include "llvm/Instructions.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/Module.h" +-#else +-#include "llvm/IR/Function.h" +-#include "llvm/IR/InstrTypes.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/Module.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#if LLVM_VERSION_MINOR <= 1 +-#include "llvm/Support/IRBuilder.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include "llvm/IRBuilder.h" +-#else +-#include "llvm/IR/IRBuilder.h" +-#endif /* LLVM_VERSION_MINOR <= 1 */ +- +-#if LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/CallSite.h" +-#include "llvm/IR/CFG.h" +-#else +-#include "llvm/Support/CallSite.h" +-#include "llvm/Support/CFG.h" +-#endif +- +-#include "llvm/Support/raw_ostream.h" +-#include "llvm/IR/Attributes.h" ++#include "llvm_includes.hpp" + + #include "llvm/llvm_gen_backend.hpp" + #include "sys/map.hpp" +diff --git 
a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp +index 8c76324..01db8fe 100644 +--- a/backend/src/llvm/llvm_sampler_fix.cpp ++++ b/backend/src/llvm/llvm_sampler_fix.cpp +@@ -20,27 +20,8 @@ + * make sure to get correct pixel value. But for some other + * sampler, we don't need those work around code. + */ +-#include "llvm/IR/Instructions.h" +-#include "llvm/Pass.h" +-#include "llvm/PassManager.h" + +-#include "llvm/Config/llvm-config.h" +-#include "llvm/ADT/DenseMap.h" +-#include "llvm/ADT/PostOrderIterator.h" +-#include "llvm/IR/Function.h" +-#include "llvm/IR/InstrTypes.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/Module.h" +-#include "llvm/Pass.h" +-#include "llvm/IR/IRBuilder.h" +-#if LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/CFG.h" +-#else +-#include "llvm/Support/CFG.h" +-#endif +- +-#include "llvm/Analysis/ConstantsScanner.h" ++#include "llvm_includes.hpp" + + #include "llvm_gen_backend.hpp" + #include "ocl_common_defines.h" +diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp +index bc985c6..7ee5259 100644 +--- a/backend/src/llvm/llvm_scalarize.cpp ++++ b/backend/src/llvm/llvm_scalarize.cpp +@@ -59,39 +59,7 @@ + // + //===----------------------------------------------------------------------===// + +-#include "llvm/Config/llvm-config.h" +-#include "llvm/ADT/DenseMap.h" +-#include "llvm/ADT/PostOrderIterator.h" +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +-#include "llvm/Function.h" +-#include "llvm/InstrTypes.h" +-#include "llvm/Instructions.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/Module.h" +-#else +-#include "llvm/IR/Function.h" +-#include "llvm/IR/InstrTypes.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/Module.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1 +-#include 
"llvm/Support/IRBuilder.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include "llvm/IRBuilder.h" +-#else +-#include "llvm/IR/IRBuilder.h" +-#endif /* LLVM_VERSION_MINOR <= 1 */ +- +-#if LLVM_VERSION_MINOR >= 5 +-#include "llvm/IR/CallSite.h" +-#include "llvm/IR/CFG.h" +-#else +-#include "llvm/Support/CallSite.h" +-#include "llvm/Support/CFG.h" +-#endif +-#include "llvm/Support/raw_ostream.h" ++#include "llvm_includes.hpp" + + #include "llvm/llvm_gen_backend.hpp" + #include "sys/map.hpp" +@@ -128,7 +96,6 @@ namespace gbe { + + Scalarize() : FunctionPass(ID) + { +- initializeLoopInfoPass(*PassRegistry::getPassRegistry()); + #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 + initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + #else +diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp +index 891f2a1..538d1c5 100644 +--- a/backend/src/llvm/llvm_to_gen.cpp ++++ b/backend/src/llvm/llvm_to_gen.cpp +@@ -22,40 +22,8 @@ + * \author Benjamin Segovia <benjamin.segovia at intel.com> + */ + +-#include "llvm/Config/llvm-config.h" +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +-#include "llvm/LLVMContext.h" +-#include "llvm/Module.h" +-#include "llvm/DataLayout.h" +-#else +-#include "llvm/IR/LLVMContext.h" +-#include "llvm/IR/Module.h" +-#include "llvm/IR/DataLayout.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/PassManager.h" +-#include "llvm/Pass.h" +-#include "llvm/Analysis/Passes.h" +-#include "llvm/Transforms/IPO.h" +-#include "llvm/Target/TargetLibraryInfo.h" +-#include "llvm/ADT/Triple.h" +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +-#include "llvm/Support/IRReader.h" +-#else +-#include "llvm/IRReader/IRReader.h" +-#include "llvm/Support/SourceMgr.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Support/raw_ostream.h" +-#include "llvm/Transforms/Scalar.h" +- +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5 +-#include "llvm/IR/IRPrintingPasses.h" +-#include 
"llvm/IR/Verifier.h" +-#else +-#include "llvm/Analysis/Verifier.h" +-#include "llvm/Assembly/PrintModulePass.h" +-#endif ++#include "llvm_includes.hpp" + +-#include "llvm/Analysis/CFGPrinter.h" + #include "llvm/llvm_gen_backend.hpp" + #include "llvm/llvm_to_gen.hpp" + #include "sys/cvar.hpp" +@@ -64,8 +32,6 @@ + #include "ir/function.hpp" + #include "ir/structurizer.hpp" + +-#include <clang/CodeGen/CodeGenAction.h> +- + #include <sys/types.h> + #include <sys/stat.h> + #include <fcntl.h> +@@ -78,11 +44,19 @@ namespace gbe + BVAR(OCL_OUTPUT_CFG_GEN_IR, false); + using namespace llvm; + +- void runFuntionPass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL) ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 ++ using namespace llvm::legacy; ++ #define TARGETLIBRARY TargetLibraryInfoImpl ++#else ++ #define TARGETLIBRARY TargetLibraryInfo ++#endif ++ ++ void runFuntionPass(Module &mod, TARGETLIBRARY *libraryInfo, const DataLayout &DL) + { + FunctionPassManager FPM(&mod); + +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6 ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 ++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6 + FPM.add(new DataLayoutPass()); + #elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 5 + FPM.add(new DataLayoutPass(DL)); +@@ -95,7 +69,11 @@ namespace gbe + #else + FPM.add(createVerifierPass()); + #endif ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 ++ FPM.add(new TargetLibraryInfoWrapperPass(*libraryInfo)); ++#else + FPM.add(new TargetLibraryInfo(*libraryInfo)); ++#endif + FPM.add(createTypeBasedAliasAnalysisPass()); + FPM.add(createBasicAliasAnalysisPass()); + FPM.add(createCFGSimplificationPass()); +@@ -111,18 +89,24 @@ namespace gbe + FPM.doFinalization(); + } + +- void runModulePass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL, int optLevel, bool strictMath) ++ void runModulePass(Module &mod, TARGETLIBRARY *libraryInfo, const DataLayout &DL, int optLevel, bool 
strictMath) + { +- llvm::PassManager MPM; ++ PassManager MPM; + +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6 ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 ++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6 + MPM.add(new DataLayoutPass()); + #elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 5 + MPM.add(new DataLayoutPass(DL)); + #else + MPM.add(new DataLayout(DL)); + #endif ++ ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 ++ MPM.add(new TargetLibraryInfoWrapperPass(*libraryInfo)); ++#else + MPM.add(new TargetLibraryInfo(*libraryInfo)); ++#endif + MPM.add(createTypeBasedAliasAnalysisPass()); + MPM.add(createBasicAliasAnalysisPass()); + MPM.add(createIntrinsicLoweringPass()); +@@ -202,7 +186,7 @@ namespace gbe + + #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 + #define OUTPUT_BITCODE(STAGE, MOD) do { \ +- llvm::PassManager passes__; \ ++ PassManager passes__; \ + if (OCL_OUTPUT_LLVM_##STAGE) { \ + passes__.add(createPrintModulePass(*o)); \ + passes__.run(MOD); \ +@@ -210,7 +194,7 @@ namespace gbe + }while(0) + #else + #define OUTPUT_BITCODE(STAGE, MOD) do { \ +- llvm::PassManager passes__; \ ++ PassManager passes__; \ + if (OCL_OUTPUT_LLVM_##STAGE) { \ + passes__.add(createPrintModulePass(&*o)); \ + passes__.run(MOD); \ +@@ -260,16 +244,20 @@ namespace gbe + Module &mod = *M.get(); + DataLayout DL(&mod); + ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 ++ mod.setDataLayout(DL); ++#endif + Triple TargetTriple(mod.getTargetTriple()); +- TargetLibraryInfo *libraryInfo = new TargetLibraryInfo(TargetTriple); ++ TARGETLIBRARY *libraryInfo = new TARGETLIBRARY(TargetTriple); + libraryInfo->disableAllFunctions(); + + OUTPUT_BITCODE(AFTER_LINK, mod); + + runFuntionPass(mod, libraryInfo, DL); + runModulePass(mod, libraryInfo, DL, optLevel, strictMath); +- llvm::PassManager passes; +-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6 ++ PassManager passes; ++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 
7 ++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6 + passes.add(new DataLayoutPass()); + #elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 5 + passes.add(new DataLayoutPass(DL)); +diff --git a/backend/src/llvm/llvm_unroll.cpp b/backend/src/llvm/llvm_unroll.cpp +index 5d3fad8..6990e39 100644 +--- a/backend/src/llvm/llvm_unroll.cpp ++++ b/backend/src/llvm/llvm_unroll.cpp +@@ -18,34 +18,9 @@ + #include "llvm/Config/llvm-config.h" + #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 + #include <set> +-#if LLVM_VERSION_MINOR <= 2 +-#include "llvm/Function.h" +-#include "llvm/InstrTypes.h" +-#include "llvm/Instructions.h" +-#include "llvm/IntrinsicInst.h" +-#include "llvm/Module.h" +-#else +-#include "llvm/IR/Function.h" +-#include "llvm/IR/InstrTypes.h" +-#include "llvm/IR/Instructions.h" +-#include "llvm/IR/IntrinsicInst.h" +-#include "llvm/IR/Module.h" +-#endif /* LLVM_VERSION_MINOR <= 2 */ +-#include "llvm/Pass.h" +-#if LLVM_VERSION_MINOR <= 1 +-#include "llvm/Support/IRBuilder.h" +-#elif LLVM_VERSION_MINOR == 2 +-#include "llvm/IRBuilder.h" +-#else +-#include "llvm/IR/IRBuilder.h" +-#endif /* LLVM_VERSION_MINOR <= 1 */ +-#include "llvm/Support/raw_ostream.h" +-#include "llvm/PassManager.h" +-#include "llvm/Transforms/Scalar.h" +-#include "llvm/Analysis/ScalarEvolution.h" +-#include "llvm/Analysis/LoopPass.h" +-#include "llvm/Analysis/TargetTransformInfo.h" +-#include "llvm/IR/Dominators.h" ++ ++#include "llvm_includes.hpp" ++ + #include "llvm/llvm_gen_backend.hpp" + #include "sys/map.hpp" + +@@ -61,8 +36,13 @@ namespace gbe { + LoopPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { ++#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 7) ++ AU.addRequired<LoopInfoWrapperPass>(); ++ AU.addPreserved<LoopInfoWrapperPass>(); ++#else + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); ++#endif + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); +-- +1.8.3.2 diff --git 
a/llvm-3.7-patch-3.patch b/llvm-3.7-patch-3.patch new file mode 100644 index 000000000000..26df7ddfb275 --- /dev/null +++ b/llvm-3.7-patch-3.patch @@ -0,0 +1,30 @@ +Otherwise, createInstructionCombiningPass will convert some call to illegal +instruction in llvm3.7, for example utest compiler_time_stamp and test_load_program_from_spir. + +Signed-off-by: Yang Rong <rong.r.yang at intel.com> +--- + backend/src/llvm/llvm_to_gen.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp +index 538d1c5..24d4be7 100644 +--- a/backend/src/llvm/llvm_to_gen.cpp ++++ b/backend/src/llvm/llvm_to_gen.cpp +@@ -110,6 +110,7 @@ namespace gbe + MPM.add(createTypeBasedAliasAnalysisPass()); + MPM.add(createBasicAliasAnalysisPass()); + MPM.add(createIntrinsicLoweringPass()); ++ MPM.add(createStripAttributesPass()); // Strip unsupported attributes and calling conventions. + MPM.add(createSamplerFixPass()); + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + +@@ -119,7 +120,6 @@ namespace gbe + MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createPruneEHPass()); // Remove dead EH info +- MPM.add(createStripAttributesPass()); // Strip unsupported attributes and calling conventions. + MPM.add(createBarrierNodupPass(false)); // remove noduplicate fnAttr before inlining. + MPM.add(createFunctionInliningPass(20000)); + MPM.add(createBarrierNodupPass(true)); // restore noduplicate fnAttr after inlining. +-- +1.8.3.2 diff --git a/llvm-3.7-patch-4.patch b/llvm-3.7-patch-4.patch new file mode 100644 index 000000000000..bf084ea19a8c --- /dev/null +++ b/llvm-3.7-patch-4.patch @@ -0,0 +1,35 @@ +It can fix datalayout mismatch warning in llvm3.7. 
+ +Signed-off-by: Yang Rong <rong.r.yang at intel.com> +--- + backend/src/libocl/src/ocl_barrier.ll | 3 +++ + backend/src/libocl/src/ocl_clz.ll | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll +index dc3579c..2765a71 100644 +--- a/backend/src/libocl/src/ocl_barrier.ll ++++ b/backend/src/libocl/src/ocl_barrier.ll +@@ -4,6 +4,9 @@ + ;#define CLK_LOCAL_MEM_FENCE (1 << 0) + ;#define CLK_GLOBAL_MEM_FENCE (1 << 1) + ++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir" ++ + declare i32 @_get_local_mem_fence() nounwind alwaysinline + declare i32 @_get_global_mem_fence() nounwind alwaysinline + declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate +diff --git a/backend/src/libocl/src/ocl_clz.ll b/backend/src/libocl/src/ocl_clz.ll +index a274cde..9522881 100644 +--- a/backend/src/libocl/src/ocl_clz.ll ++++ b/backend/src/libocl/src/ocl_clz.ll +@@ -1,3 +1,6 @@ ++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir" ++ + declare i8 @llvm.ctlz.i8(i8, i1) + declare i16 @llvm.ctlz.i16(i16, i1) + declare i32 @llvm.ctlz.i32(i32, i1) +-- +1.8.3.2 diff --git a/llvm-3.7-patch-5.patch b/llvm-3.7-patch-5.patch new file mode 100644 index 000000000000..9ccf8c82892d --- /dev/null +++ b/llvm-3.7-patch-5.patch @@ -0,0 +1,25 @@ +Must explicit use void if function don't have parameter. 
+ +Signed-off-by: Yang Rong <rong.r.yang at intel.com> +--- + kernels/compiler_function_qualifiers.cl | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernels/compiler_function_qualifiers.cl b/kernels/compiler_function_qualifiers.cl +index c904c84..c9f7e5d 100644 +--- a/kernels/compiler_function_qualifiers.cl ++++ b/kernels/compiler_function_qualifiers.cl +@@ -1,9 +1,9 @@ + /* test OpenCL 1.1 Function Qualifiers (section 6.7) */ +-kernel void compiler_function_qualifiers() ++kernel void compiler_function_qualifiers(void) + __attribute__((vec_type_hint(float))) + __attribute__((work_group_size_hint(4,1,1))) + __attribute__((reqd_work_group_size(4,1,1))); + +-kernel void compiler_function_qualifiers() ++kernel void compiler_function_qualifiers(void) + { + } +-- +1.8.3.2 |