summarylogtreecommitdiffstats
diff options
context:
space:
mode:
author Leopold Bloom 2015-10-10 14:48:16 -0400
committer Leopold Bloom 2015-10-10 14:48:16 -0400
commit a657bc1c0cd20f02eafe5785f483df386844ab96 (patch)
tree 8220506a84c638831c4f9eab8d04e436d6d3c5ee
parent 9e22f1dcd290cf46281dc52fcdaed374c38310de (diff)
download aur-a657bc1c0cd20f02eafe5785f483df386844ab96.tar.gz
update to 1.1.1-2; fix llvm 3.7 support
-rw-r--r-- PKGBUILD 6
-rw-r--r-- llvm-3.7-patch-1.patch 1193
-rw-r--r-- llvm-3.7-patch-2.patch 1023
-rw-r--r-- llvm-3.7-patch-3.patch 30
-rw-r--r-- llvm-3.7-patch-4.patch 35
-rw-r--r-- llvm-3.7-patch-5.patch 25
6 files changed, 2312 insertions, 0 deletions
diff --git a/PKGBUILD b/PKGBUILD
index 09171026aab0..36a4d2acee94 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -16,6 +16,12 @@ source=("https://01.org/sites/default/files/beignet-$pkgver-source.tar.gz")
sha256sums=('9bf4c69eb4fbd3c7cc9ef75c1952bca6f05259ffbe753a27e08ed98bb32e1119')
build() {
+ cd "$srcdir/Beignet-$pkgver-Source"
+ patch -Np1 -i ../llvm-3.7-patch-1.patch
+ patch -Np1 -i ../llvm-3.7-patch-2.patch
+ patch -Np1 -i ../llvm-3.7-patch-3.patch
+ patch -Np1 -i ../llvm-3.7-patch-4.patch
+ patch -Np1 -i ../llvm-3.7-patch-5.patch
mkdir -p "$srcdir/Beignet-$pkgver-Source/build"
cd "$srcdir/Beignet-$pkgver-Source/build"
cmake .. \
diff --git a/llvm-3.7-patch-1.patch b/llvm-3.7-patch-1.patch
new file mode 100644
index 000000000000..a375eb6393e0
--- /dev/null
+++ b/llvm-3.7-patch-1.patch
@@ -0,0 +1,1193 @@
+LLVM 3.7 changed the LLVM IR, so two copies would be needed if LLVM IR were still
+used to implement llvm.memset and llvm.memcpy. OpenCL C is also clearer.
+
+Signed-off-by: Yang Rong <rong.r.yang at intel.com>
+---
+ backend/src/libocl/CMakeLists.txt | 5 +-
+ backend/src/libocl/include/ocl.h | 1 +
+ backend/src/libocl/include/ocl_memcpy.h | 51 +++
+ backend/src/libocl/include/ocl_memset.h | 33 ++
+ backend/src/libocl/src/ocl_memcpy.cl | 49 +++
+ backend/src/libocl/src/ocl_memcpy.ll | 729 --------------------------------
+ backend/src/libocl/src/ocl_memset.cl | 44 ++
+ backend/src/libocl/src/ocl_memset.ll | 193 ---------
+ 8 files changed, 181 insertions(+), 924 deletions(-)
+ create mode 100644 backend/src/libocl/include/ocl_memcpy.h
+ create mode 100644 backend/src/libocl/include/ocl_memset.h
+ create mode 100644 backend/src/libocl/src/ocl_memcpy.cl
+ delete mode 100644 backend/src/libocl/src/ocl_memcpy.ll
+ create mode 100644 backend/src/libocl/src/ocl_memset.cl
+ delete mode 100644 backend/src/libocl/src/ocl_memset.ll
+
+diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
+index 0cd1eef..0fffd9b 100644
+--- a/backend/src/libocl/CMakeLists.txt
++++ b/backend/src/libocl/CMakeLists.txt
+@@ -52,7 +52,8 @@ FOREACH(M ${OCL_COPY_HEADERS})
+ COPY_THE_HEADER(${M})
+ ENDFOREACH(M)
+
+-SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image)
++SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy
++ ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image)
+ FOREACH(M ${OCL_COPY_MODULES})
+ COPY_THE_HEADER(${M})
+ COPY_THE_SOURCE(${M})
+@@ -181,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M)
+ )
+ ENDMACRO(ADD_LL_TO_BC_TARGET)
+
+-SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz)
++SET (OCL_LL_MODULES ocl_barrier ocl_clz)
+ FOREACH(f ${OCL_LL_MODULES})
+ COPY_THE_LL(${f})
+ ADD_LL_TO_BC_TARGET(${f})
+diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
+index a4af4aa..7897567 100644
+--- a/backend/src/libocl/include/ocl.h
++++ b/backend/src/libocl/include/ocl.h
+@@ -30,6 +30,7 @@
+ #include "ocl_image.h"
+ #include "ocl_integer.h"
+ #include "ocl_math.h"
++#include "ocl_memcpy.h"
+ #include "ocl_misc.h"
+ #include "ocl_printf.h"
+ #include "ocl_relational.h"
+diff --git a/backend/src/libocl/include/ocl_memcpy.h b/backend/src/libocl/include/ocl_memcpy.h
+new file mode 100644
+index 0000000..2672298
+--- /dev/null
++++ b/backend/src/libocl/include/ocl_memcpy.h
+@@ -0,0 +1,51 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++#ifndef __OCL_MEMCPY_H__
++#define __OCL_MEMCPY_H__
++#include "ocl_types.h"
++
++/////////////////////////////////////////////////////////////////////////////
++// memcopy functions
++/////////////////////////////////////////////////////////////////////////////
++void __gen_memcpy_gg_align(__global uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_gp_align(__global uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_gl_align(__global uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_gc_align(__global uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_pg_align(__private uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_pp_align(__private uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_pl_align(__private uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_pc_align(__private uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_lg_align(__local uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_lp_align(__local uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_ll_align(__local uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_lc_align(__local uchar* dst, __constant uchar* src, size_t size);
++
++void __gen_memcpy_gg(__global uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_gp(__global uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_gl(__global uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_gc(__global uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_pg(__private uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_pp(__private uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_pl(__private uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_pc(__private uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_lg(__local uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_lp(__local uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_ll(__local uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_lc(__local uchar* dst, __constant uchar* src, size_t size);
++
++#endif /* __OCL_MEMCPY_H__ */
+diff --git a/backend/src/libocl/include/ocl_memset.h b/backend/src/libocl/include/ocl_memset.h
+new file mode 100644
+index 0000000..2d444ad
+--- /dev/null
++++ b/backend/src/libocl/include/ocl_memset.h
+@@ -0,0 +1,33 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++#ifndef __OCL_MEMSET_H__
++#define __OCL_MEMSET_H__
++#include "ocl_types.h"
++
++/////////////////////////////////////////////////////////////////////////////
++// memset functions
++/////////////////////////////////////////////////////////////////////////////
++void __gen_memset_g_align(__global uchar* dst, uchar val, size_t size);
++void __gen_memset_p_align(__private uchar* dst, uchar val, size_t size);
++void __gen_memset_l_align(__local uchar* dst, uchar val, size_t size);
++
++void __gen_memset_g(__global uchar* dst, uchar val, size_t size);
++void __gen_memset_p(__private uchar* dst, uchar val, size_t size);
++void __gen_memset_l(__local uchar* dst, uchar val, size_t size);
++
++#endif /* __OCL_MEMSET_H__ */
+diff --git a/backend/src/libocl/src/ocl_memcpy.cl b/backend/src/libocl/src/ocl_memcpy.cl
+new file mode 100644
+index 0000000..85f490f
+--- /dev/null
++++ b/backend/src/libocl/src/ocl_memcpy.cl
+@@ -0,0 +1,49 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++#include "ocl_memcpy.h"
++
++#define DECL_TWO_SPACE_MEMCOPY_FN(NAME, DST_SPACE, SRC_SPACE) \
++void __gen_memcpy_ ##NAME## _align (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \
++ size_t index = 0; \
++ while((index + 4) <= size) { \
++ *((DST_SPACE uint *)(dst + index)) = *((SRC_SPACE uint *)(src + index)); \
++ index += 4; \
++ } \
++ while(index < size) { \
++ dst[index] = src[index]; \
++ index++; \
++ } \
++} \
++void __gen_memcpy_ ##NAME (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \
++ size_t index = 0; \
++ while(index < size) { \
++ dst[index] = src[index]; \
++ index++; \
++ } \
++}
++
++#define DECL_ONE_SPACE_MEMCOPY_FN(NAME, DST_SPACE) \
++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## g, DST_SPACE, __global) \
++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## l, DST_SPACE, __local) \
++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## p, DST_SPACE, __private) \
++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## c, DST_SPACE, __constant)
++
++DECL_ONE_SPACE_MEMCOPY_FN(g, __global)
++DECL_ONE_SPACE_MEMCOPY_FN(l, __local)
++DECL_ONE_SPACE_MEMCOPY_FN(p, __private)
++
+diff --git a/backend/src/libocl/src/ocl_memcpy.ll b/backend/src/libocl/src/ocl_memcpy.ll
+deleted file mode 100644
+index b3fadb2..0000000
+--- a/backend/src/libocl/src/ocl_memcpy.ll
++++ /dev/null
+@@ -1,729 +0,0 @@
+-;The memcpy's source code.
+-; INLINE_OVERLOADABLE void __gen_memcpy_align(uchar* dst, uchar* src, size_t size) {
+-; size_t index = 0;
+-; while((index + 4) <= size) {
+-; *((uint *)(dst + index)) = *((uint *)(src + index));
+-; index += 4;
+-; }
+-; while(index < size) {
+-; dst[index] = src[index];
+-; index++;
+-; }
+-; }
+-
+-define void @__gen_memcpy_gg_align(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+- %1 = load i32 addrspace(1)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+- store i32 %1, i32 addrspace(1)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+- %3 = load i8 addrspace(1)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_gp_align(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+- %1 = load i32 addrspace(0)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+- store i32 %1, i32 addrspace(1)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+- %3 = load i8 addrspace(0)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_gl_align(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+- %1 = load i32 addrspace(3)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+- store i32 %1, i32 addrspace(1)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+- %3 = load i8 addrspace(3)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_pg_align(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+- %1 = load i32 addrspace(1)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+- store i32 %1, i32 addrspace(0)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+- %3 = load i8 addrspace(1)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_pp_align(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+- %1 = load i32 addrspace(0)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+- store i32 %1, i32 addrspace(0)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+- %3 = load i8 addrspace(0)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_pl_align(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+- %1 = load i32 addrspace(3)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+- store i32 %1, i32 addrspace(0)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+- %3 = load i8 addrspace(3)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_lg_align(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+- %1 = load i32 addrspace(1)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+- store i32 %1, i32 addrspace(3)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+- %3 = load i8 addrspace(1)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_lp_align(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+- %1 = load i32 addrspace(0)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+- store i32 %1, i32 addrspace(3)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+- %3 = load i8 addrspace(0)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_ll_align(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+- %1 = load i32 addrspace(3)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+- store i32 %1, i32 addrspace(3)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+- %3 = load i8 addrspace(3)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-;The memcpy's source code.
+-; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) {
+-; size_t index = 0;
+-; while(index < size) {
+-; dst[index] = src[index];
+-; index++;
+-; }
+-; }
+-
+-define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(1)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(1)*
+- %3 = load i8 addrspace(1)* %2, align 1
+- %4 = ptrtoint i8 addrspace(1)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(1)*
+- store i8 %3, i8 addrspace(1)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(0)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(0)*
+- %3 = load i8 addrspace(0)* %2, align 1
+- %4 = ptrtoint i8 addrspace(1)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(1)*
+- store i8 %3, i8 addrspace(1)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(3)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(3)*
+- %3 = load i8 addrspace(3)* %2, align 1
+- %4 = ptrtoint i8 addrspace(1)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(1)*
+- store i8 %3, i8 addrspace(1)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(1)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(1)*
+- %3 = load i8 addrspace(1)* %2, align 1
+- %4 = ptrtoint i8 addrspace(0)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(0)*
+- store i8 %3, i8 addrspace(0)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(0)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(0)*
+- %3 = load i8 addrspace(0)* %2, align 1
+- %4 = ptrtoint i8 addrspace(0)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(0)*
+- store i8 %3, i8 addrspace(0)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(3)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(3)*
+- %3 = load i8 addrspace(3)* %2, align 1
+- %4 = ptrtoint i8 addrspace(0)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(0)*
+- store i8 %3, i8 addrspace(0)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(1)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(1)*
+- %3 = load i8 addrspace(1)* %2, align 1
+- %4 = ptrtoint i8 addrspace(3)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(3)*
+- store i8 %3, i8 addrspace(3)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(0)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(0)*
+- %3 = load i8 addrspace(0)* %2, align 1
+- %4 = ptrtoint i8 addrspace(3)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(3)*
+- store i8 %3, i8 addrspace(3)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(3)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(3)*
+- %3 = load i8 addrspace(3)* %2, align 1
+- %4 = ptrtoint i8 addrspace(3)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(3)*
+- store i8 %3, i8 addrspace(3)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
+- %1 = load i32 addrspace(2)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+- store i32 %1, i32 addrspace(1)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1
+- %3 = load i8 addrspace(2)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
+- %1 = load i32 addrspace(2)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+- store i32 %1, i32 addrspace(0)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1
+- %3 = load i8 addrspace(2)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0
+- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
+- %1 = load i32 addrspace(2)* %0, align 4
+- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+- store i32 %1, i32 addrspace(3)* %2, align 4
+- br label %while.cond
+-
+-while.cond3: ; preds = %while.cond, %while.body5
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+- %cmp4 = icmp ult i32 %index.1, %size
+- br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5: ; preds = %while.cond3
+- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1
+- %3 = load i8 addrspace(2)* %arrayidx, align 1
+- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond3
+-
+-while.end7: ; preds = %while.cond3
+- ret void
+-}
+-
+-define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(2)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(2)*
+- %3 = load i8 addrspace(2)* %2, align 1
+- %4 = ptrtoint i8 addrspace(0)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(0)*
+- store i8 %3, i8 addrspace(0)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(2)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(2)*
+- %3 = load i8 addrspace(2)* %2, align 1
+- %4 = ptrtoint i8 addrspace(1)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(1)*
+- store i8 %3, i8 addrspace(1)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp4 = icmp eq i32 %size, 0
+- br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(2)* %src to i32
+- %1 = add i32 %0, %index.05
+- %2 = inttoptr i32 %1 to i8 addrspace(2)*
+- %3 = load i8 addrspace(2)* %2, align 1
+- %4 = ptrtoint i8 addrspace(3)* %dst to i32
+- %5 = add i32 %4, %index.05
+- %6 = inttoptr i32 %5 to i8 addrspace(3)*
+- store i8 %3, i8 addrspace(3)* %6, align 1
+- %inc = add i32 %index.05, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+diff --git a/backend/src/libocl/src/ocl_memset.cl b/backend/src/libocl/src/ocl_memset.cl
+new file mode 100644
+index 0000000..b41851a
+--- /dev/null
++++ b/backend/src/libocl/src/ocl_memset.cl
+@@ -0,0 +1,44 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++#include "ocl_memset.h"
++
++#define DECL_MEMSET_FN(NAME, DST_SPACE) /* emits __gen_memset_<NAME>_align and __gen_memset_<NAME> for pointers qualified with DST_SPACE */ \
++void __gen_memset_ ##NAME## _align (DST_SPACE uchar* dst, uchar val, size_t size) { \
++ size_t index = 0; \
++ uint v = (val << 24) | (val << 16) | (val << 8) | val; /* val replicated into all four bytes of a word */ \
++ while((index + 4) <= size) { /* word stores only while a whole 4-byte word still fits in the buffer */ \
++ *((DST_SPACE uint *)(dst + index)) = v; /* NOTE(review): assumes dst + index is uint-aligned - confirm with callers */ \
++ index += 4; \
++ } \
++ while(index < size) { /* remaining 0..3 tail bytes, one at a time */ \
++ dst[index] = val; \
++ index++; \
++ } \
++} \
++void __gen_memset_ ##NAME (DST_SPACE uchar* dst, uchar val, size_t size) { /* byte-at-a-time variant: fills size bytes of dst with val */ \
++ size_t index = 0; \
++ while(index < size) { \
++ dst[index] = val; \
++ index++; \
++ } \
++}
++
++DECL_MEMSET_FN(g, __global)
++DECL_MEMSET_FN(l, __local)
++DECL_MEMSET_FN(p, __private)
++
+diff --git a/backend/src/libocl/src/ocl_memset.ll b/backend/src/libocl/src/ocl_memset.ll
+deleted file mode 100644
+index 665eac4..0000000
+--- a/backend/src/libocl/src/ocl_memset.ll
++++ /dev/null
+@@ -1,193 +0,0 @@
+-;The memset's source code.
+-; INLINE_OVERLOADABLE void __gen_memset_align(uchar* dst, uchar val, size_t size) {
+-; size_t index = 0;
+-; uint v = (val << 24) | (val << 16) | (val << 8) | val;
+-; while((index + 4) >= size) {
+-; *((uint *)(dst + index)) = v;
+-; index += 4;
+-; }
+-; while(index < size) {
+-; dst[index] = val;
+-; index++;
+-; }
+-; }
+-
+-define void @__gen_memset_p_align(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+-entry:
+- %conv = zext i8 %val to i32
+- %shl = shl nuw i32 %conv, 24
+- %shl2 = shl nuw nsw i32 %conv, 16
+- %or = or i32 %shl, %shl2
+- %shl4 = shl nuw nsw i32 %conv, 8
+- %or5 = or i32 %or, %shl4
+- %or7 = or i32 %or5, %conv
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond10, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0
+- %0 = bitcast i8* %add.ptr to i32*
+- store i32 %or7, i32* %0, align 4
+- br label %while.cond
+-
+-while.cond10: ; preds = %while.cond, %while.body13
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+- %cmp11 = icmp ult i32 %index.1, %size
+- br i1 %cmp11, label %while.body13, label %while.end14
+-
+-while.body13: ; preds = %while.cond10
+- %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1
+- store i8 %val, i8* %arrayidx, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond10
+-
+-while.end14: ; preds = %while.cond10
+- ret void
+-}
+-
+-define void @__gen_memset_g_align(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+-entry:
+- %conv = zext i8 %val to i32
+- %shl = shl nuw i32 %conv, 24
+- %shl2 = shl nuw nsw i32 %conv, 16
+- %or = or i32 %shl, %shl2
+- %shl4 = shl nuw nsw i32 %conv, 8
+- %or5 = or i32 %or, %shl4
+- %or7 = or i32 %or5, %conv
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond10, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+- store i32 %or7, i32 addrspace(1)* %0, align 4
+- br label %while.cond
+-
+-while.cond10: ; preds = %while.cond, %while.body13
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+- %cmp11 = icmp ult i32 %index.1, %size
+- br i1 %cmp11, label %while.body13, label %while.end14
+-
+-while.body13: ; preds = %while.cond10
+- %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+- store i8 %val, i8 addrspace(1)* %arrayidx, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond10
+-
+-while.end14: ; preds = %while.cond10
+- ret void
+-}
+-
+-define void @__gen_memset_l_align(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+-entry:
+- %conv = zext i8 %val to i32
+- %shl = shl nuw i32 %conv, 24
+- %shl2 = shl nuw nsw i32 %conv, 16
+- %or = or i32 %shl, %shl2
+- %shl4 = shl nuw nsw i32 %conv, 8
+- %or5 = or i32 %or, %shl4
+- %or7 = or i32 %or5, %conv
+- br label %while.cond
+-
+-while.cond: ; preds = %while.body, %entry
+- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+- %add = add i32 %index.0, 4
+- %cmp = icmp ugt i32 %add, %size
+- br i1 %cmp, label %while.cond10, label %while.body
+-
+-while.body: ; preds = %while.cond
+- %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+- store i32 %or7, i32 addrspace(3)* %0, align 4
+- br label %while.cond
+-
+-while.cond10: ; preds = %while.cond, %while.body13
+- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+- %cmp11 = icmp ult i32 %index.1, %size
+- br i1 %cmp11, label %while.body13, label %while.end14
+-
+-while.body13: ; preds = %while.cond10
+- %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+- store i8 %val, i8 addrspace(3)* %arrayidx, align 1
+- %inc = add i32 %index.1, 1
+- br label %while.cond10
+-
+-while.end14: ; preds = %while.cond10
+- ret void
+-}
+-
+-;The memset's source code.
+-; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) {
+-; size_t index = 0;
+-; while(index < size) {
+-; dst[index] = val;
+-; index++;
+-; }
+-; }
+-
+-define void @__gen_memset_p(i8 addrspace(0)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp3 = icmp eq i32 %size, 0
+- br i1 %cmp3, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(0)* %dst to i32
+- %1 = add i32 %0, %index.04
+- %2 = inttoptr i32 %1 to i8 addrspace(0)*
+- store i8 %val, i8 addrspace(0)* %2, align 1
+- %inc = add i32 %index.04, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp3 = icmp eq i32 %size, 0
+- br i1 %cmp3, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(1)* %dst to i32
+- %1 = add i32 %0, %index.04
+- %2 = inttoptr i32 %1 to i8 addrspace(1)*
+- store i8 %val, i8 addrspace(1)* %2, align 1
+- %inc = add i32 %index.04, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+-
+-define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+-entry:
+- %cmp3 = icmp eq i32 %size, 0
+- br i1 %cmp3, label %while.end, label %while.body
+-
+-while.body: ; preds = %entry, %while.body
+- %index.04 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+- %0 = ptrtoint i8 addrspace(3)* %dst to i32
+- %1 = add i32 %0, %index.04
+- %2 = inttoptr i32 %1 to i8 addrspace(3)*
+- store i8 %val, i8 addrspace(3)* %2, align 1
+- %inc = add i32 %index.04, 1
+- %cmp = icmp ult i32 %inc, %size
+- br i1 %cmp, label %while.body, label %while.end
+-
+-while.end: ; preds = %while.body, %entry
+- ret void
+-}
+--
+1.8.3.2
diff --git a/llvm-3.7-patch-2.patch b/llvm-3.7-patch-2.patch
new file mode 100644
index 000000000000..6fa33bd0c0bc
--- /dev/null
+++ b/llvm-3.7-patch-2.patch
@@ -0,0 +1,1023 @@
+Move all LLVM-related includes to llvm_includes.hpp.
+
+Signed-off-by: Yang Rong <rong.r.yang at intel.com>
+---
+ backend/src/backend/gen_program.cpp | 4 +
+ backend/src/llvm/ExpandConstantExpr.cpp | 7 +-
+ backend/src/llvm/ExpandLargeIntegers.cpp | 21 +---
+ backend/src/llvm/ExpandUtils.cpp | 8 +-
+ backend/src/llvm/PromoteIntegers.cpp | 10 +-
+ backend/src/llvm/StripAttributes.cpp | 9 +-
+ backend/src/llvm/llvm_barrier_nodup.cpp | 25 +----
+ backend/src/llvm/llvm_bitcode_link.cpp | 20 +---
+ backend/src/llvm/llvm_gen_backend.cpp | 93 +++--------------
+ backend/src/llvm/llvm_gen_backend.hpp | 4 -
+ backend/src/llvm/llvm_includes.hpp | 125 +++++++++++++++++++++++
+ backend/src/llvm/llvm_intrinsic_lowering.cpp | 24 +----
+ backend/src/llvm/llvm_loadstore_optimization.cpp | 36 +------
+ backend/src/llvm/llvm_passes.cpp | 70 +------------
+ backend/src/llvm/llvm_printf_parser.cpp | 34 +-----
+ backend/src/llvm/llvm_sampler_fix.cpp | 21 +---
+ backend/src/llvm/llvm_scalarize.cpp | 35 +------
+ backend/src/llvm/llvm_to_gen.cpp | 78 ++++++--------
+ backend/src/llvm/llvm_unroll.cpp | 36 ++-----
+ 19 files changed, 207 insertions(+), 453 deletions(-)
+ create mode 100644 backend/src/llvm/llvm_includes.hpp
+
+diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
+index 3c4983e..73d78f8 100644
+--- a/backend/src/backend/gen_program.cpp
++++ b/backend/src/backend/gen_program.cpp
+@@ -402,7 +402,11 @@ namespace gbe {
+ llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
+ llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
+
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++ if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource_Removed, &errMsg)) {
++#else
+ if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource, &errMsg)) {
++#endif
+ if (err != NULL && errSize != NULL && stringSize > 0u) {
+ strncpy(err, errMsg, stringSize-1);
+ err[stringSize-1] = '\0';
+diff --git a/backend/src/llvm/ExpandConstantExpr.cpp b/backend/src/llvm/ExpandConstantExpr.cpp
+index 5c5934a..c6f57b8 100644
+--- a/backend/src/llvm/ExpandConstantExpr.cpp
++++ b/backend/src/llvm/ExpandConstantExpr.cpp
+@@ -77,12 +77,7 @@
+ //===----------------------------------------------------------------------===//
+
+ #include <map>
+-
+-#include "llvm/IR/IRBuilder.h"
+-#include "llvm/IR/Constants.h"
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/Pass.h"
++#include "llvm_includes.hpp"
+ #include "llvm_gen_backend.hpp"
+
+ using namespace llvm;
+diff --git a/backend/src/llvm/ExpandLargeIntegers.cpp b/backend/src/llvm/ExpandLargeIntegers.cpp
+index f7e59a5..20fdda9 100644
+--- a/backend/src/llvm/ExpandLargeIntegers.cpp
++++ b/backend/src/llvm/ExpandLargeIntegers.cpp
+@@ -86,24 +86,9 @@
+ // 2. OR x, 0 can be optimized as x. And x, 0 can be optimized as 0.
+ //===----------------------------------------------------------------------===//
+
+-#include "llvm/ADT/DenseMap.h"
+-#include "llvm/ADT/PostOrderIterator.h"
+-#include "llvm/ADT/STLExtras.h"
+-#include "llvm/ADT/SmallVector.h"
+-#if LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/CFG.h"
+-#else
+-#include "llvm/Support/CFG.h"
+-#endif
+-#include "llvm/IR/DataLayout.h"
+-#include "llvm/IR/DerivedTypes.h"
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/IRBuilder.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/Pass.h"
+-#include "llvm/Support/Debug.h"
+-#include "llvm/Support/MathExtras.h"
+-#include "llvm/Support/raw_ostream.h"
++
++#include "llvm_includes.hpp"
++
+ #include "llvm_gen_backend.hpp"
+
+ using namespace llvm;
+diff --git a/backend/src/llvm/ExpandUtils.cpp b/backend/src/llvm/ExpandUtils.cpp
+index e6dfb52..801f969 100644
+--- a/backend/src/llvm/ExpandUtils.cpp
++++ b/backend/src/llvm/ExpandUtils.cpp
+@@ -64,12 +64,8 @@
+ //
+ //===----------------------------------------------------------------------===//
+
+-#include "llvm/IR/BasicBlock.h"
+-#include "llvm/IR/Constants.h"
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/Module.h"
+-#include "llvm/Support/raw_ostream.h"
++#include "llvm_includes.hpp"
++
+ #include "llvm_gen_backend.hpp"
+
+ using namespace llvm;
+diff --git a/backend/src/llvm/PromoteIntegers.cpp b/backend/src/llvm/PromoteIntegers.cpp
+index aba42b9..b65440f 100644
+--- a/backend/src/llvm/PromoteIntegers.cpp
++++ b/backend/src/llvm/PromoteIntegers.cpp
+@@ -84,14 +84,8 @@
+ //===----------------------------------------------------------------------===//
+
+
+-#include "llvm/ADT/DenseMap.h"
+-#include "llvm/ADT/SmallVector.h"
+-#include "llvm/IR/DerivedTypes.h"
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IRBuilder.h"
+-#include "llvm/Pass.h"
+-#include "llvm/Support/raw_ostream.h"
++#include "llvm_includes.hpp"
++
+ #include "llvm_gen_backend.hpp"
+
+ using namespace llvm;
+diff --git a/backend/src/llvm/StripAttributes.cpp b/backend/src/llvm/StripAttributes.cpp
+index 05cac17..e6df312 100644
+--- a/backend/src/llvm/StripAttributes.cpp
++++ b/backend/src/llvm/StripAttributes.cpp
+@@ -69,14 +69,7 @@
+ // * Calling conventions from functions and function calls.
+ //
+
+-#include "llvm/IR/Function.h"
+-#include "llvm/Pass.h"
+-
+-#if LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/CallSite.h"
+-#else
+-#include "llvm/Support/CallSite.h"
+-#endif
++#include "llvm_includes.hpp"
+
+ #include "llvm_gen_backend.hpp"
+
+diff --git a/backend/src/llvm/llvm_barrier_nodup.cpp b/backend/src/llvm/llvm_barrier_nodup.cpp
+index 19deafc..727e6bd 100644
+--- a/backend/src/llvm/llvm_barrier_nodup.cpp
++++ b/backend/src/llvm/llvm_barrier_nodup.cpp
+@@ -28,30 +28,7 @@
+ *
+ */
+
+-#include "llvm/Config/llvm-config.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/Function.h"
+-#include "llvm/InstrTypes.h"
+-#include "llvm/Instructions.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/Module.h"
+-#else
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/InstrTypes.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/Module.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#if LLVM_VERSION_MINOR <= 1
+-#include "llvm/Support/IRBuilder.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/IRBuilder.h"
+-#else
+-#include "llvm/IR/IRBuilder.h"
+-#endif /* LLVM_VERSION_MINOR <= 1 */
+-#include "llvm/Support/raw_ostream.h"
+-#include "llvm/IR/Attributes.h"
++#include "llvm_includes.hpp"
+
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "sys/map.hpp"
+diff --git a/backend/src/llvm/llvm_bitcode_link.cpp b/backend/src/llvm/llvm_bitcode_link.cpp
+index ebf4386..56205bb 100644
+--- a/backend/src/llvm/llvm_bitcode_link.cpp
++++ b/backend/src/llvm/llvm_bitcode_link.cpp
+@@ -21,24 +21,11 @@
+ #include <iostream>
+ #include <sstream>
+ #include <set>
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/Module.h"
+-#include "llvm/IRReader/IRReader.h"
+-#include "llvm/PassManager.h"
+-#include "llvm/Pass.h"
+-#include "llvm/IR/IRBuilder.h"
+-#include "llvm/Support/FileSystem.h"
+-#include "llvm/Support/MemoryBuffer.h"
+-#include "llvm/Bitcode/ReaderWriter.h"
+-#include "llvm/Transforms/IPO.h"
+-#include "llvm/Transforms/Utils/Cloning.h"
+-#include "llvm/Support/SourceMgr.h"
+
+ #include "sys/cvar.hpp"
+ #include "src/GBEConfig.h"
++#include "llvm_includes.hpp"
+ #include "llvm/llvm_gen_backend.hpp"
+-#include "llvm-c/Linker.h"
+
+ using namespace llvm;
+
+@@ -248,8 +235,11 @@ namespace gbe
+ printf("Fatal Error: link the bitcode error:\n%s\n", errorMsg);
+ return NULL;
+ }
+-
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
++ llvm::legacy::PassManager passes;
++#else
+ llvm::PassManager passes;
++#endif
+
+ passes.add(createInternalizePass(kernels));
+ passes.add(createGlobalDCEPass());
+diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
+index 4905415..4f2fe89 100644
+--- a/backend/src/llvm/llvm_gen_backend.cpp
++++ b/backend/src/llvm/llvm_gen_backend.cpp
+@@ -71,86 +71,7 @@
+ * is intercepted, we just abort
+ */
+
+-#include "llvm/Config/llvm-config.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/CallingConv.h"
+-#include "llvm/Constants.h"
+-#include "llvm/DerivedTypes.h"
+-#include "llvm/Module.h"
+-#include "llvm/Instructions.h"
+-#else
+-#include "llvm/IR/CallingConv.h"
+-#include "llvm/IR/Constants.h"
+-#include "llvm/IR/DerivedTypes.h"
+-#include "llvm/IR/Module.h"
+-#include "llvm/IR/Instructions.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#include "llvm/PassManager.h"
+-#include "llvm/IR/IRBuilder.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/Intrinsics.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/InlineAsm.h"
+-#else
+-#include "llvm/IR/Intrinsics.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/InlineAsm.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/ADT/StringExtras.h"
+-#include "llvm/ADT/SmallString.h"
+-#include "llvm/ADT/STLExtras.h"
+-#include "llvm/Analysis/ConstantsScanner.h"
+-#include "llvm/Analysis/LoopInfo.h"
+-#include "llvm/Analysis/ValueTracking.h"
+-#include "llvm/CodeGen/Passes.h"
+-#include "llvm/CodeGen/IntrinsicLowering.h"
+-
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
+-#include "llvm/IR/Mangler.h"
+-#else
+-#include "llvm/Target/Mangler.h"
+-#endif
+-
+-#include "llvm/ADT/PostOrderIterator.h"
+-#include "llvm/Transforms/Scalar.h"
+-#include "llvm/MC/MCAsmInfo.h"
+-#include "llvm/MC/MCContext.h"
+-#include "llvm/MC/MCInstrInfo.h"
+-#include "llvm/MC/MCObjectFileInfo.h"
+-#include "llvm/MC/MCRegisterInfo.h"
+-#include "llvm/MC/MCSubtargetInfo.h"
+-#include "llvm/MC/MCSymbol.h"
+-#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
+-#include "llvm/Target/TargetData.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/DataLayout.h"
+-#else
+-#include "llvm/IR/DataLayout.h"
+-#endif
+-
+-#if LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/CallSite.h"
+-#include "llvm/IR/CFG.h"
+-#else
+-#include "llvm/Support/CallSite.h"
+-#include "llvm/Support/CFG.h"
+-#endif
+-
+-#include "llvm/Support/ErrorHandling.h"
+-#include "llvm/Support/FormattedStream.h"
+-#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2)
+-#include "llvm/Support/InstVisitor.h"
+-#elif LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/InstVisitor.h"
+-#else
+-#include "llvm/InstVisitor.h"
+-#endif
+-#include "llvm/Support/MathExtras.h"
+-#include "llvm/Support/TargetRegistry.h"
+-#include "llvm/Support/Host.h"
+-#include "llvm/Support/ToolOutputFile.h"
+-#include "llvm/Support/SourceMgr.h"
++#include "llvm_includes.hpp"
+
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "ir/context.hpp"
+@@ -527,14 +448,22 @@ namespace gbe
+ TheModule(0),
+ btiBase(BTI_RESERVED_NUM)
+ {
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
++ initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry());
++#else
+ initializeLoopInfoPass(*PassRegistry::getPassRegistry());
++#endif
+ pass = PASS_EMIT_REGISTERS;
+ }
+
+ virtual const char *getPassName() const { return "Gen Back-End"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
++ AU.addRequired<LoopInfoWrapperPass>();
++#else
+ AU.addRequired<LoopInfo>();
++#endif
+ AU.setPreservesAll();
+ }
+
+@@ -564,7 +493,11 @@ namespace gbe
+ assignBti(F);
+ analyzePointerOrigin(F);
+
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
++ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
++#else
+ LI = &getAnalysis<LoopInfo>();
++#endif
+ emitFunction(F);
+ phiMap.clear();
+ globalPointer.clear();
+diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
+index 1f16557..94a377b 100644
+--- a/backend/src/llvm/llvm_gen_backend.hpp
++++ b/backend/src/llvm/llvm_gen_backend.hpp
+@@ -30,11 +30,7 @@
+ #include "llvm/Config/llvm-config.h"
+ #include "llvm/Pass.h"
+ #include "llvm/Analysis/LoopPass.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/Instructions.h"
+-#else
+ #include "llvm/IR/Instructions.h"
+-#endif
+ #include "sys/platform.hpp"
+ #include "sys/map.hpp"
+ #include <algorithm>
+diff --git a/backend/src/llvm/llvm_includes.hpp b/backend/src/llvm/llvm_includes.hpp
+new file mode 100644
+index 0000000..fed3a18
+--- /dev/null
++++ b/backend/src/llvm/llvm_includes.hpp
+@@ -0,0 +1,125 @@
++/*
++ * Copyright © 2012 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ * Author: Yang Rong <rong.r.yang at intel.com>
++ */
++
++/**
++ * \file llvm_includes.hpp
++ * \author Yang Rong <rong.r.yang at intel.com>
++ */
++#ifndef __GBE_IR_LLVM_INCLUDES_HPP__
++#define __GBE_IR_LLVM_INCLUDES_HPP__
++
++#include "llvm/Config/llvm-config.h"
++
++#include "llvm/IR/BasicBlock.h"
++#include "llvm/IR/Constants.h"
++#include "llvm/IR/Function.h"
++#include "llvm/IR/Instructions.h"
++#include "llvm/IR/Module.h"
++#include "llvm/IR/IRBuilder.h"
++#include "llvm/IR/DataLayout.h"
++#include "llvm/IR/DerivedTypes.h"
++#include "llvm/IR/InstrTypes.h"
++#include "llvm/IR/IntrinsicInst.h"
++#include "llvm/IR/Attributes.h"
++#include "llvm/IR/CallingConv.h"
++#include "llvm/IR/Intrinsics.h"
++#include "llvm/IR/InlineAsm.h"
++#include "llvm/IR/LLVMContext.h"
++
++#include "llvm_includes.hpp"
++
++#include "llvm/Pass.h"
++#include "llvm/ADT/DenseMap.h"
++#include "llvm/ADT/PostOrderIterator.h"
++#include "llvm/ADT/STLExtras.h"
++#include "llvm/ADT/SmallVector.h"
++#include "llvm/ADT/StringExtras.h"
++#include "llvm/ADT/SmallString.h"
++
++#include "llvm/Analysis/ScalarEvolution.h"
++#include "llvm/Analysis/ScalarEvolutionExpressions.h"
++#include "llvm/Analysis/CFGPrinter.h"
++#include "llvm/Analysis/LoopPass.h"
++#include "llvm/Analysis/TargetTransformInfo.h"
++#include "llvm/Analysis/LoopInfo.h"
++#include "llvm/Analysis/ValueTracking.h"
++#include "llvm/Analysis/Passes.h"
++
++#include "llvm/Support/raw_ostream.h"
++#include "llvm/Support/Debug.h"
++#include "llvm/Support/MathExtras.h"
++#include "llvm/Support/FileSystem.h"
++#include "llvm/Support/MemoryBuffer.h"
++#include "llvm/Support/SourceMgr.h"
++#include "llvm/Support/ErrorHandling.h"
++#include "llvm/Support/FormattedStream.h"
++#include "llvm/Support/TargetRegistry.h"
++#include "llvm/Support/Host.h"
++#include "llvm/Support/ToolOutputFile.h"
++
++#include "llvm-c/Linker.h"
++#include "llvm/IRReader/IRReader.h"
++#include "llvm/Bitcode/ReaderWriter.h"
++#include "llvm/Transforms/IPO.h"
++#include "llvm/Transforms/Utils/Cloning.h"
++
++#include "llvm/CodeGen/Passes.h"
++#include "llvm/CodeGen/IntrinsicLowering.h"
++
++#include "llvm/Transforms/Scalar.h"
++#include "llvm/MC/MCAsmInfo.h"
++#include "llvm/MC/MCContext.h"
++#include "llvm/MC/MCInstrInfo.h"
++#include "llvm/MC/MCObjectFileInfo.h"
++#include "llvm/MC/MCRegisterInfo.h"
++#include "llvm/MC/MCSubtargetInfo.h"
++#include "llvm/MC/MCSymbol.h"
++
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
++#include "llvm/IR/Mangler.h"
++#include "llvm/IR/CallSite.h"
++#include "llvm/IR/CFG.h"
++#include "llvm/IR/InstVisitor.h"
++#include "llvm/IR/IRPrintingPasses.h"
++#include "llvm/IR/Verifier.h"
++#include "llvm/IR/InstIterator.h"
++#include "llvm/IR/Dominators.h"
++#else
++#include "llvm/Support/CallSite.h"
++#include "llvm/Support/CFG.h"
++#include "llvm/Support/InstIterator.h"
++#include "llvm/InstVisitor.h"
++#include "llvm/Analysis/Verifier.h"
++#include "llvm/Analysis/Dominators.h"
++#include "llvm/Assembly/PrintModulePass.h"
++#include "llvm/Target/Mangler.h"
++#endif
++
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
++#include "llvm/Analysis/TargetLibraryInfo.h"
++#include "llvm/IR/LegacyPassManager.h"
++#else
++#include "llvm/Target/TargetLibraryInfo.h"
++#include "llvm/PassManager.h"
++#endif
++#include "llvm/ADT/Triple.h"
++
++#include <clang/CodeGen/CodeGenAction.h>
++
++#endif /* __GBE_IR_LLVM_INCLUDES_HPP__ */
+diff --git a/backend/src/llvm/llvm_intrinsic_lowering.cpp b/backend/src/llvm/llvm_intrinsic_lowering.cpp
+index 7d1f8f0..b35d1e6 100644
+--- a/backend/src/llvm/llvm_intrinsic_lowering.cpp
++++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp
+@@ -20,29 +20,7 @@
+ * \author Yang Rong <rong.r.yang at intel.com>
+ */
+
+-#include "llvm/Config/llvm-config.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/Function.h"
+-#include "llvm/InstrTypes.h"
+-#include "llvm/Instructions.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/Module.h"
+-#else
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/InstrTypes.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/Module.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#if LLVM_VERSION_MINOR <= 1
+-#include "llvm/Support/IRBuilder.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/IRBuilder.h"
+-#else
+-#include "llvm/IR/IRBuilder.h"
+-#endif /* LLVM_VERSION_MINOR <= 1 */
+-#include "llvm/Support/raw_ostream.h"
++#include "llvm_includes.hpp"
+
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "sys/map.hpp"
+diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
+index c6349fa..698fdc2 100644
+--- a/backend/src/llvm/llvm_loadstore_optimization.cpp
++++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
+@@ -22,37 +22,7 @@
+ * from Vectorize passes in llvm.
+ */
+
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/Pass.h"
+-#include "llvm/PassManager.h"
+-
+-#include "llvm/Config/llvm-config.h"
+-#include "llvm/ADT/DenseMap.h"
+-#include "llvm/ADT/PostOrderIterator.h"
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+-#include "llvm/Function.h"
+-#include "llvm/InstrTypes.h"
+-#include "llvm/Instructions.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/Module.h"
+-#else
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/InstrTypes.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/Module.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1
+-#include "llvm/Support/IRBuilder.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/IRBuilder.h"
+-#else
+-#include "llvm/IR/IRBuilder.h"
+-#endif /* LLVM_VERSION_MINOR <= 1 */
+-#include "llvm/Support/raw_ostream.h"
+-#include "llvm/Analysis/ScalarEvolution.h"
+-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
++#include "llvm_includes.hpp"
+
+ using namespace llvm;
+ namespace gbe {
+@@ -72,7 +42,9 @@ namespace gbe {
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ SE = &getAnalysis<ScalarEvolution>();
+- #if LLVM_VERSION_MINOR >= 5
++ #if LLVM_VERSION_MINOR >= 7
++ TD = &BB.getModule()->getDataLayout();
++ #elif LLVM_VERSION_MINOR >= 5
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ TD = DLP ? &DLP->getDataLayout() : nullptr;
+ #else
+diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp
+index 223f61b..d5d965b 100644
+--- a/backend/src/llvm/llvm_passes.cpp
++++ b/backend/src/llvm/llvm_passes.cpp
+@@ -30,75 +30,7 @@
+ * Segovia) the right to use another license for it (MIT here)
+ */
+
+-#include "llvm/Config/llvm-config.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/CallingConv.h"
+-#include "llvm/Constants.h"
+-#include "llvm/DerivedTypes.h"
+-#include "llvm/Module.h"
+-#include "llvm/Instructions.h"
+-#else
+-#include "llvm/IR/CallingConv.h"
+-#include "llvm/IR/Constants.h"
+-#include "llvm/IR/DerivedTypes.h"
+-#include "llvm/IR/Module.h"
+-#include "llvm/IR/Instructions.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#include "llvm/PassManager.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/Intrinsics.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/InlineAsm.h"
+-#else
+-#include "llvm/IR/Intrinsics.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/InlineAsm.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/ADT/StringExtras.h"
+-#include "llvm/ADT/SmallString.h"
+-#include "llvm/ADT/STLExtras.h"
+-#include "llvm/Analysis/ConstantsScanner.h"
+-#include "llvm/Analysis/LoopInfo.h"
+-#include "llvm/Analysis/ValueTracking.h"
+-#include "llvm/CodeGen/Passes.h"
+-#include "llvm/CodeGen/IntrinsicLowering.h"
+-
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
+-#include "llvm/IR/Mangler.h"
+-#else
+-#include "llvm/Target/Mangler.h"
+-#endif
+-
+-#include "llvm/Transforms/Scalar.h"
+-#include "llvm/MC/MCAsmInfo.h"
+-#include "llvm/MC/MCContext.h"
+-#include "llvm/MC/MCInstrInfo.h"
+-#include "llvm/MC/MCObjectFileInfo.h"
+-#include "llvm/MC/MCRegisterInfo.h"
+-#include "llvm/MC/MCSubtargetInfo.h"
+-#include "llvm/MC/MCSymbol.h"
+-#if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1)
+-#include "llvm/Target/TargetData.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/DataLayout.h"
+-#else
+-#include "llvm/IR/DataLayout.h"
+-#endif
+-#include "llvm/Support/ErrorHandling.h"
+-#include "llvm/Support/FormattedStream.h"
+-#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2)
+-#include "llvm/Support/InstVisitor.h"
+-#elif LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/InstVisitor.h"
+-#else
+-#include "llvm/InstVisitor.h"
+-#endif
+-#include "llvm/Support/MathExtras.h"
+-#include "llvm/Support/TargetRegistry.h"
+-#include "llvm/Support/Host.h"
+-#include "llvm/Support/ToolOutputFile.h"
+-#include "llvm/Support/SourceMgr.h"
++#include "llvm_includes.hpp"
+
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "ir/unit.hpp"
+diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
+index 3d84457..1e8427c 100644
+--- a/backend/src/llvm/llvm_printf_parser.cpp
++++ b/backend/src/llvm/llvm_printf_parser.cpp
+@@ -33,39 +33,7 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+
+-#include "llvm/Config/llvm-config.h"
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/Function.h"
+-#include "llvm/InstrTypes.h"
+-#include "llvm/Instructions.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/Module.h"
+-#else
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/InstrTypes.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/Module.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#if LLVM_VERSION_MINOR <= 1
+-#include "llvm/Support/IRBuilder.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/IRBuilder.h"
+-#else
+-#include "llvm/IR/IRBuilder.h"
+-#endif /* LLVM_VERSION_MINOR <= 1 */
+-
+-#if LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/CallSite.h"
+-#include "llvm/IR/CFG.h"
+-#else
+-#include "llvm/Support/CallSite.h"
+-#include "llvm/Support/CFG.h"
+-#endif
+-
+-#include "llvm/Support/raw_ostream.h"
+-#include "llvm/IR/Attributes.h"
++#include "llvm_includes.hpp"
+
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "sys/map.hpp"
+diff --git a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp
+index 8c76324..01db8fe 100644
+--- a/backend/src/llvm/llvm_sampler_fix.cpp
++++ b/backend/src/llvm/llvm_sampler_fix.cpp
+@@ -20,27 +20,8 @@
+ * make sure to get correct pixel value. But for some other
+ * sampler, we don't need those work around code.
+ */
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/Pass.h"
+-#include "llvm/PassManager.h"
+
+-#include "llvm/Config/llvm-config.h"
+-#include "llvm/ADT/DenseMap.h"
+-#include "llvm/ADT/PostOrderIterator.h"
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/InstrTypes.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/Module.h"
+-#include "llvm/Pass.h"
+-#include "llvm/IR/IRBuilder.h"
+-#if LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/CFG.h"
+-#else
+-#include "llvm/Support/CFG.h"
+-#endif
+-
+-#include "llvm/Analysis/ConstantsScanner.h"
++#include "llvm_includes.hpp"
+
+ #include "llvm_gen_backend.hpp"
+ #include "ocl_common_defines.h"
+diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
+index bc985c6..7ee5259 100644
+--- a/backend/src/llvm/llvm_scalarize.cpp
++++ b/backend/src/llvm/llvm_scalarize.cpp
+@@ -59,39 +59,7 @@
+ //
+ //===----------------------------------------------------------------------===//
+
+-#include "llvm/Config/llvm-config.h"
+-#include "llvm/ADT/DenseMap.h"
+-#include "llvm/ADT/PostOrderIterator.h"
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+-#include "llvm/Function.h"
+-#include "llvm/InstrTypes.h"
+-#include "llvm/Instructions.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/Module.h"
+-#else
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/InstrTypes.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/Module.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1
+-#include "llvm/Support/IRBuilder.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/IRBuilder.h"
+-#else
+-#include "llvm/IR/IRBuilder.h"
+-#endif /* LLVM_VERSION_MINOR <= 1 */
+-
+-#if LLVM_VERSION_MINOR >= 5
+-#include "llvm/IR/CallSite.h"
+-#include "llvm/IR/CFG.h"
+-#else
+-#include "llvm/Support/CallSite.h"
+-#include "llvm/Support/CFG.h"
+-#endif
+-#include "llvm/Support/raw_ostream.h"
++#include "llvm_includes.hpp"
+
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "sys/map.hpp"
+@@ -128,7 +96,6 @@ namespace gbe {
+
+ Scalarize() : FunctionPass(ID)
+ {
+- initializeLoopInfoPass(*PassRegistry::getPassRegistry());
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
+ initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+ #else
+diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
+index 891f2a1..538d1c5 100644
+--- a/backend/src/llvm/llvm_to_gen.cpp
++++ b/backend/src/llvm/llvm_to_gen.cpp
+@@ -22,40 +22,8 @@
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+-#include "llvm/Config/llvm-config.h"
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+-#include "llvm/LLVMContext.h"
+-#include "llvm/Module.h"
+-#include "llvm/DataLayout.h"
+-#else
+-#include "llvm/IR/LLVMContext.h"
+-#include "llvm/IR/Module.h"
+-#include "llvm/IR/DataLayout.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/PassManager.h"
+-#include "llvm/Pass.h"
+-#include "llvm/Analysis/Passes.h"
+-#include "llvm/Transforms/IPO.h"
+-#include "llvm/Target/TargetLibraryInfo.h"
+-#include "llvm/ADT/Triple.h"
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+-#include "llvm/Support/IRReader.h"
+-#else
+-#include "llvm/IRReader/IRReader.h"
+-#include "llvm/Support/SourceMgr.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Support/raw_ostream.h"
+-#include "llvm/Transforms/Scalar.h"
+-
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
+-#include "llvm/IR/IRPrintingPasses.h"
+-#include "llvm/IR/Verifier.h"
+-#else
+-#include "llvm/Analysis/Verifier.h"
+-#include "llvm/Assembly/PrintModulePass.h"
+-#endif
++#include "llvm_includes.hpp"
+
+-#include "llvm/Analysis/CFGPrinter.h"
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "llvm/llvm_to_gen.hpp"
+ #include "sys/cvar.hpp"
+@@ -64,8 +32,6 @@
+ #include "ir/function.hpp"
+ #include "ir/structurizer.hpp"
+
+-#include <clang/CodeGen/CodeGenAction.h>
+-
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+@@ -78,11 +44,19 @@ namespace gbe
+ BVAR(OCL_OUTPUT_CFG_GEN_IR, false);
+ using namespace llvm;
+
+- void runFuntionPass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL)
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++ using namespace llvm::legacy;
++ #define TARGETLIBRARY TargetLibraryInfoImpl
++#else
++ #define TARGETLIBRARY TargetLibraryInfo
++#endif
++
++ void runFuntionPass(Module &mod, TARGETLIBRARY *libraryInfo, const DataLayout &DL)
+ {
+ FunctionPassManager FPM(&mod);
+
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+ FPM.add(new DataLayoutPass());
+ #elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 5
+ FPM.add(new DataLayoutPass(DL));
+@@ -95,7 +69,11 @@ namespace gbe
+ #else
+ FPM.add(createVerifierPass());
+ #endif
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++ FPM.add(new TargetLibraryInfoWrapperPass(*libraryInfo));
++#else
+ FPM.add(new TargetLibraryInfo(*libraryInfo));
++#endif
+ FPM.add(createTypeBasedAliasAnalysisPass());
+ FPM.add(createBasicAliasAnalysisPass());
+ FPM.add(createCFGSimplificationPass());
+@@ -111,18 +89,24 @@ namespace gbe
+ FPM.doFinalization();
+ }
+
+- void runModulePass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL, int optLevel, bool strictMath)
++ void runModulePass(Module &mod, TARGETLIBRARY *libraryInfo, const DataLayout &DL, int optLevel, bool strictMath)
+ {
+- llvm::PassManager MPM;
++ PassManager MPM;
+
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+ MPM.add(new DataLayoutPass());
+ #elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 5
+ MPM.add(new DataLayoutPass(DL));
+ #else
+ MPM.add(new DataLayout(DL));
+ #endif
++
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++ MPM.add(new TargetLibraryInfoWrapperPass(*libraryInfo));
++#else
+ MPM.add(new TargetLibraryInfo(*libraryInfo));
++#endif
+ MPM.add(createTypeBasedAliasAnalysisPass());
+ MPM.add(createBasicAliasAnalysisPass());
+ MPM.add(createIntrinsicLoweringPass());
+@@ -202,7 +186,7 @@ namespace gbe
+
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
+ #define OUTPUT_BITCODE(STAGE, MOD) do { \
+- llvm::PassManager passes__; \
++ PassManager passes__; \
+ if (OCL_OUTPUT_LLVM_##STAGE) { \
+ passes__.add(createPrintModulePass(*o)); \
+ passes__.run(MOD); \
+@@ -210,7 +194,7 @@ namespace gbe
+ }while(0)
+ #else
+ #define OUTPUT_BITCODE(STAGE, MOD) do { \
+- llvm::PassManager passes__; \
++ PassManager passes__; \
+ if (OCL_OUTPUT_LLVM_##STAGE) { \
+ passes__.add(createPrintModulePass(&*o)); \
+ passes__.run(MOD); \
+@@ -260,16 +244,20 @@ namespace gbe
+ Module &mod = *M.get();
+ DataLayout DL(&mod);
+
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++ mod.setDataLayout(DL);
++#endif
+ Triple TargetTriple(mod.getTargetTriple());
+- TargetLibraryInfo *libraryInfo = new TargetLibraryInfo(TargetTriple);
++ TARGETLIBRARY *libraryInfo = new TARGETLIBRARY(TargetTriple);
+ libraryInfo->disableAllFunctions();
+
+ OUTPUT_BITCODE(AFTER_LINK, mod);
+
+ runFuntionPass(mod, libraryInfo, DL);
+ runModulePass(mod, libraryInfo, DL, optLevel, strictMath);
+- llvm::PassManager passes;
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
++ PassManager passes;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+ passes.add(new DataLayoutPass());
+ #elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 5
+ passes.add(new DataLayoutPass(DL));
+diff --git a/backend/src/llvm/llvm_unroll.cpp b/backend/src/llvm/llvm_unroll.cpp
+index 5d3fad8..6990e39 100644
+--- a/backend/src/llvm/llvm_unroll.cpp
++++ b/backend/src/llvm/llvm_unroll.cpp
+@@ -18,34 +18,9 @@
+ #include "llvm/Config/llvm-config.h"
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
+ #include <set>
+-#if LLVM_VERSION_MINOR <= 2
+-#include "llvm/Function.h"
+-#include "llvm/InstrTypes.h"
+-#include "llvm/Instructions.h"
+-#include "llvm/IntrinsicInst.h"
+-#include "llvm/Module.h"
+-#else
+-#include "llvm/IR/Function.h"
+-#include "llvm/IR/InstrTypes.h"
+-#include "llvm/IR/Instructions.h"
+-#include "llvm/IR/IntrinsicInst.h"
+-#include "llvm/IR/Module.h"
+-#endif /* LLVM_VERSION_MINOR <= 2 */
+-#include "llvm/Pass.h"
+-#if LLVM_VERSION_MINOR <= 1
+-#include "llvm/Support/IRBuilder.h"
+-#elif LLVM_VERSION_MINOR == 2
+-#include "llvm/IRBuilder.h"
+-#else
+-#include "llvm/IR/IRBuilder.h"
+-#endif /* LLVM_VERSION_MINOR <= 1 */
+-#include "llvm/Support/raw_ostream.h"
+-#include "llvm/PassManager.h"
+-#include "llvm/Transforms/Scalar.h"
+-#include "llvm/Analysis/ScalarEvolution.h"
+-#include "llvm/Analysis/LoopPass.h"
+-#include "llvm/Analysis/TargetTransformInfo.h"
+-#include "llvm/IR/Dominators.h"
++
++#include "llvm_includes.hpp"
++
+ #include "llvm/llvm_gen_backend.hpp"
+ #include "sys/map.hpp"
+
+@@ -61,8 +36,13 @@ namespace gbe {
+ LoopPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
++#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 7)
++ AU.addRequired<LoopInfoWrapperPass>();
++ AU.addPreserved<LoopInfoWrapperPass>();
++#else
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
++#endif
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+--
+1.8.3.2
diff --git a/llvm-3.7-patch-3.patch b/llvm-3.7-patch-3.patch
new file mode 100644
index 000000000000..26df7ddfb275
--- /dev/null
+++ b/llvm-3.7-patch-3.patch
@@ -0,0 +1,30 @@
+Run createStripAttributesPass before createInstructionCombiningPass; otherwise the latter converts some calls to illegal
+instructions in LLVM 3.7, for example in the utests compiler_time_stamp and test_load_program_from_spir.
+
+Signed-off-by: Yang Rong <rong.r.yang at intel.com>
+---
+ backend/src/llvm/llvm_to_gen.cpp | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
+index 538d1c5..24d4be7 100644
+--- a/backend/src/llvm/llvm_to_gen.cpp
++++ b/backend/src/llvm/llvm_to_gen.cpp
+@@ -110,6 +110,7 @@ namespace gbe
+ MPM.add(createTypeBasedAliasAnalysisPass());
+ MPM.add(createBasicAliasAnalysisPass());
+ MPM.add(createIntrinsicLoweringPass());
++ MPM.add(createStripAttributesPass()); // Strip unsupported attributes and calling conventions.
+ MPM.add(createSamplerFixPass());
+ MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+
+@@ -119,7 +120,6 @@ namespace gbe
+ MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ MPM.add(createPruneEHPass()); // Remove dead EH info
+- MPM.add(createStripAttributesPass()); // Strip unsupported attributes and calling conventions.
+ MPM.add(createBarrierNodupPass(false)); // remove noduplicate fnAttr before inlining.
+ MPM.add(createFunctionInliningPass(20000));
+ MPM.add(createBarrierNodupPass(true)); // restore noduplicate fnAttr after inlining.
+--
+1.8.3.2
diff --git a/llvm-3.7-patch-4.patch b/llvm-3.7-patch-4.patch
new file mode 100644
index 000000000000..bf084ea19a8c
--- /dev/null
+++ b/llvm-3.7-patch-4.patch
@@ -0,0 +1,35 @@
+Add an explicit target datalayout and triple to the .ll files to fix the datalayout mismatch warning in LLVM 3.7.
+
+Signed-off-by: Yang Rong <rong.r.yang at intel.com>
+---
+ backend/src/libocl/src/ocl_barrier.ll | 3 +++
+ backend/src/libocl/src/ocl_clz.ll | 3 +++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll
+index dc3579c..2765a71 100644
+--- a/backend/src/libocl/src/ocl_barrier.ll
++++ b/backend/src/libocl/src/ocl_barrier.ll
+@@ -4,6 +4,9 @@
+ ;#define CLK_LOCAL_MEM_FENCE (1 << 0)
+ ;#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
++target triple = "spir"
++
+ declare i32 @_get_local_mem_fence() nounwind alwaysinline
+ declare i32 @_get_global_mem_fence() nounwind alwaysinline
+ declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate
+diff --git a/backend/src/libocl/src/ocl_clz.ll b/backend/src/libocl/src/ocl_clz.ll
+index a274cde..9522881 100644
+--- a/backend/src/libocl/src/ocl_clz.ll
++++ b/backend/src/libocl/src/ocl_clz.ll
+@@ -1,3 +1,6 @@
++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
++target triple = "spir"
++
+ declare i8 @llvm.ctlz.i8(i8, i1)
+ declare i16 @llvm.ctlz.i16(i16, i1)
+ declare i32 @llvm.ctlz.i32(i32, i1)
+--
+1.8.3.2
diff --git a/llvm-3.7-patch-5.patch b/llvm-3.7-patch-5.patch
new file mode 100644
index 000000000000..9ccf8c82892d
--- /dev/null
+++ b/llvm-3.7-patch-5.patch
@@ -0,0 +1,25 @@
+A function that takes no parameters must be declared with an explicit void parameter list.
+
+Signed-off-by: Yang Rong <rong.r.yang at intel.com>
+---
+ kernels/compiler_function_qualifiers.cl | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernels/compiler_function_qualifiers.cl b/kernels/compiler_function_qualifiers.cl
+index c904c84..c9f7e5d 100644
+--- a/kernels/compiler_function_qualifiers.cl
++++ b/kernels/compiler_function_qualifiers.cl
+@@ -1,9 +1,9 @@
+ /* test OpenCL 1.1 Function Qualifiers (section 6.7) */
+-kernel void compiler_function_qualifiers()
++kernel void compiler_function_qualifiers(void)
+ __attribute__((vec_type_hint(float)))
+ __attribute__((work_group_size_hint(4,1,1)))
+ __attribute__((reqd_work_group_size(4,1,1)));
+
+-kernel void compiler_function_qualifiers()
++kernel void compiler_function_qualifiers(void)
+ {
+ }
+--
+1.8.3.2