+++ /dev/null
-diff -Nur -x .git llvm-3.2.src/autoconf/configure.ac llvm-r600/autoconf/configure.ac
---- llvm-3.2.src/autoconf/configure.ac 2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/autoconf/configure.ac 2013-01-25 19:43:56.096716416 +0100
-@@ -751,6 +751,11 @@
-
- if test ${enableval} != "disable"
- then
-+ if test ${enableval} = "AMDGPU"
-+ then
-+ AC_MSG_ERROR([The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600])
-+ enableval="R600"
-+ fi
- TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
- fi
-
-diff -Nur -x .git llvm-3.2.src/configure llvm-r600/configure
---- llvm-3.2.src/configure 2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/configure 2013-01-25 19:43:56.173383081 +0100
-@@ -5473,6 +5473,13 @@
-
- if test ${enableval} != "disable"
- then
-+ if test ${enableval} = "AMDGPU"
-+ then
-+ { { echo "$as_me:$LINENO: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&5
-+echo "$as_me: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&2;}
-+ { (exit 1); exit 1; }; }
-+ enableval="R600"
-+ fi
- TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
- fi
-
-@@ -10316,7 +10323,7 @@
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<EOF
--#line 10317 "configure"
-+#line 10326 "configure"
- #include "confdefs.h"
-
- #if HAVE_DLFCN_H
-diff -Nur -x .git llvm-3.2.src/include/llvm/IntrinsicsR600.td llvm-r600/include/llvm/IntrinsicsR600.td
---- llvm-3.2.src/include/llvm/IntrinsicsR600.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/include/llvm/IntrinsicsR600.td 2013-01-25 19:43:56.433383075 +0100
-@@ -0,0 +1,36 @@
-+//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines all of the R600-specific intrinsics.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "r600" in {
-+
-+class R600ReadPreloadRegisterIntrinsic<string name>
-+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-+ GCCBuiltin<name>;
-+
-+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
-+ def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
-+ def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
-+ def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-+}
-+
-+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_global_size">;
-+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_local_size">;
-+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_ngroups">;
-+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_tgid">;
-+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_tidig">;
-+} // End TargetPrefix = "r600"
-diff -Nur -x .git llvm-3.2.src/include/llvm/Intrinsics.td llvm-r600/include/llvm/Intrinsics.td
---- llvm-3.2.src/include/llvm/Intrinsics.td 2012-10-20 01:00:20.000000000 +0200
-+++ llvm-r600/include/llvm/Intrinsics.td 2013-01-25 19:43:56.426716409 +0100
-@@ -469,3 +469,4 @@
- include "llvm/IntrinsicsHexagon.td"
- include "llvm/IntrinsicsNVVM.td"
- include "llvm/IntrinsicsMips.td"
-+include "llvm/IntrinsicsR600.td"
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2012-11-26 18:01:12.000000000 +0100
-+++ llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2013-01-25 19:43:56.720049736 +0100
-@@ -8514,11 +8514,8 @@
- if (Opcode == ISD::DELETED_NODE &&
- (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
- Opcode = Opc;
-- // If not supported by target, bail out.
-- if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
-- TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
-- return SDValue();
- }
-+
- if (Opc != Opcode)
- return SDValue();
-
-@@ -8543,6 +8540,10 @@
- assert(SrcVT != MVT::Other && "Cannot determine source type!");
-
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
-+
-+ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
-+ return SDValue();
-+
- SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2012-10-24 19:25:11.000000000 +0200
-+++ llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2013-01-25 19:43:56.733383069 +0100
-@@ -731,9 +731,10 @@
- return;
- }
- case TargetLowering::Promote: {
-- assert(VT.isVector() && "Unknown legal promote case!");
-- Value = DAG.getNode(ISD::BITCAST, dl,
-- TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
-+ EVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
-+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+ "Can only promote stores to same size type");
-+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr,
- ST->getPointerInfo(), isVolatile,
-@@ -889,10 +890,9 @@
- break;
- }
- case TargetLowering::Promote: {
-- // Only promote a load of vector type to another.
-- assert(VT.isVector() && "Cannot promote this load!");
-- // Change base type to a different vector type.
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
-+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+ "Can only promote loads to same size type");
-
- SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
-diff -Nur -x .git llvm-3.2.src/lib/Target/LLVMBuild.txt llvm-r600/lib/Target/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/LLVMBuild.txt 2012-07-16 20:19:46.000000000 +0200
-+++ llvm-r600/lib/Target/LLVMBuild.txt 2013-01-25 19:43:57.173383060 +0100
-@@ -16,7 +16,7 @@
- ;===------------------------------------------------------------------------===;
-
- [common]
--subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
-+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
-
- ; This is a special group whose required libraries are extended (by llvm-build)
- ; with the best execution engine (the native JIT, if available, or the
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp 2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,138 @@
-+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+///
-+/// The AMDGPUAsmPrinter is used to print both the assembly string and the
-+/// binary code. When passed an MCAsmStreamer it prints assembly, and when
-+/// passed an MCObjectStreamer it outputs binary code.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+
-+#include "AMDGPUAsmPrinter.h"
-+#include "AMDGPU.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "SIRegisterInfo.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/Target/TargetLoweringObjectFile.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+using namespace llvm;
-+
-+
-+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
-+ MCStreamer &Streamer) {
-+ return new AMDGPUAsmPrinter(tm, Streamer);
-+}
-+
-+extern "C" void LLVMInitializeR600AsmPrinter() {
-+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
-+}
-+
-+/// We need to override this function so we can avoid
-+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
-+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
-+ if (STM.dumpCode()) {
-+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-+ MF.dump();
-+#endif
-+ }
-+ SetupMachineFunction(MF);
-+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
-+ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ EmitProgramInfo(MF);
-+ }
-+ EmitFunctionBody();
-+ return false;
-+}
-+
-+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
-+ unsigned MaxSGPR = 0;
-+ unsigned MaxVGPR = 0;
-+ bool VCCUsed = false;
-+ const SIRegisterInfo * RI =
-+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
-+
-+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+ BB != BB_E; ++BB) {
-+ MachineBasicBlock &MBB = *BB;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+ I != E; ++I) {
-+ MachineInstr &MI = *I;
-+
-+ unsigned numOperands = MI.getNumOperands();
-+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
-+ MachineOperand & MO = MI.getOperand(op_idx);
-+ unsigned maxUsed;
-+ unsigned width = 0;
-+ bool isSGPR = false;
-+ unsigned reg;
-+ unsigned hwReg;
-+ if (!MO.isReg()) {
-+ continue;
-+ }
-+ reg = MO.getReg();
-+ if (reg == AMDGPU::VCC) {
-+ VCCUsed = true;
-+ continue;
-+ }
-+ switch (reg) {
-+ default: break;
-+ case AMDGPU::EXEC:
-+ case AMDGPU::SI_LITERAL_CONSTANT:
-+ case AMDGPU::SREG_LIT_0:
-+ case AMDGPU::M0:
-+ continue;
-+ }
-+
-+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 1;
-+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
-+ isSGPR = false;
-+ width = 1;
-+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 2;
-+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
-+ isSGPR = false;
-+ width = 2;
-+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 4;
-+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
-+ isSGPR = false;
-+ width = 4;
-+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 8;
-+ } else {
-+ assert(!"Unknown register class");
-+ }
-+ hwReg = RI->getEncodingValue(reg);
-+ maxUsed = hwReg + width - 1;
-+ if (isSGPR) {
-+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
-+ } else {
-+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
-+ }
-+ }
-+ }
-+ }
-+ if (VCCUsed) {
-+ MaxSGPR += 2;
-+ }
-+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-+ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
-+ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
-+ OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,44 @@
-+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU Assembly printer class.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_ASMPRINTER_H
-+#define AMDGPU_ASMPRINTER_H
-+
-+#include "llvm/CodeGen/AsmPrinter.h"
-+
-+namespace llvm {
-+
-+class AMDGPUAsmPrinter : public AsmPrinter {
-+
-+public:
-+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-+ : AsmPrinter(TM, Streamer) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ virtual const char *getPassName() const {
-+ return "AMDGPU Assembly Printer";
-+ }
-+
-+ /// \brief Emit register usage information so that the GPU driver
-+ /// can correctly setup the GPU state.
-+ void EmitProgramInfo(MachineFunction &MF);
-+
-+ /// Implemented in AMDGPUMCInstLower.cpp
-+ virtual void EmitInstruction(const MachineInstr *MI);
-+};
-+
-+} // End namespace llvm
-+
-+#endif //AMDGPU_ASMPRINTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,49 @@
-+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief CodeEmitter interface for R600 and SI codegen.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUCODEEMITTER_H
-+#define AMDGPUCODEEMITTER_H
-+
-+namespace llvm {
-+
-+class AMDGPUCodeEmitter {
-+public:
-+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
-+ const MachineOperand &MO) const { return 0; }
-+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
-+ unsigned OpNo) const {
-+ return 0;
-+ }
-+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
-+ unsigned OpNo) const {
-+ return 0;
-+ }
-+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
-+ uint64_t Value) const {
-+ return Value;
-+ }
-+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
-+ unsigned OpNo) const {
-+ return 0;
-+ }
-+ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
-+ const {
-+ return 0;
-+ }
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUCODEEMITTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass lowers AMDIL machine instructions to the appropriate
-+/// hardware instructions.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class AMDGPUConvertToISAPass : public MachineFunctionPass {
-+
-+private:
-+ static char ID;
-+ TargetMachine &TM;
-+
-+public:
-+ AMDGPUConvertToISAPass(TargetMachine &tm) :
-+ MachineFunctionPass(ID), TM(tm) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
-+
-+};
-+
-+} // End anonymous namespace
-+
-+char AMDGPUConvertToISAPass::ID = 0;
-+
-+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
-+ return new AMDGPUConvertToISAPass(tm);
-+}
-+
-+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
-+ const AMDGPUInstrInfo * TII =
-+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-+
-+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+ BB != BB_E; ++BB) {
-+ MachineBasicBlock &MBB = *BB;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+ I != E; ++I) {
-+ MachineInstr &MI = *I;
-+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
-+ }
-+ }
-+ return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.h llvm-r600/lib/Target/R600/AMDGPU.h
---- llvm-3.2.src/lib/Target/R600/AMDGPU.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPU.h 2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,51 @@
-+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_H
-+#define AMDGPU_H
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/Target/TargetMachine.h"
-+
-+namespace llvm {
-+
-+class FunctionPass;
-+class AMDGPUTargetMachine;
-+
-+// R600 Passes
-+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
-+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
-+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
-+
-+// SI Passes
-+FunctionPass *createSIAnnotateControlFlowPass();
-+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
-+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
-+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-+FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
-+FunctionPass *createSIInsertWaits(TargetMachine &tm);
-+
-+// Passes common to R600 and SI
-+Pass *createAMDGPUStructurizeCFGPass();
-+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-+
-+} // End namespace llvm
-+
-+namespace ShaderType {
-+ enum Type {
-+ PIXEL = 0,
-+ VERTEX = 1,
-+ GEOMETRY = 2,
-+ COMPUTE = 3
-+ };
-+}
-+
-+#endif // AMDGPU_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,258 @@
-+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implementation of the TargetInstrInfo class that is common to all
-+/// AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDIL.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+#define GET_INSTRINFO_CTOR
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+using namespace llvm;
-+
-+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
-+ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
-+
-+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
-+ return RI;
-+}
-+
-+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
-+ unsigned &SrcReg, unsigned &DstReg,
-+ unsigned &SubIdx) const {
-+// TODO: Implement this function
-+ return false;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+
-+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return false;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return false;
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
-+ MachineBasicBlock::iterator &MBBI,
-+ LiveVariables *LV) const {
-+// TODO: Implement this function
-+ return NULL;
-+}
-+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+ MachineBasicBlock &MBB) const {
-+ while (iter != MBB.end()) {
-+ switch (iter->getOpcode()) {
-+ default:
-+ break;
-+ case AMDGPU::BRANCH_COND_i32:
-+ case AMDGPU::BRANCH_COND_f32:
-+ case AMDGPU::BRANCH:
-+ return true;
-+ };
-+ ++iter;
-+ }
-+ return false;
-+}
-+
-+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
-+ MachineBasicBlock::iterator tmp = MBB->end();
-+ if (!MBB->size()) {
-+ return MBB->end();
-+ }
-+ while (--tmp) {
-+ if (tmp->getOpcode() == AMDGPU::ENDLOOP
-+ || tmp->getOpcode() == AMDGPU::ENDIF
-+ || tmp->getOpcode() == AMDGPU::ELSE) {
-+ if (tmp == MBB->begin()) {
-+ return tmp;
-+ } else {
-+ continue;
-+ }
-+ } else {
-+ return ++tmp;
-+ }
-+ }
-+ return MBB->end();
-+}
-+
-+void
-+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned SrcReg, bool isKill,
-+ int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const {
-+ assert(!"Not Implemented");
-+}
-+
-+void
-+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned DestReg, int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const {
-+ assert(!"Not Implemented");
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ int FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+MachineInstr*
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ MachineInstr *LoadMI) const {
-+ // TODO: Implement this function
-+ return 0;
-+}
-+bool
-+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+ unsigned Reg, bool UnfoldLoad,
-+ bool UnfoldStore,
-+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+ SmallVectorImpl<SDNode*> &NewNodes) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+unsigned
-+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
-+ bool UnfoldLoad, bool UnfoldStore,
-+ unsigned *LoadRegIndex) const {
-+ // TODO: Implement this function
-+ return 0;
-+}
-+
-+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+ int64_t Offset1, int64_t Offset2,
-+ unsigned NumLoads) const {
-+ assert(Offset2 > Offset1
-+ && "Second offset should be larger than first offset!");
-+ // If we have less than 16 loads in a row, and the offsets are within 16,
-+ // then schedule together.
-+ // TODO: Make the loads schedule near if it fits in a cacheline
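-+ // e.g., two loads at offsets 0 and 12 (NumLoads == 2 < 16, gap 12 < 16)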
-+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
-+}
-+
-+bool
-+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
-+ const {
-+ // TODO: Implement this function
-+ return true;
-+}
-+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI) const {
-+ // TODO: Implement this function
-+}
-+
-+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+bool
-+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+ const SmallVectorImpl<MachineOperand> &Pred2)
-+ const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
-+ std::vector<MachineOperand> &Pred) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
-+ // TODO: Implement this function
-+ return MI->getDesc().isPredicable();
-+}
-+
-+bool
-+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
-+ // TODO: Implement this function
-+ return true;
-+}
-+
-+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
-+ DebugLoc DL) const {
-+ MachineRegisterInfo &MRI = MF.getRegInfo();
-+ const AMDGPURegisterInfo & RI = getRegisterInfo();
-+
-+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
-+ MachineOperand &MO = MI.getOperand(i);
-+ // Convert dst regclass to one that is supported by the ISA
-+ if (MO.isReg() && MO.isDef()) {
-+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
-+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-+
-+ assert(newRegClass);
-+
-+ MRI.setRegClass(MO.getReg(), newRegClass);
-+ }
-+ }
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,149 @@
-+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Contains the definition of a TargetInstrInfo class that is common
-+/// to all AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUINSTRUCTIONINFO_H
-+#define AMDGPUINSTRUCTIONINFO_H
-+
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+#include <map>
-+
-+#define GET_INSTRINFO_HEADER
-+#define GET_INSTRINFO_ENUM
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
-+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
-+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
-+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class MachineFunction;
-+class MachineInstr;
-+class MachineInstrBuilder;
-+
-+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-+private:
-+ const AMDGPURegisterInfo RI;
-+ TargetMachine &TM;
-+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+ MachineBasicBlock &MBB) const;
-+public:
-+ explicit AMDGPUInstrInfo(TargetMachine &tm);
-+
-+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
-+
-+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
-+ unsigned &DstReg, unsigned &SubIdx) const;
-+
-+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const;
-+ bool hasLoadFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const;
-+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const;
-+ bool hasStoreFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const;
-+
-+ MachineInstr *
-+ convertToThreeAddress(MachineFunction::iterator &MFI,
-+ MachineBasicBlock::iterator &MBBI,
-+ LiveVariables *LV) const;
-+
-+
-+ virtual void copyPhysReg(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI, DebugLoc DL,
-+ unsigned DestReg, unsigned SrcReg,
-+ bool KillSrc) const = 0;
-+
-+ void storeRegToStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned SrcReg, bool isKill, int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const;
-+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned DestReg, int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const;
-+
-+protected:
-+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ int FrameIndex) const;
-+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ MachineInstr *LoadMI) const;
-+public:
-+ bool canFoldMemoryOperand(const MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops) const;
-+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
-+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+ SmallVectorImpl<SDNode *> &NewNodes) const;
-+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
-+ bool UnfoldLoad, bool UnfoldStore,
-+ unsigned *LoadRegIndex = 0) const;
-+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+ int64_t Offset1, int64_t Offset2,
-+ unsigned NumLoads) const;
-+
-+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-+ void insertNoop(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI) const;
-+ bool isPredicated(const MachineInstr *MI) const;
-+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+ const SmallVectorImpl<MachineOperand> &Pred2) const;
-+ bool DefinesPredicate(MachineInstr *MI,
-+ std::vector<MachineOperand> &Pred) const;
-+ bool isPredicable(MachineInstr *MI) const;
-+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-+
-+ // Helper functions that check the opcode for status information
-+ bool isLoadInst(llvm::MachineInstr *MI) const;
-+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isStoreInst(llvm::MachineInstr *MI) const;
-+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
-+
-+ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+ int64_t Imm) const = 0;
-+ virtual unsigned getIEQOpcode() const = 0;
-+ virtual bool isMov(unsigned opcode) const = 0;
-+
-+ /// \brief Convert the AMDIL MachineInstr to a supported ISA
-+ /// MachineInstr
-+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
-+ DebugLoc DL) const;
-+
-+};
-+
-+} // End llvm namespace
-+
-+#endif // AMDGPUINSTRUCTIONINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,76 @@
-+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains DAG node definitions for the AMDGPU target.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Profiles
-+//===----------------------------------------------------------------------===//
-+
-+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
-+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
-+]>;
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Nodes
-+//
-+
-+// out = ((a << 32) | b) >> c
-+//
-+// Can be used to optimize rotl:
-+// rotl(a, b) = bitalign(a, a, 32 - b)
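-+// (for 32-bit a, ((a << 32) | a) >> (32 - b) yields a rotated left by b)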
-+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
-+
-+// The argument to this node is a dword address.
-+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
-+
-+// out = a - floor(a)
-+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
-+
-+// out = max(a, b) a and b are floats
-+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are signed ints
-+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are unsigned ints
-+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are floats
-+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are signed ints
-+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are unsigned ints
-+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// urecip - This operation is a helper for integer division; it returns the
-+// result of 1 / a as a fractional unsigned integer.
-+// out = (2^32 / a) + e
-+// e is rounding error
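-+// e.g., URECIP(7) is roughly 2^32 / 7, i.e. 613566757 up to the error e.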
-+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
-+
-+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td llvm-r600/lib/Target/R600/AMDGPUInstructions.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstructions.td 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,191 @@
-+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains instruction defs that are common to all hw codegen
-+// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
-+ field bits<16> AMDILOp = 0;
-+ field bits<3> Gen = 0;
-+
-+ let Namespace = "AMDGPU";
-+ let OutOperandList = outs;
-+ let InOperandList = ins;
-+ let AsmString = asm;
-+ let Pattern = pattern;
-+ let Itinerary = NullALU;
-+ let TSFlags{42-40} = Gen;
-+ let TSFlags{63-48} = AMDILOp;
-+}
-+
-+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
-+ : AMDGPUInst<outs, ins, asm, pattern> {
-+
-+ field bits<32> Inst = 0xffffffff;
-+
-+}
-+
-+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
-+
-+def COND_EQ : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOEQ: case ISD::SETUEQ:
-+ case ISD::SETEQ: return true;}}}]
-+>;
-+
-+def COND_NE : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETONE: case ISD::SETUNE:
-+ case ISD::SETNE: return true;}}}]
-+>;
-+def COND_GT : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOGT: case ISD::SETUGT:
-+ case ISD::SETGT: return true;}}}]
-+>;
-+
-+def COND_GE : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOGE: case ISD::SETUGE:
-+ case ISD::SETGE: return true;}}}]
-+>;
-+
-+def COND_LT : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOLT: case ISD::SETULT:
-+ case ISD::SETLT: return true;}}}]
-+>;
-+
-+def COND_LE : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOLE: case ISD::SETULE:
-+ case ISD::SETLE: return true;}}}]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Load/Store Pattern Fragments
-+//===----------------------------------------------------------------------===//
-+
-+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
-+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-+}]>;
-+
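-+// IEEE-754 single-precision bit patterns: 2*pi, pi and 1/(2*pi) respectively.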
-+class Constants {
-+int TWO_PI = 0x40c90fdb;
-+int PI = 0x40490fdb;
-+int TWO_PI_INV = 0x3e22f983;
-+}
-+def CONST : Constants;
-+
-+def FP_ZERO : PatLeaf <
-+ (fpimm),
-+ [{return N->getValueAPF().isZero();}]
-+>;
-+
-+def FP_ONE : PatLeaf <
-+ (fpimm),
-+ [{return N->isExactlyValue(1.0);}]
-+>;
-+
-+let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
-+
-+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
-+ (outs rc:$dst),
-+ (ins rc:$src0),
-+ "CLAMP $dst, $src0",
-+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
-+>;
-+
-+class FABS <RegisterClass rc> : AMDGPUShaderInst <
-+ (outs rc:$dst),
-+ (ins rc:$src0),
-+ "FABS $dst, $src0",
-+ [(set rc:$dst, (fabs rc:$src0))]
-+>;
-+
-+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
-+ (outs rc:$dst),
-+ (ins rc:$src0),
-+ "FNEG $dst, $src0",
-+ [(set rc:$dst, (fneg rc:$src0))]
-+>;
-+
-+def SHADER_TYPE : AMDGPUShaderInst <
-+ (outs),
-+ (ins i32imm:$type),
-+ "SHADER_TYPE $type",
-+ [(int_AMDGPU_shader_type imm:$type)]
-+>;
-+
-+} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
-+
-+/* Generic helper patterns for intrinsics */
-+/* -------------------------------------- */
-+
-+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
-+ RegisterClass rc> : Pat <
-+ (fpow rc:$src0, rc:$src1),
-+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
-+>;
-+
-+/* Other helper patterns */
-+/* --------------------- */
-+
-+/* Extract element pattern */
-+class Extract_Element <ValueType sub_type, ValueType vec_type,
-+ RegisterClass vec_class, int sub_idx,
-+ SubRegIndex sub_reg>: Pat<
-+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
-+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
-+>;
-+
-+/* Insert element pattern */
-+class Insert_Element <ValueType elem_type, ValueType vec_type,
-+ RegisterClass elem_class, RegisterClass vec_class,
-+ int sub_idx, SubRegIndex sub_reg> : Pat <
-+
-+ (vec_type (vector_insert (vec_type vec_class:$vec),
-+ (elem_type elem_class:$elem), sub_idx)),
-+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
-+>;
-+
-+// Vector Build pattern
-+class Vector_Build <ValueType vecType, RegisterClass vectorClass,
-+ ValueType elemType, RegisterClass elemClass> : Pat <
-+ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
-+ (elemType elemClass:$z), (elemType elemClass:$w))),
-+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-+ (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
-+ elemClass:$z, sel_z), elemClass:$w, sel_w)
-+>;
-+
-+// bitconvert pattern
-+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
-+ (dt (bitconvert (st rc:$src0))),
-+ (dt rc:$src0)
-+>;
-+
-+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
-+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
-+ (vt rc:$addr)
-+>;
-+
-+include "R600Instructions.td"
-+
-+include "SIInstrInfo.td"
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines intrinsics that are used by all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "AMDGPU", isTarget = 1 in {
-+
-+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
-+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-+
-+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
-+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
-+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-+
-+ def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
-+}
-+
-+let TargetPrefix = "TGSI", isTarget = 1 in {
-+
-+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
-+}
-+
-+include "SIIntrinsics.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,420 @@
-+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This is the parent TargetLowering class for hardware code gen
-+/// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUISelLowering.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-+
-+using namespace llvm;
-+
-+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
-+ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
-+
-+ // Initialize target lowering borrowed from AMDIL
-+ InitAMDILLowering();
-+
-+ // We need to custom lower some of the intrinsics
-+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-+
-+ // Library functions. These default to Expand, but we have instructions
-+ // for them.
-+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
-+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
-+ setOperationAction(ISD::FPOW, MVT::f32, Legal);
-+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
-+ setOperationAction(ISD::FABS, MVT::f32, Legal);
-+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
-+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
-+
-+ // Lower floating point store/load to integer store/load to reduce the number
-+ // of patterns in tablegen.
-+ setOperationAction(ISD::STORE, MVT::f32, Promote);
-+ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
-+
-+ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
-+ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
-+
-+ setOperationAction(ISD::LOAD, MVT::f32, Promote);
-+ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
-+
-+ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
-+ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
-+
-+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
-+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
-+ setOperationAction(ISD::UREM, MVT::i32, Expand);
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// TargetLowering Callbacks
-+//===---------------------------------------------------------------------===//
-+
-+SDValue AMDGPUTargetLowering::LowerFormalArguments(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const {
-+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
-+ InVals.push_back(SDValue());
-+ }
-+ return Chain;
-+}
-+
-+SDValue AMDGPUTargetLowering::LowerReturn(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::OutputArg> &Outs,
-+ const SmallVectorImpl<SDValue> &OutVals,
-+ DebugLoc DL, SelectionDAG &DAG) const {
-+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// Target specific lowering
-+//===---------------------------------------------------------------------===//
-+
-+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
-+ const {
-+ switch (Op.getOpcode()) {
-+ default:
-+ Op.getNode()->dump();
-+ assert(0 && "Custom lowering code for this"
-+ "instruction is not implemented yet!");
-+ break;
-+ // AMDIL DAG lowering
-+ case ISD::SDIV: return LowerSDIV(Op, DAG);
-+ case ISD::SREM: return LowerSREM(Op, DAG);
-+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
-+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
-+ // AMDGPU DAG lowering
-+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
-+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
-+ }
-+ return Op;
-+}
-+
-+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ switch (IntrinsicID) {
-+ default: return Op;
-+ case AMDGPUIntrinsic::AMDIL_abs:
-+ return LowerIntrinsicIABS(Op, DAG);
-+ case AMDGPUIntrinsic::AMDIL_exp:
-+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
-+ case AMDGPUIntrinsic::AMDGPU_lrp:
-+ return LowerIntrinsicLRP(Op, DAG);
-+ case AMDGPUIntrinsic::AMDIL_fraction:
-+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
-+ case AMDGPUIntrinsic::AMDIL_mad:
-+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2), Op.getOperand(3));
-+ case AMDGPUIntrinsic::AMDIL_max:
-+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_imax:
-+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_umax:
-+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDIL_min:
-+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_imin:
-+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_umin:
-+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDIL_round_nearest:
-+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
-+ }
-+}
-+
-+/// IABS(a) = SMAX(sub(0, a), a)
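-+/// e.g. IABS(-5) = SMAX(0 - (-5), -5) = SMAX(5, -5) = 5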
-+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
-+ SelectionDAG &DAG) const {
-+
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-+ Op.getOperand(1));
-+
-+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
-+}
-+
-+/// Linear Interpolation
-+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
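-+/// e.g. LRP(0.25, 8.0, 4.0) = 0.25 * 8.0 + 0.75 * 4.0 = 5.0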
-+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
-+ DAG.getConstantFP(1.0f, MVT::f32),
-+ Op.getOperand(1));
-+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
-+ Op.getOperand(3));
-+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2),
-+ OneSubAC);
-+}
-+
-+/// \brief Generate Min/Max node
-+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ SDValue True = Op.getOperand(2);
-+ SDValue False = Op.getOperand(3);
-+ SDValue CC = Op.getOperand(4);
-+
-+ if (VT != MVT::f32 ||
-+ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
-+ return SDValue();
-+ }
-+
-+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+ switch (CCOpcode) {
-+ case ISD::SETOEQ:
-+ case ISD::SETONE:
-+ case ISD::SETUNE:
-+ case ISD::SETNE:
-+ case ISD::SETUEQ:
-+ case ISD::SETEQ:
-+ case ISD::SETFALSE:
-+ case ISD::SETFALSE2:
-+ case ISD::SETTRUE:
-+ case ISD::SETTRUE2:
-+ case ISD::SETUO:
-+ case ISD::SETO:
-+ assert(0 && "Operation should already be optimised !");
-+ case ISD::SETULE:
-+ case ISD::SETULT:
-+ case ISD::SETOLE:
-+ case ISD::SETOLT:
-+ case ISD::SETLE:
-+ case ISD::SETLT: {
-+ if (LHS == True)
-+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+ else
-+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+ }
-+ case ISD::SETGT:
-+ case ISD::SETGE:
-+ case ISD::SETUGE:
-+ case ISD::SETOGE:
-+ case ISD::SETUGT:
-+ case ISD::SETOGT: {
-+ if (LHS == True)
-+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+ else
-+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+ }
-+ case ISD::SETCC_INVALID:
-+ assert(0 && "Invalid setcc condcode !");
-+ }
-+ return Op;
-+}
-+
-+
-+
-+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ SDValue Num = Op.getOperand(0);
-+ SDValue Den = Op.getOperand(1);
-+
-+ SmallVector<SDValue, 8> Results;
-+
-+ // RCP = URECIP(Den) = 2^32 / Den + e
-+ // e is rounding error.
-+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-+
-+ // RCP_LO = umulo(RCP, Den)
-+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
-+
-+ // RCP_HI = mulhu(RCP, Den)
-+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-+
-+ // NEG_RCP_LO = -RCP_LO
-+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-+ RCP_LO);
-+
-+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
-+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+ NEG_RCP_LO, RCP_LO,
-+ ISD::SETEQ);
-+ // Calculate the rounding error from the URECIP instruction
-+ // E = mulhu(ABS_RCP_LO, RCP)
-+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-+
-+ // RCP_A_E = RCP + E
-+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-+
-+ // RCP_S_E = RCP - E
-+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-+
-+ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
-+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+ RCP_A_E, RCP_S_E,
-+ ISD::SETEQ);
-+ // Quotient = mulhu(Tmp0, Num)
-+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-+
-+ // Num_S_Remainder = Quotient * Den
-+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
-+
-+ // Remainder = Num - Num_S_Remainder
-+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-+
-+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
-+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
-+ DAG.getConstant(-1, VT),
-+ DAG.getConstant(0, VT),
-+ ISD::SETGE);
-+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
-+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
-+ DAG.getConstant(0, VT),
-+ DAG.getConstant(-1, VT),
-+ DAG.getConstant(0, VT),
-+ ISD::SETGE);
-+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
-+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
-+ Remainder_GE_Zero);
-+
-+ // Calculate Division result:
-+
-+ // Quotient_A_One = Quotient + 1
-+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
-+ DAG.getConstant(1, VT));
-+
-+ // Quotient_S_One = Quotient - 1
-+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
-+ DAG.getConstant(1, VT));
-+
-+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
-+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+ Quotient, Quotient_A_One, ISD::SETEQ);
-+
-+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
-+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+ Quotient_S_One, Div, ISD::SETEQ);
-+
-+ // Calculate Rem result:
-+
-+ // Remainder_S_Den = Remainder - Den
-+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-+
-+ // Remainder_A_Den = Remainder + Den
-+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-+
-+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
-+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+ Remainder, Remainder_S_Den, ISD::SETEQ);
-+
-+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
-+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+ Remainder_A_Den, Rem, ISD::SETEQ);
-+ SDValue Ops[2];
-+ Ops[0] = Div;
-+ Ops[1] = Rem;
-+ return DAG.getMergeValues(Ops, 2, DL);
-+}
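-+
-+// Worked example (an editorial sketch, not part of the original notes): for
-+// Num = 100 and Den = 7, URECIP returns RCP ~= 2^32/7 plus a rounding error
-+// e. MULHU of the corrected reciprocal with Num gives a Quotient estimate
-+// that is off by at most one; the Remainder_GE_Den and Remainder_GE_Zero
-+// selects then pick between Quotient-1, Quotient and Quotient+1, so the
-+// merged result is Div = 14 and Rem = 2.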
-+
-+//===----------------------------------------------------------------------===//
-+// Helper functions
-+//===----------------------------------------------------------------------===//
-+
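-+// The hardware encodes boolean "true" as 1.0f for floats and as all ones
-+// for integers, and "false" as 0.0f / 0; the two helpers below test for
-+// exactly those canonical values.
-+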
-+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
-+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+ return CFP->isExactlyValue(1.0);
-+ }
-+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+ return C->isAllOnesValue();
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
-+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+ return CFP->getValueAPF().isZero();
-+ }
-+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+ return C->isNullValue();
-+ }
-+ return false;
-+}
-+
-+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
-+ const TargetRegisterClass *RC,
-+ unsigned Reg, EVT VT) const {
-+ MachineFunction &MF = DAG.getMachineFunction();
-+ MachineRegisterInfo &MRI = MF.getRegInfo();
-+ unsigned VirtualRegister;
-+ if (!MRI.isLiveIn(Reg)) {
-+ VirtualRegister = MRI.createVirtualRegister(RC);
-+ MRI.addLiveIn(Reg, VirtualRegister);
-+ } else {
-+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
-+ }
-+ return DAG.getRegister(VirtualRegister, VT);
-+}
-+
-+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
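-+// For example, NODE_NAME_CASE(MAD) expands to:
-+//   case AMDGPUISD::MAD: return "MAD";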
-+
-+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
-+ switch (Opcode) {
-+ default: return 0;
-+ // AMDIL DAG nodes
-+ NODE_NAME_CASE(MAD)
-+ NODE_NAME_CASE(CALL)
-+ NODE_NAME_CASE(UMUL)
-+ NODE_NAME_CASE(DIV_INF)
-+ NODE_NAME_CASE(RET_FLAG)
-+ NODE_NAME_CASE(BRANCH_COND)
-+
-+ // AMDGPU DAG nodes
-+ NODE_NAME_CASE(DWORDADDR)
-+ NODE_NAME_CASE(FRACT)
-+ NODE_NAME_CASE(FMAX)
-+ NODE_NAME_CASE(SMAX)
-+ NODE_NAME_CASE(UMAX)
-+ NODE_NAME_CASE(FMIN)
-+ NODE_NAME_CASE(SMIN)
-+ NODE_NAME_CASE(UMIN)
-+ NODE_NAME_CASE(URECIP)
-+ NODE_NAME_CASE(INTERP)
-+ NODE_NAME_CASE(INTERP_P0)
-+ NODE_NAME_CASE(EXPORT)
-+ NODE_NAME_CASE(CONST_ADDRESS)
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h llvm-r600/lib/Target/R600/AMDGPUISelLowering.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.h 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,145 @@
-+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition of the TargetLowering class that is common
-+/// to all AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUISELLOWERING_H
-+#define AMDGPUISELLOWERING_H
-+
-+#include "llvm/Target/TargetLowering.h"
-+
-+namespace llvm {
-+
-+class MachineRegisterInfo;
-+
-+class AMDGPUTargetLowering : public TargetLowering {
-+private:
-+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
-+
-+protected:
-+
-+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
-+ /// MachineFunction.
-+ ///
-+ /// \returns a RegisterSDNode representing Reg.
-+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
-+ unsigned Reg, EVT VT) const;
-+
-+ bool isHWTrueValue(SDValue Op) const;
-+ bool isHWFalseValue(SDValue Op) const;
-+
-+public:
-+ AMDGPUTargetLowering(TargetMachine &TM);
-+
-+ virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const;
-+
-+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::OutputArg> &Outs,
-+ const SmallVectorImpl<SDValue> &OutVals,
-+ DebugLoc DL, SelectionDAG &DAG) const;
-+
-+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
-+ virtual const char* getTargetNodeName(unsigned Opcode) const;
-+
-+// Functions defined in AMDILISelLowering.cpp
-+public:
-+
-+ /// \brief Determine which of the bits specified in \p Mask are known to be
-+ /// either zero or one and return them in the \p KnownZero and \p KnownOne
-+ /// bitsets.
-+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
-+ APInt &KnownZero,
-+ APInt &KnownOne,
-+ const SelectionDAG &DAG,
-+ unsigned Depth = 0) const;
-+
-+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
-+ const CallInst &I, unsigned Intrinsic) const;
-+
-+ /// We want to mark f32/f64 floating point values as legal.
-+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-+
-+ /// We don't want to shrink f64/f32 constants.
-+ bool ShouldShrinkFPConstant(EVT VT) const;
-+
-+private:
-+ void InitAMDILLowering();
-+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
-+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
-+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
-+};
-+
-+namespace AMDGPUISD {
-+
-+enum {
-+ // AMDIL ISD Opcodes
-+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
-+ MAD, // 32bit Fused Multiply Add instruction
-+ CALL, // Function call based on a single integer
-+ UMUL, // 32bit unsigned multiplication
-+ DIV_INF, // Divide with infinity returned on zero divisor
-+ RET_FLAG,
-+ BRANCH_COND,
-+ // End AMDIL ISD Opcodes
-+ BITALIGN,
-+ DWORDADDR,
-+ FRACT,
-+ FMAX,
-+ SMAX,
-+ UMAX,
-+ FMIN,
-+ SMIN,
-+ UMIN,
-+ URECIP,
-+ INTERP,
-+ INTERP_P0,
-+ EXPORT,
-+ CONST_ADDRESS,
-+ LAST_AMDGPU_ISD_NUMBER
-+};
-+
-+
-+} // End namespace AMDGPUISD
-+
-+namespace SIISD {
-+
-+enum {
-+ SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
-+ VCC_AND,
-+ VCC_BITCAST
-+};
-+
-+} // End namespace SIISD
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUISELLOWERING_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,83 @@
-+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+#include "AMDGPUMCInstLower.h"
-+#include "AMDGPUAsmPrinter.h"
-+#include "R600InstrInfo.h"
-+#include "llvm/CodeGen/MachineBasicBlock.h"
-+#include "llvm/CodeGen/MachineInstr.h"
-+#include "llvm/Constants.h"
-+#include "llvm/MC/MCInst.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/MC/MCExpr.h"
-+#include "llvm/Support/ErrorHandling.h"
-+
-+using namespace llvm;
-+
-+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
-+ Ctx(ctx)
-+{ }
-+
-+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
-+ OutMI.setOpcode(MI->getOpcode());
-+
-+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
-+ const MachineOperand &MO = MI->getOperand(i);
-+
-+ MCOperand MCOp;
-+ switch (MO.getType()) {
-+ default:
-+ llvm_unreachable("unknown operand type");
-+ case MachineOperand::MO_FPImmediate: {
-+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
-+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
-+ "Only floating point immediates are supported at the moment.");
-+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
-+ break;
-+ }
-+ case MachineOperand::MO_Immediate:
-+ MCOp = MCOperand::CreateImm(MO.getImm());
-+ break;
-+ case MachineOperand::MO_Register:
-+ MCOp = MCOperand::CreateReg(MO.getReg());
-+ break;
-+ case MachineOperand::MO_MachineBasicBlock:
-+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-+ MO.getMBB()->getSymbol(), Ctx));
-+ }
-+ OutMI.addOperand(MCOp);
-+ }
-+}
-+
-+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-+ AMDGPUMCInstLower MCInstLowering(OutContext);
-+
-+ if (MI->isBundle()) {
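-+ // A bundle header is only a marker; emit every instruction inside the
-+ // bundle rather than the header itself.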
-+ const MachineBasicBlock *MBB = MI->getParent();
-+ MachineBasicBlock::const_instr_iterator I = MI;
-+ ++I;
-+ while (I != MBB->end() && I->isInsideBundle()) {
-+ MCInst MCBundleInst;
-+ const MachineInstr *BundledInst = I;
-+ MCInstLowering.lower(BundledInst, MCBundleInst);
-+ OutStreamer.EmitInstruction(MCBundleInst);
-+ ++I;
-+ }
-+ } else {
-+ MCInst TmpInst;
-+ MCInstLowering.lower(MI, TmpInst);
-+ OutStreamer.EmitInstruction(TmpInst);
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,34 @@
-+//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_MCINSTLOWER_H
-+#define AMDGPU_MCINSTLOWER_H
-+
-+namespace llvm {
-+
-+class MCInst;
-+class MCContext;
-+class MachineInstr;
-+
-+class AMDGPUMCInstLower {
-+
-+ MCContext &Ctx;
-+
-+public:
-+ AMDGPUMCInstLower(MCContext &ctx);
-+
-+ /// \brief Lower a MachineInstr to an MCInst
-+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif //AMDGPU_MCINSTLOWER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,51 @@
-+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+
-+using namespace llvm;
-+
-+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
-+ const TargetInstrInfo &tii)
-+: AMDGPUGenRegisterInfo(0),
-+ TM(tm),
-+ TII(tii)
-+ { }
-+
-+//===----------------------------------------------------------------------===//
-+// Function handling callbacks - Functions are a seldom-used feature of GPUs, so
-+// they are not supported at this time.
-+//===----------------------------------------------------------------------===//
-+
-+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
-+
-+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
-+ const {
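-+ // Nothing needs to be saved; the single NoRegister entry doubles as the
-+ // zero-terminated empty list that callers expect.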
-+ return &CalleeSavedReg;
-+}
-+
-+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
-+ int SPAdj,
-+ RegScavenger *RS) const {
-+ assert(!"Subroutines not supported yet");
-+}
-+
-+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-+ assert(!"Subroutines not supported yet");
-+ return 0;
-+}
-+
-+#define GET_REGINFO_TARGET_DESC
-+#include "AMDGPUGenRegisterInfo.inc"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,63 @@
-+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
-+/// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUREGISTERINFO_H
-+#define AMDGPUREGISTERINFO_H
-+
-+#include "llvm/ADT/BitVector.h"
-+#include "llvm/Target/TargetRegisterInfo.h"
-+
-+#define GET_REGINFO_HEADER
-+#define GET_REGINFO_ENUM
-+#include "AMDGPUGenRegisterInfo.inc"
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class TargetInstrInfo;
-+
-+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
-+ TargetMachine &TM;
-+ const TargetInstrInfo &TII;
-+ static const uint16_t CalleeSavedReg;
-+
-+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
-+
-+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
-+ assert(!"Unimplemented"); return BitVector();
-+ }
-+
-+ /// \param RC is an AMDIL reg class.
-+ ///
-+ /// \returns The ISA reg class that is equivalent to \p RC.
-+ virtual const TargetRegisterClass * getISARegClass(
-+ const TargetRegisterClass * RC) const {
-+ assert(!"Unimplemented"); return NULL;
-+ }
-+
-+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
-+ assert(!"Unimplemented"); return NULL;
-+ }
-+
-+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
-+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
-+ RegScavenger *RS) const;
-+ unsigned getFrameRegister(const MachineFunction &MF) const;
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUREGISTERINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,22 @@
-+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// Tablegen register definitions common to all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let Namespace = "AMDGPU" in {
-+ def sel_x : SubRegIndex;
-+ def sel_y : SubRegIndex;
-+ def sel_z : SubRegIndex;
-+ def sel_w : SubRegIndex;
-+}
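-+
-+// Editorial note: sel_x through sel_w are assumed to name the four 32-bit
-+// channels (the x/y/z/w selects) of a 128-bit vector register.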
-+
-+include "R600RegisterInfo.td"
-+include "SIRegisterInfo.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,714 @@
-+//===-- AMDGPUStructurizeCFG.cpp - Structurize the CFG --------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// The pass implemented in this file transforms the program's control flow
-+/// graph into a form that's suitable for code generation on hardware that
-+/// implements control flow by execution masking. This currently includes all
-+/// AMD GPUs but may also be useful for other types of hardware.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "llvm/Module.h"
-+#include "llvm/ADT/SCCIterator.h"
-+#include "llvm/Analysis/RegionIterator.h"
-+#include "llvm/Analysis/RegionInfo.h"
-+#include "llvm/Analysis/RegionPass.h"
-+#include "llvm/Transforms/Utils/SSAUpdater.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+// Definition of the complex types used in this pass.
-+
-+typedef std::pair<BasicBlock *, Value *> BBValuePair;
-+typedef ArrayRef<BasicBlock*> BBVecRef;
-+
-+typedef SmallVector<RegionNode*, 8> RNVector;
-+typedef SmallVector<BasicBlock*, 8> BBVector;
-+typedef SmallVector<BBValuePair, 2> BBValueVector;
-+
-+typedef DenseMap<PHINode *, BBValueVector> PhiMap;
-+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
-+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
-+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
-+typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
-+
-+// The name for newly created blocks.
-+
-+static const char *FlowBlockName = "Flow";
-+
-+/// \brief Transforms the control flow graph on one single entry/exit region
-+/// at a time.
-+///
-+/// After the transform all "If"/"Then"/"Else" style control flow looks like
-+/// this:
-+///
-+/// \verbatim
-+/// 1
-+/// ||
-+/// | |
-+/// 2 |
-+/// | /
-+/// |/
-+/// 3
-+/// || Where:
-+/// | | 1 = "If" block, calculates the condition
-+/// 4 | 2 = "Then" subregion, runs if the condition is true
-+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
-+/// |/ 4 = "Else" optional subregion, runs if the condition is false
-+/// 5 5 = "End" block, also rejoins the control flow
-+/// \endverbatim
-+///
-+/// Control flow is expressed as a branch where the true exit goes into the
-+/// "Then"/"Else" region, while the false exit skips the region.
-+/// The condition for the optional "Else" region is expressed as a PHI node.
-+/// The incoming values of the PHI node are true for the "If" edge and false
-+/// for the "Then" edge.
-+///
-+/// In addition, even complicated loops look like this:
-+///
-+/// \verbatim
-+/// 1
-+/// ||
-+/// | |
-+/// 2 ^ Where:
-+/// | / 1 = "Entry" block
-+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
-+/// 3 3 = "Flow" block, with back edge to entry block
-+/// |
-+/// \endverbatim
-+///
-+/// The back edge of the "Flow" block is always on the false side of the branch
-+/// while the true side continues the general flow. So the loop condition
-+/// consists of a network of PHI nodes where the true incoming values express
-+/// breaks and the false values express continue states.
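-+///
-+/// As an editorial sketch (the exact IR shape is an assumption, not part of
-+/// the original comment), a conditional branch in a "Flow" block ends up
-+/// looking like:
-+///
-+/// \verbatim
-+/// %pred = phi i1 [ true, %if ], [ false, %then ]
-+/// br i1 %pred, label %else, label %end
-+/// \endverbatim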
-+class AMDGPUStructurizeCFG : public RegionPass {
-+
-+ static char ID;
-+
-+ Type *Boolean;
-+ ConstantInt *BoolTrue;
-+ ConstantInt *BoolFalse;
-+ UndefValue *BoolUndef;
-+
-+ Function *Func;
-+ Region *ParentRegion;
-+
-+ DominatorTree *DT;
-+
-+ RNVector Order;
-+ VisitedMap Visited;
-+ PredMap Predicates;
-+ BBPhiMap DeletedPhis;
-+ BBVector FlowsInserted;
-+
-+ BasicBlock *LoopStart;
-+ BasicBlock *LoopEnd;
-+ BBPredicates LoopPred;
-+
-+ void orderNodes();
-+
-+ void buildPredicate(BranchInst *Term, unsigned Idx,
-+ BBPredicates &Pred, bool Invert);
-+
-+ void analyzeBlock(BasicBlock *BB);
-+
-+ void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
-+
-+ void collectInfos();
-+
-+ bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
-+
-+ void killTerminator(BasicBlock *BB);
-+
-+ RegionNode *skipChained(RegionNode *Node);
-+
-+ void delPhiValues(BasicBlock *From, BasicBlock *To);
-+
-+ void addPhiValues(BasicBlock *From, BasicBlock *To);
-+
-+ BasicBlock *getNextFlow(BasicBlock *Prev);
-+
-+ bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
-+
-+ BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
-+
-+ void createFlow();
-+
-+ void insertConditions();
-+
-+ void rebuildSSA();
-+
-+public:
-+ AMDGPUStructurizeCFG():
-+ RegionPass(ID) {
-+
-+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
-+ }
-+
-+ virtual bool doInitialization(Region *R, RGPassManager &RGM);
-+
-+ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
-+
-+ virtual const char *getPassName() const {
-+ return "AMDGPU simplify control flow";
-+ }
-+
-+ void getAnalysisUsage(AnalysisUsage &AU) const {
-+
-+ AU.addRequired<DominatorTree>();
-+ AU.addPreserved<DominatorTree>();
-+ RegionPass::getAnalysisUsage(AU);
-+ }
-+
-+};
-+
-+} // end anonymous namespace
-+
-+char AMDGPUStructurizeCFG::ID = 0;
-+
-+/// \brief Initialize the types and constants used in the pass
-+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
-+ LLVMContext &Context = R->getEntry()->getContext();
-+
-+ Boolean = Type::getInt1Ty(Context);
-+ BoolTrue = ConstantInt::getTrue(Context);
-+ BoolFalse = ConstantInt::getFalse(Context);
-+ BoolUndef = UndefValue::get(Boolean);
-+
-+ return false;
-+}
-+
-+/// \brief Build up the general order of nodes
-+void AMDGPUStructurizeCFG::orderNodes() {
-+ scc_iterator<Region *> I = scc_begin(ParentRegion),
-+ E = scc_end(ParentRegion);
-+ for (Order.clear(); I != E; ++I) {
-+ std::vector<RegionNode *> &Nodes = *I;
-+ Order.append(Nodes.begin(), Nodes.end());
-+ }
-+}
-+
-+/// \brief Build blocks and loop predicates
-+void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
-+ BBPredicates &Pred, bool Invert) {
-+ Value *True = Invert ? BoolFalse : BoolTrue;
-+ Value *False = Invert ? BoolTrue : BoolFalse;
-+
-+ RegionInfo *RI = ParentRegion->getRegionInfo();
-+ BasicBlock *BB = Term->getParent();
-+
-+ // Handle the case where multiple regions start at the same block
-+ Region *R = BB != ParentRegion->getEntry() ?
-+ RI->getRegionFor(BB) : ParentRegion;
-+
-+ if (R == ParentRegion) {
-+ // It's a top level block in our region
-+ Value *Cond = True;
-+ if (Term->isConditional()) {
-+ BasicBlock *Other = Term->getSuccessor(!Idx);
-+
-+ if (Visited.count(Other)) {
-+ if (!Pred.count(Other))
-+ Pred[Other] = False;
-+
-+ if (!Pred.count(BB))
-+ Pred[BB] = True;
-+ return;
-+ }
-+ Cond = Term->getCondition();
-+
-+ if (Idx != Invert)
-+ Cond = BinaryOperator::CreateNot(Cond, "", Term);
-+ }
-+
-+ Pred[BB] = Cond;
-+
-+ } else if (ParentRegion->contains(R)) {
-+ // It's a block in a sub region
-+ while(R->getParent() != ParentRegion)
-+ R = R->getParent();
-+
-+ Pred[R->getEntry()] = True;
-+
-+ } else {
-+ // It's a branch from outside into our parent region
-+ Pred[BB] = True;
-+ }
-+}
-+
-+/// \brief Analyze the incoming edges of each block and build up predicates
-+void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
-+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-+ BBPredicates &Pred = Predicates[BB];
-+
-+ for (; PI != PE; ++PI) {
-+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
-+
-+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-+ BasicBlock *Succ = Term->getSuccessor(i);
-+ if (Succ != BB)
-+ continue;
-+ buildPredicate(Term, i, Pred, false);
-+ }
-+ }
-+}
-+
-+/// \brief Analyze the conditions leading to a loop back to a previous block
-+void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
-+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-+
-+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-+ BasicBlock *Succ = Term->getSuccessor(i);
-+
-+ // Ignore it if it's not a back edge
-+ if (!Visited.count(Succ))
-+ continue;
-+
-+ buildPredicate(Term, i, LoopPred, true);
-+
-+ LoopEnd = BB;
-+ if (Visited[Succ] < LoopIdx) {
-+ LoopIdx = Visited[Succ];
-+ LoopStart = Succ;
-+ }
-+ }
-+}
-+
-+/// \brief Collect the loop and predicate information
-+void AMDGPUStructurizeCFG::collectInfos() {
-+ unsigned Number = 0, LoopIdx = ~0;
-+
-+ // Reset predicates
-+ Predicates.clear();
-+
-+ // and loop info
-+ LoopStart = LoopEnd = 0;
-+ LoopPred.clear();
-+
-+ RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
-+ for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
-+
-+ // Analyze all the conditions leading to a node
-+ analyzeBlock((*OI)->getEntry());
-+
-+ if ((*OI)->isSubRegion())
-+ continue;
-+
-+ // Find the first/last loop nodes and loop predicates
-+ analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
-+ }
-+}
-+
-+/// \brief Does A dominate all of the predicates of B?
-+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
-+ BBPredicates &Preds = Predicates[B];
-+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
-+ PI != PE; ++PI) {
-+
-+ if (!DT->dominates(A, PI->first))
-+ return false;
-+ }
-+ return true;
-+}
-+
-+/// \brief Remove PHI values from all successors and then remove the terminator.
-+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
-+ TerminatorInst *Term = BB->getTerminator();
-+ if (!Term)
-+ return;
-+
-+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
-+ SI != SE; ++SI) {
-+
-+ delPhiValues(BB, *SI);
-+ }
-+
-+ Term->eraseFromParent();
-+}
-+
-+/// First: Skip forward to the first region node that either isn't a subregion
-+/// or doesn't dominate its exit; remove all the skipped nodes from the node
-+/// order.
-+///
-+/// Second: Handle the first successor directly if the resulting node's
-+/// successor predicates are still dominated by the original entry.
-+RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
-+ BasicBlock *Entry = Node->getEntry();
-+
-+ // Skip forward as long as it is just a linear flow
-+ while (true) {
-+ BasicBlock *Entry = Node->getEntry();
-+ BasicBlock *Exit;
-+
-+ if (Node->isSubRegion()) {
-+ Exit = Node->getNodeAs<Region>()->getExit();
-+ } else {
-+ TerminatorInst *Term = Entry->getTerminator();
-+ if (Term->getNumSuccessors() != 1)
-+ break;
-+ Exit = Term->getSuccessor(0);
-+ }
-+
-+ // It's a back edge, break here so we can insert a loop node
-+ if (!Visited.count(Exit))
-+ return Node;
-+
-+ // More than one edge is pointing to the exit
-+ if (!DT->dominates(Entry, Exit))
-+ return Node;
-+
-+ RegionNode *Next = ParentRegion->getNode(Exit);
-+ RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
-+ assert(I != Order.end());
-+
-+ Visited.erase(Next->getEntry());
-+ Order.erase(I);
-+ Node = Next;
-+ }
-+
-+ BasicBlock *BB = Node->getEntry();
-+ TerminatorInst *Term = BB->getTerminator();
-+ if (Term->getNumSuccessors() != 2)
-+ return Node;
-+
-+ // Our node has exactly two successors, check if we can handle
-+ // any of them directly
-+ BasicBlock *Succ = Term->getSuccessor(0);
-+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
-+ Succ = Term->getSuccessor(1);
-+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
-+ return Node;
-+ } else {
-+ BasicBlock *Succ2 = Term->getSuccessor(1);
-+ if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
-+ dominatesPredicates(Entry, Succ2))
-+ Succ = Succ2;
-+ }
-+
-+ RegionNode *Next = ParentRegion->getNode(Succ);
-+ RNVector::iterator E = Order.end();
-+ RNVector::iterator I = std::find(Order.begin(), E, Next);
-+ assert(I != E);
-+
-+ killTerminator(BB);
-+ FlowsInserted.push_back(BB);
-+ Visited.erase(Succ);
-+ Order.erase(I);
-+ return ParentRegion->getNode(wireFlowBlock(BB, Next));
-+}
-+
-+/// \brief Remove all PHI values coming from "From" into "To" and remember
-+/// them in DeletedPhis
-+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
-+ PhiMap &Map = DeletedPhis[To];
-+ for (BasicBlock::iterator I = To->begin(), E = To->end();
-+ I != E && isa<PHINode>(*I);) {
-+
-+ PHINode &Phi = cast<PHINode>(*I++);
-+ while (Phi.getBasicBlockIndex(From) != -1) {
-+ Value *Deleted = Phi.removeIncomingValue(From, false);
-+ Map[&Phi].push_back(std::make_pair(From, Deleted));
-+ }
-+ }
-+}
-+
-+/// \brief Add the PHI values back once we know the new predecessor
-+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
-+ if (!DeletedPhis.count(To))
-+ return;
-+
-+ PhiMap &Map = DeletedPhis[To];
-+ SSAUpdater Updater;
-+
-+ for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
-+
-+ PHINode *Phi = I->first;
-+ Updater.Initialize(Phi->getType(), "");
-+ BasicBlock *Fallback = To;
-+ bool HaveFallback = false;
-+
-+ for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
-+ VI != VE; ++VI) {
-+
-+ Updater.AddAvailableValue(VI->first, VI->second);
-+ BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
-+ if (Dom == VI->first)
-+ HaveFallback = true;
-+ else if (Dom != Fallback)
-+ HaveFallback = false;
-+ Fallback = Dom;
-+ }
-+ if (!HaveFallback) {
-+ Value *Undef = UndefValue::get(Phi->getType());
-+ Updater.AddAvailableValue(Fallback, Undef);
-+ }
-+
-+ Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
-+ }
-+ DeletedPhis.erase(To);
-+}
-+
-+/// \brief Create a new flow node and update dominator tree and region info
-+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
-+ LLVMContext &Context = Func->getContext();
-+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
-+ Order.back()->getEntry();
-+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
-+ Func, Insert);
-+ DT->addNewBlock(Flow, Prev);
-+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
-+ FlowsInserted.push_back(Flow);
-+ return Flow;
-+}
-+
-+/// \brief Can we predict that this node will always be executed?
-+bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
-+ BasicBlock *Node) {
-+ BBPredicates &Preds = Predicates[Node];
-+ bool Dominated = false;
-+
-+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
-+ I != E; ++I) {
-+
-+ if (I->second != BoolTrue)
-+ return false;
-+
-+ if (!Dominated && DT->dominates(I->first, Prev))
-+ Dominated = true;
-+ }
-+ return Dominated;
-+}
-+
-+/// \brief Wire up the new control flow by inserting or updating the branch
-+/// instructions at node exits
-+BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
-+ RegionNode *Node) {
-+ BasicBlock *Entry = Node->getEntry();
-+
-+ if (LoopStart == Entry) {
-+ LoopStart = Prev;
-+ LoopPred[Prev] = BoolTrue;
-+ }
-+
-+ // Wire it up temporarily; skipChained may recurse into us
-+ BranchInst::Create(Entry, Prev);
-+ DT->changeImmediateDominator(Entry, Prev);
-+ addPhiValues(Prev, Entry);
-+
-+ Node = skipChained(Node);
-+
-+ BasicBlock *Next = getNextFlow(Prev);
-+ if (!isPredictableTrue(Prev, Entry)) {
-+ // Let Prev point to entry and next block
-+ Prev->getTerminator()->eraseFromParent();
-+ BranchInst::Create(Entry, Next, BoolUndef, Prev);
-+ } else {
-+ DT->changeImmediateDominator(Next, Entry);
-+ }
-+
-+ // Let node exit(s) point to next block
-+ if (Node->isSubRegion()) {
-+ Region *SubRegion = Node->getNodeAs<Region>();
-+ BasicBlock *Exit = SubRegion->getExit();
-+
-+ // Find all the edges from the sub region to the exit
-+ BBVector ToDo;
-+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
-+ if (SubRegion->contains(*I))
-+ ToDo.push_back(*I);
-+ }
-+
-+ // Modify the edges to point to the new flow block
-+ for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
-+ delPhiValues(*I, Exit);
-+ TerminatorInst *Term = (*I)->getTerminator();
-+ Term->replaceUsesOfWith(Exit, Next);
-+ }
-+
-+ // Update the region info
-+ SubRegion->replaceExit(Next);
-+
-+ } else {
-+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
-+ killTerminator(BB);
-+ BranchInst::Create(Next, BB);
-+
-+ if (BB == LoopEnd)
-+ LoopEnd = 0;
-+ }
-+
-+ return Next;
-+}
-+
-+/// Destroy the node order and visited map; build up the flow order instead.
-+/// After this function the control flow looks as it should, but the
-+/// branches still have undefined conditions.
-+void AMDGPUStructurizeCFG::createFlow() {
-+ DeletedPhis.clear();
-+
-+ BasicBlock *Prev = Order.pop_back_val()->getEntry();
-+ assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
-+ Visited.erase(Prev);
-+
-+ if (LoopStart == Prev) {
-+ // Loop starts at entry, split entry so that we can predicate it
-+ BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
-+ BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
-+ DT->addNewBlock(Split, Prev);
-+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
-+ Predicates[Split] = Predicates[Prev];
-+ Order.push_back(ParentRegion->getBBNode(Split));
-+ LoopPred[Prev] = BoolTrue;
-+
-+ } else if (LoopStart == Order.back()->getEntry()) {
-+ // Loop starts behind entry, split entry so that we can jump to it
-+ Instruction *Term = Prev->getTerminator();
-+ BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
-+ DT->addNewBlock(Split, Prev);
-+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
-+ Prev = Split;
-+ }
-+
-+ killTerminator(Prev);
-+ FlowsInserted.clear();
-+ FlowsInserted.push_back(Prev);
-+
-+ while (!Order.empty()) {
-+ RegionNode *Node = Order.pop_back_val();
-+ Visited.erase(Node->getEntry());
-+ Prev = wireFlowBlock(Prev, Node);
-+ if (LoopStart && !LoopEnd) {
-+ // Create an extra loop end node
-+ LoopEnd = Prev;
-+ Prev = getNextFlow(LoopEnd);
-+ BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
-+ addPhiValues(LoopEnd, LoopStart);
-+ }
-+ }
-+
-+ BasicBlock *Exit = ParentRegion->getExit();
-+ BranchInst::Create(Exit, Prev);
-+ addPhiValues(Prev, Exit);
-+ if (DT->dominates(ParentRegion->getEntry(), Exit))
-+ DT->changeImmediateDominator(Exit, Prev);
-+
-+ if (LoopStart && LoopEnd) {
-+ BBVector::iterator FI = std::find(FlowsInserted.begin(),
-+ FlowsInserted.end(),
-+ LoopStart);
-+ for (; *FI != LoopEnd; ++FI) {
-+ addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
-+ }
-+ }
-+
-+ assert(Order.empty());
-+ assert(Visited.empty());
-+ assert(DeletedPhis.empty());
-+}
-+
-+/// \brief Insert the missing branch conditions
-+void AMDGPUStructurizeCFG::insertConditions() {
-+ SSAUpdater PhiInserter;
-+
-+ for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
-+ FI != FE; ++FI) {
-+
-+ BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
-+ if (Term->isUnconditional())
-+ continue;
-+
-+ PhiInserter.Initialize(Boolean, "");
-+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
-+
-+ BasicBlock *Succ = Term->getSuccessor(0);
-+ BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
-+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
-+ PI != PE; ++PI) {
-+
-+ PhiInserter.AddAvailableValue(PI->first, PI->second);
-+ }
-+
-+ Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
-+ }
-+}
-+
-+/// Handle a rare case where instructions of the disintegrated nodes
-+/// no longer dominate all of their uses. It is unclear whether this is
-+/// really necessary.
-+void AMDGPUStructurizeCFG::rebuildSSA() {
-+ SSAUpdater Updater;
-+ for (Region::block_iterator I = ParentRegion->block_begin(),
-+ E = ParentRegion->block_end();
-+ I != E; ++I) {
-+
-+ BasicBlock *BB = *I;
-+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
-+ II != IE; ++II) {
-+
-+ bool Initialized = false;
-+ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
-+
-+ Next = I->getNext();
-+
-+ Instruction *User = cast<Instruction>(I->getUser());
-+ if (User->getParent() == BB) {
-+ continue;
-+
-+ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
-+ if (UserPN->getIncomingBlock(*I) == BB)
-+ continue;
-+ }
-+
-+ if (DT->dominates(II, User))
-+ continue;
-+
-+ if (!Initialized) {
-+ Value *Undef = UndefValue::get(II->getType());
-+ Updater.Initialize(II->getType(), "");
-+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
-+ Updater.AddAvailableValue(BB, II);
-+ Initialized = true;
-+ }
-+ Updater.RewriteUseAfterInsertions(*I);
-+ }
-+ }
-+ }
-+}
-+
-+/// \brief Run the transformation for each region found
-+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
-+ if (R->isTopLevelRegion())
-+ return false;
-+
-+ Func = R->getEntry()->getParent();
-+ ParentRegion = R;
-+
-+ DT = &getAnalysis<DominatorTree>();
-+
-+ orderNodes();
-+ collectInfos();
-+ createFlow();
-+ insertConditions();
-+ rebuildSSA();
-+
-+ Order.clear();
-+ Visited.clear();
-+ Predicates.clear();
-+ DeletedPhis.clear();
-+ FlowsInserted.clear();
-+
-+ return true;
-+}
-+
-+/// \brief Create the pass
-+Pass *llvm::createAMDGPUStructurizeCFGPass() {
-+ return new AMDGPUStructurizeCFG();
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,87 @@
-+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUSubtarget.h"
-+
-+using namespace llvm;
-+
-+#define GET_SUBTARGETINFO_ENUM
-+#define GET_SUBTARGETINFO_TARGET_DESC
-+#define GET_SUBTARGETINFO_CTOR
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
-+ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
-+ InstrItins = getInstrItineraryForCPU(CPU);
-+
-+ memset(CapsOverride, 0, sizeof(*CapsOverride)
-+ * AMDGPUDeviceInfo::MaxNumberCapabilities);
-+ // Default card
-+ StringRef GPU = CPU;
-+ Is64bit = false;
-+ DefaultSize[0] = 64;
-+ DefaultSize[1] = 1;
-+ DefaultSize[2] = 1;
-+ ParseSubtargetFeatures(GPU, FS);
-+ DevName = GPU;
-+ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
-+}
-+
-+AMDGPUSubtarget::~AMDGPUSubtarget() {
-+ delete Device;
-+}
-+
-+bool
-+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
-+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
-+ "Caps index is out of bounds!");
-+ return CapsOverride[caps];
-+}
-+bool
-+AMDGPUSubtarget::is64bit() const {
-+ return Is64bit;
-+}
-+bool
-+AMDGPUSubtarget::isTargetELF() const {
-+ return false;
-+}
-+size_t
-+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
-+ // DefaultSize only has three entries, so dims 0-2 are the valid indices.
-+ if (dim > 2) {
-+ return 1;
-+ } else {
-+ return DefaultSize[dim];
-+ }
-+}
-+
-+std::string
-+AMDGPUSubtarget::getDataLayout() const {
-+ if (!Device) {
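-+ // Fallback layout: "e" marks little endian and "p:32:32:32" 32-bit
-+ // pointers; the remaining entries give the sizes and alignments of the
-+ // scalar and vector types.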
-+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
-+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
-+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
-+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
-+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
-+ }
-+ return Device->getDataLayout();
-+}
-+
-+std::string
-+AMDGPUSubtarget::getDeviceName() const {
-+ return DevName;
-+}
-+const AMDGPUDevice *
-+AMDGPUSubtarget::device() const {
-+ return Device;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h llvm-r600/lib/Target/R600/AMDGPUSubtarget.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.h 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,65 @@
-+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDGPU --*- C++ -*-====//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU specific subclass of TargetSubtarget.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUSUBTARGET_H
-+#define AMDGPUSUBTARGET_H
-+#include "AMDILDevice.h"
-+#include "llvm/ADT/StringExtras.h"
-+#include "llvm/ADT/StringRef.h"
-+#include "llvm/Target/TargetSubtargetInfo.h"
-+
-+#define GET_SUBTARGETINFO_HEADER
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+#define MAX_CB_SIZE (1 << 16)
-+
-+namespace llvm {
-+
-+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
-+private:
-+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
-+ const AMDGPUDevice *Device;
-+ size_t DefaultSize[3];
-+ std::string DevName;
-+ bool Is64bit;
-+ bool Is32on64bit;
-+ bool DumpCode;
-+ bool R600ALUInst;
-+
-+ InstrItineraryData InstrItins;
-+
-+public:
-+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
-+ virtual ~AMDGPUSubtarget();
-+
-+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
-+ virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
-+
-+ bool isOverride(AMDGPUDeviceInfo::Caps) const;
-+ bool is64bit() const;
-+
-+ // Helper functions to simplify if statements
-+ bool isTargetELF() const;
-+ const AMDGPUDevice* device() const;
-+ std::string getDataLayout() const;
-+ std::string getDeviceName() const;
-+ virtual size_t getDefaultSize(uint32_t dim) const;
-+ bool dumpCode() const { return DumpCode; }
-+ bool r600ALUEncoding() const { return R600ALUInst; }
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUSUBTARGET_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,148 @@
-+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief The AMDGPU target machine contains all of the hardware specific
-+/// information needed to emit code for R600 and SI GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDGPU.h"
-+#include "R600ISelLowering.h"
-+#include "R600InstrInfo.h"
-+#include "SIISelLowering.h"
-+#include "SIInstrInfo.h"
-+#include "llvm/Analysis/Passes.h"
-+#include "llvm/Analysis/Verifier.h"
-+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-+#include "llvm/CodeGen/MachineModuleInfo.h"
-+#include "llvm/CodeGen/Passes.h"
-+#include "llvm/MC/MCAsmInfo.h"
-+#include "llvm/PassManager.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/Support/raw_os_ostream.h"
-+#include "llvm/Transforms/IPO.h"
-+#include "llvm/Transforms/Scalar.h"
-+#include <llvm/CodeGen/Passes.h>
-+
-+using namespace llvm;
-+
-+extern "C" void LLVMInitializeR600Target() {
-+ // Register the target
-+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
-+}
-+
-+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
-+ StringRef CPU, StringRef FS,
-+ TargetOptions Options,
-+ Reloc::Model RM, CodeModel::Model CM,
-+ CodeGenOpt::Level OptLevel) :
-+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
-+ Subtarget(TT, CPU, FS),
-+ Layout(Subtarget.getDataLayout()),
-+ FrameLowering(TargetFrameLowering::StackGrowsUp,
-+ Subtarget.device()->getStackAlignment(), 0),
-+ IntrinsicInfo(this),
-+ InstrItins(&Subtarget.getInstrItineraryData()) {
-+ // TLInfo uses InstrInfo so it must be initialized after.
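-+ // Generations up to HD6XXX are the VLIW R600-style chips; anything newer
-+ // is Southern Islands (SI) and gets the SI codegen classes instead.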
-+ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+ InstrInfo = new R600InstrInfo(*this);
-+ TLInfo = new R600TargetLowering(*this);
-+ } else {
-+ InstrInfo = new SIInstrInfo(*this);
-+ TLInfo = new SITargetLowering(*this);
-+ }
-+}
-+
-+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
-+}
-+
-+namespace {
-+class AMDGPUPassConfig : public TargetPassConfig {
-+public:
-+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
-+ : TargetPassConfig(TM, PM) {}
-+
-+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
-+ return getTM<AMDGPUTargetMachine>();
-+ }
-+
-+ virtual bool addPreISel();
-+ virtual bool addInstSelector();
-+ virtual bool addPreRegAlloc();
-+ virtual bool addPostRegAlloc();
-+ virtual bool addPreSched2();
-+ virtual bool addPreEmitPass();
-+};
-+} // End of anonymous namespace
-+
-+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
-+ return new AMDGPUPassConfig(this, PM);
-+}
-+
-+bool
-+AMDGPUPassConfig::addPreISel() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createAMDGPUStructurizeCFGPass());
-+ addPass(createSIAnnotateControlFlowPass());
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addInstSelector() {
-+ addPass(createAMDGPUPeepholeOpt(*TM));
-+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreRegAlloc() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+
-+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createSIAssignInterpRegsPass(*TM));
-+ }
-+ addPass(createAMDGPUConvertToISAPass(*TM));
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPostRegAlloc() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+
-+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createSIInsertWaits(*TM));
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreSched2() {
-+
-+ addPass(&IfConverterID);
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreEmitPass() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createAMDGPUCFGPreparationPass(*TM));
-+ addPass(createAMDGPUCFGStructurizerPass(*TM));
-+ addPass(createR600ExpandSpecialInstrsPass(*TM));
-+ addPass(createR600LowerConstCopy(*TM));
-+ addPass(&FinalizeMachineBundlesID);
-+ } else {
-+ addPass(createSILowerLiteralConstantsPass(*TM));
-+ addPass(createSILowerControlFlowPass(*TM));
-+ }
-+
-+ return false;
-+}
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,70 @@
-+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_TARGET_MACHINE_H
-+#define AMDGPU_TARGET_MACHINE_H
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPUSubtarget.h"
-+#include "AMDILFrameLowering.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "R600ISelLowering.h"
-+#include "llvm/ADT/OwningPtr.h"
-+#include "llvm/DataLayout.h"
-+
-+namespace llvm {
-+
-+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
-+
-+class AMDGPUTargetMachine : public LLVMTargetMachine {
-+
-+ AMDGPUSubtarget Subtarget;
-+ const DataLayout Layout;
-+ AMDGPUFrameLowering FrameLowering;
-+ AMDGPUIntrinsicInfo IntrinsicInfo;
-+ const AMDGPUInstrInfo * InstrInfo;
-+ AMDGPUTargetLowering * TLInfo;
-+ const InstrItineraryData* InstrItins;
-+
-+public:
-+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
-+ StringRef FS,
-+ TargetOptions Options,
-+ Reloc::Model RM, CodeModel::Model CM,
-+ CodeGenOpt::Level OL);
-+ ~AMDGPUTargetMachine();
-+ virtual const AMDGPUFrameLowering* getFrameLowering() const {
-+ return &FrameLowering;
-+ }
-+ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
-+ return &IntrinsicInfo;
-+ }
-+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
-+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
-+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
-+ return &InstrInfo->getRegisterInfo();
-+ }
-+ virtual AMDGPUTargetLowering * getTargetLowering() const {
-+ return TLInfo;
-+ }
-+ virtual const InstrItineraryData* getInstrItineraryData() const {
-+ return InstrItins;
-+ }
-+ virtual const DataLayout* getDataLayout() const { return &Layout; }
-+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPU_TARGET_MACHINE_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.td llvm-r600/lib/Target/R600/AMDGPU.td
---- llvm-3.2.src/lib/Target/R600/AMDGPU.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPU.td 2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,40 @@
-+//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+
-+// Include AMDIL TD files
-+include "AMDILBase.td"
-+
-+
-+def AMDGPUInstrInfo : InstrInfo {
-+ let guessInstructionProperties = 1;
-+}
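-+
-+// Editorial note: guessInstructionProperties = 1 lets TableGen infer flags
-+// such as mayLoad/mayStore/hasSideEffects from instruction patterns instead
-+// of requiring them to be set explicitly on every instruction.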
-+
-+//===----------------------------------------------------------------------===//
-+// Declare the target which we are implementing
-+//===----------------------------------------------------------------------===//
-+def AMDGPUAsmWriter : AsmWriter {
-+ string AsmWriterClassName = "InstPrinter";
-+ int Variant = 0;
-+ bit isMCAsmWriter = 1;
-+}
-+
-+def AMDGPU : Target {
-+ // Pull in Instruction Info:
-+ let InstructionSet = AMDGPUInstrInfo;
-+ let AssemblyWriters = [AMDGPUAsmWriter];
-+}
-+
-+// Include AMDGPU TD files
-+include "R600Schedule.td"
-+include "SISchedule.td"
-+include "Processors.td"
-+include "AMDGPUInstrInfo.td"
-+include "AMDGPUIntrinsics.td"
-+include "AMDGPURegisterInfo.td"
-+include "AMDGPUInstructions.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp
---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,115 @@
-+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+// \file
-+//==-----------------------------------------------------------------------===//
-+#include "AMDIL7XXDevice.h"
-+#include "AMDGPUSubtarget.h"
-+#include "AMDILDevice.h"
-+
-+using namespace llvm;
-+
-+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
-+ setCaps();
-+ std::string name = mSTM->getDeviceName();
-+ if (name == "rv710") {
-+ DeviceFlag = OCL_DEVICE_RV710;
-+ } else if (name == "rv730") {
-+ DeviceFlag = OCL_DEVICE_RV730;
-+ } else {
-+ DeviceFlag = OCL_DEVICE_RV770;
-+ }
-+}
-+
-+AMDGPU7XXDevice::~AMDGPU7XXDevice() {
-+}
-+
-+void AMDGPU7XXDevice::setCaps() {
-+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-+}
-+
-+size_t AMDGPU7XXDevice::getMaxLDSSize() const {
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return MAX_LDS_SIZE_700;
-+ }
-+ return 0;
-+}
-+
-+size_t AMDGPU7XXDevice::getWavefrontSize() const {
-+ return AMDGPUDevice::HalfWavefrontSize;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getGeneration() const {
-+ return AMDGPUDeviceInfo::HD4XXX;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
-+ switch (DeviceID) {
-+ default:
-+ assert(0 && "ID type passed in is unknown!");
-+ break;
-+ case GLOBAL_ID:
-+ case CONSTANT_ID:
-+ case RAW_UAV_ID:
-+ case ARENA_UAV_ID:
-+ break;
-+ case LDS_ID:
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return DEFAULT_LDS_ID;
-+ }
-+ break;
-+ case SCRATCH_ID:
-+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
-+ return DEFAULT_SCRATCH_ID;
-+ }
-+ break;
-+ case GDS_ID:
-+ assert(0 && "GDS UAV ID is not supported on this chip");
-+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
-+ return DEFAULT_GDS_ID;
-+ }
-+ break;
-+ };
-+
-+ return 0;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
-+ return 1;
-+}
-+
-+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
-+ setCaps();
-+}
-+
-+AMDGPU770Device::~AMDGPU770Device() {
-+}
-+
-+void AMDGPU770Device::setCaps() {
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
-+ mSWBits.set(AMDGPUDeviceInfo::FMA);
-+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
-+ }
-+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
-+ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
-+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
-+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-+}
-+
-+size_t AMDGPU770Device::getWavefrontSize() const {
-+ return AMDGPUDevice::WavefrontSize;
-+}
-+
-+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
-+}
-+
-+AMDGPU710Device::~AMDGPU710Device() {
-+}
-+
-+size_t AMDGPU710Device::getWavefrontSize() const {
-+ return AMDGPUDevice::QuarterWavefrontSize;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h llvm-r600/lib/Target/R600/AMDIL7XXDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.h 2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,72 @@
-+//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+///
-+/// This file will define the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDIL7XXDEVICEIMPL_H
-+#define AMDIL7XXDEVICEIMPL_H
-+#include "AMDILDevice.h"
-+
-+namespace llvm {
-+class AMDGPUSubtarget;
-+
-+//===----------------------------------------------------------------------===//
-+// 7XX generation of devices and their respective sub classes
-+//===----------------------------------------------------------------------===//
-+
-+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
-+///
-+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
-+/// support the minimal features that are required to be considered OpenCL 1.0
-+/// compliant and nothing more.
-+class AMDGPU7XXDevice : public AMDGPUDevice {
-+public:
-+ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPU7XXDevice();
-+ virtual size_t getMaxLDSSize() const;
-+ virtual size_t getWavefrontSize() const;
-+ virtual uint32_t getGeneration() const;
-+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
-+ virtual uint32_t getMaxNumUAVs() const;
-+
-+protected:
-+ virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPU770Device class represents the RV770 chip and its
-+/// derivative cards.
-+///
-+/// The difference between this device and the base class is this device device
-+/// adds support for double precision and has a larger wavefront size.
-+class AMDGPU770Device : public AMDGPU7XXDevice {
-+public:
-+ AMDGPU770Device(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPU770Device();
-+ virtual size_t getWavefrontSize() const;
-+private:
-+ virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPU710Device class derives from the 7XX base class.
-+///
-+/// This class is a smaller derivative, so we need to overload some of the
-+/// functions in order to correctly specify this information.
-+class AMDGPU710Device : public AMDGPU7XXDevice {
-+public:
-+ AMDGPU710Device(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPU710Device();
-+ virtual size_t getWavefrontSize() const;
-+};
-+
-+} // namespace llvm
-+#endif // AMDIL7XXDEVICEIMPL_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILBase.td llvm-r600/lib/Target/R600/AMDILBase.td
---- llvm-3.2.src/lib/Target/R600/AMDILBase.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILBase.td 2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,85 @@
-+//===- AMDILBase.td - AMDIL Target Machine ---------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+// Target-independent interfaces which we are implementing
-+//===----------------------------------------------------------------------===//
-+
-+include "llvm/Target/Target.td"
-+
-+// Dummy Instruction itineraries for pseudo instructions
-+def ALU_NULL : FuncUnit;
-+def NullALU : InstrItinClass;
-+
-+//===----------------------------------------------------------------------===//
-+// AMDIL Subtarget features.
-+//===----------------------------------------------------------------------===//
-+def FeatureFP64 : SubtargetFeature<"fp64",
-+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
-+ "true",
-+ "Enable 64bit double precision operations">;
-+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
-+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
-+ "true",
-+ "Enable byte addressable stores">;
-+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
-+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
-+ "true",
-+ "Enable duplicate barrier detection (HD5XXX or later).">;
-+def FeatureImages : SubtargetFeature<"images",
-+ "CapsOverride[AMDGPUDeviceInfo::Images]",
-+ "true",
-+ "Enable image functions">;
-+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
-+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
-+ "true",
-+ "Generate multiple UAV code (HD5XXX family or later)">;
-+def FeatureMacroDB : SubtargetFeature<"macrodb",
-+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
-+ "true",
-+ "Use internal macrodb, instead of macrodb in driver">;
-+def FeatureNoAlias : SubtargetFeature<"noalias",
-+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
-+ "true",
-+ "Assert that all kernel argument pointers are not aliased">;
-+def FeatureNoInline : SubtargetFeature<"no-inline",
-+ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
-+ "true",
-+ "Specify whether to disable function inlining">;
-+
-+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
-+ "Is64bit",
-+ "false",
-+ "Specify if 64bit addressing should be used.">;
-+
-+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
-+ "Is32on64bit",
-+ "false",
-+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
-+def FeatureDebug : SubtargetFeature<"debug",
-+ "CapsOverride[AMDGPUDeviceInfo::Debug]",
-+ "true",
-+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
-+def FeatureDumpCode : SubtargetFeature <"DumpCode",
-+ "DumpCode",
-+ "true",
-+ "Dump MachineInstrs in the CodeEmitter">;
-+
-+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-+ "R600ALUInst",
-+ "false",
-+ "Older version of ALU instructions encoding.">;
-+
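-+// These features are toggled from the command line via -mattr (for example,
-+// -mattr=+fp64,+images); each one sets the named capability override on the
-+// subtarget before the device capabilities are derived.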
-+
-+//===----------------------------------------------------------------------===//
-+// Register File, Calling Conv, Instruction Descriptions
-+//===----------------------------------------------------------------------===//
-+
-+
-+include "AMDILRegisterInfo.td"
-+include "AMDILInstrInfo.td"
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp 2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,3045 @@
-+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+
-+#define DEBUGME 0
-+#define DEBUG_TYPE "structcfg"
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDIL.h"
-+#include "llvm/ADT/SCCIterator.h"
-+#include "llvm/ADT/SmallVector.h"
-+#include "llvm/ADT/Statistic.h"
-+#include "llvm/Analysis/DominatorInternals.h"
-+#include "llvm/Analysis/Dominators.h"
-+#include "llvm/CodeGen/MachinePostDominators.h"
-+#include "llvm/CodeGen/MachineDominators.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineJumpTableInfo.h"
-+#include "llvm/CodeGen/MachineLoopInfo.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+using namespace llvm;
-+
-+// TODO: move-begin.
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Statistics for CFGStructurizer.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
-+ "matched");
-+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
-+ "matched");
-+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
-+ "pattern matched");
-+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
-+ "pattern matched");
-+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
-+ "matched");
-+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
-+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Miscellaneous utility for CFGStructurizer.
-+//
-+//===----------------------------------------------------------------------===//
-+namespace llvmCFGStruct {
-+#define SHOWNEWINSTR(i) \
-+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
-+
-+#define SHOWNEWBLK(b, msg) \
-+if (DEBUGME) { \
-+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-+ errs() << "\n"; \
-+}
-+
-+#define SHOWBLK_DETAIL(b, msg) \
-+if (DEBUGME) { \
-+ if (b) { \
-+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-+ b->print(errs()); \
-+ errs() << "\n"; \
-+ } \
-+}
-+
-+#define INVALIDSCCNUM -1
-+#define INVALIDREGNUM 0
-+
-+template<class LoopinfoT>
-+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
-+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
-+ iterEnd = LoopInfo.end();
-+ iter != iterEnd; ++iter) {
-+ (*iter)->print(OS, 0);
-+ }
-+}
-+
-+template<class NodeT>
-+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
-+ size_t sz = Src.size();
-+ for (size_t i = 0; i < sz/2; ++i) {
-+ NodeT *t = Src[i];
-+ Src[i] = Src[sz - i - 1];
-+ Src[sz - i - 1] = t;
-+ }
-+}
-+
-+} //end namespace llvmCFGStruct
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// supporting data structure for CFGStructurizer
-+//
-+//===----------------------------------------------------------------------===//
-+
-+namespace llvmCFGStruct {
-+template<class PassT>
-+struct CFGStructTraits {
-+};
-+
-+template <class InstrT>
-+class BlockInformation {
-+public:
-+ bool isRetired;
-+ int sccNum;
-+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
-+ //Instructions defining the corresponding successor.
-+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
-+};
-+
-+template <class BlockT, class InstrT, class RegiT>
-+class LandInformation {
-+public:
-+ BlockT *landBlk;
-+ std::set<RegiT> breakInitRegs; //Registers that need "reg = 0" before
-+ //WHILELOOP(thisloop), i.e. initialized
-+ //before entering thisloop.
-+ std::set<RegiT> contInitRegs; //Registers that need "reg = 0" after
-+ //WHILELOOP(thisloop), i.e. initialized
-+ //after entering thisloop.
-+ std::set<RegiT> endbranchInitRegs; //Initialized before entering this loop;
-+ //at the loop land block, branch
-+ //conditionally on this register.
-+ std::set<RegiT> breakOnRegs; //Registers that need "if (reg) break
-+ //endif" after ENDLOOP(thisloop) to break
-+ //out of outerLoopOf(thisLoop).
-+ std::set<RegiT> contOnRegs; //Registers that need "if (reg) continue
-+ //endif" after ENDLOOP(thisloop) to
-+ //continue in outerLoopOf(thisLoop).
-+ LandInformation() : landBlk(NULL) {}
-+};
-+
-+} //end of namespace llvmCFGStruct
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGStructurizer
-+//
-+//===----------------------------------------------------------------------===//
-+
-+namespace llvmCFGStruct {
-+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
-+template<class PassT>
-+class CFGStructurizer {
-+public:
-+ typedef enum {
-+ Not_SinglePath = 0,
-+ SinglePath_InPath = 1,
-+ SinglePath_NotInPath = 2
-+ } PathToKind;
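-+ // Result of singlePathTo(src, dst): SinglePath_InPath means dst lies on
-+ // the single successor chain starting at src; SinglePath_NotInPath means
-+ // that chain ends without reaching dst; Not_SinglePath means there is no
-+ // such single chain.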
-+
-+public:
-+ typedef typename PassT::InstructionType InstrT;
-+ typedef typename PassT::FunctionType FuncT;
-+ typedef typename PassT::DominatortreeType DomTreeT;
-+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
-+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
-+ typedef typename PassT::LoopinfoType LoopInfoT;
-+
-+ typedef GraphTraits<FuncT *> FuncGTraits;
-+ //typedef FuncGTraits::nodes_iterator BlockIterator;
-+ typedef typename FuncT::iterator BlockIterator;
-+
-+ typedef typename FuncGTraits::NodeType BlockT;
-+ typedef GraphTraits<BlockT *> BlockGTraits;
-+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
-+ //typedef BlockGTraits::succ_iterator InstructionIterator;
-+ typedef typename BlockT::iterator InstrIterator;
-+
-+ typedef CFGStructTraits<PassT> CFGTraits;
-+ typedef BlockInformation<InstrT> BlockInfo;
-+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
-+
-+ typedef int RegiT;
-+ typedef typename PassT::LoopType LoopT;
-+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
-+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
-+ //landing info for loop break
-+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
-+
-+public:
-+ CFGStructurizer();
-+ ~CFGStructurizer();
-+
-+ /// Perform the CFG structurization
-+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-+
-+ /// Perform the CFG preparation
-+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-+
-+private:
-+ void reversePredicateSetter(typename BlockT::iterator);
-+ void orderBlocks();
-+ void printOrderedBlocks(llvm::raw_ostream &OS);
-+ int patternMatch(BlockT *CurBlock);
-+ int patternMatchGroup(BlockT *CurBlock);
-+
-+ int serialPatternMatch(BlockT *CurBlock);
-+ int ifPatternMatch(BlockT *CurBlock);
-+ int switchPatternMatch(BlockT *CurBlock);
-+ int loopendPatternMatch(BlockT *CurBlock);
-+ int loopPatternMatch(BlockT *CurBlock);
-+
-+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
-+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
-+ //int loopWithoutBreak(BlockT *);
-+
-+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
-+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
-+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
-+ BlockT *ContBlock, LoopT *contLoop);
-+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
-+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock);
-+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock);
-+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock, BlockT **LandBlockPtr);
-+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock, BlockT *LandBlock,
-+ bool Detail = false);
-+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
-+ bool AllowSideEntry = true);
-+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
-+ bool AllowSideEntry = true);
-+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
-+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
-+
-+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
-+ BlockT *TrueBlock, BlockT *FalseBlock,
-+ BlockT *LandBlock);
-+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
-+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
-+ BlockT *ExitLandBlock, RegiT SetReg);
-+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
-+ RegiT SetReg);
-+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
-+ std::set<BlockT*> &ExitBlockSet,
-+ BlockT *ExitLandBlk);
-+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
-+ BlockTSmallerVector &ExitingBlocks,
-+ BlockTSmallerVector &ExitBlocks);
-+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
-+ void removeUnconditionalBranch(BlockT *SrcBlock);
-+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
-+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
-+
-+ void removeSuccessor(BlockT *SrcBlock);
-+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
-+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
-+
-+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
-+ InstrIterator InsertPos);
-+
-+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
-+ int getSCCNum(BlockT *srcBlk);
-+
-+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
-+ bool isRetiredBlock(BlockT *SrcBlock);
-+ bool isActiveLoophead(BlockT *CurBlock);
-+ bool needMigrateBlock(BlockT *Block);
-+
-+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
-+ BlockTSmallerVector &exitBlocks,
-+ std::set<BlockT*> &ExitBlockSet);
-+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
-+ BlockT *getLoopLandBlock(LoopT *LoopRep);
-+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
-+
-+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
-+
-+ bool hasBackEdge(BlockT *curBlock);
-+ unsigned getLoopDepth (LoopT *LoopRep);
-+ int countActiveBlock(
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
-+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
-+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
-+
-+private:
-+ DomTreeT *domTree;
-+ PostDomTreeT *postDomTree;
-+ LoopInfoT *loopInfo;
-+ PassT *passRep;
-+ FuncT *funcRep;
-+
-+ BlockInfoMap blockInfoMap;
-+ LoopLandInfoMap loopLandInfoMap;
-+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
-+ const AMDGPURegisterInfo *TRI;
-+
-+}; //template class CFGStructurizer
-+
-+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
-+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
-+}
-+
-+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
-+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
-+ E = blockInfoMap.end(); I != E; ++I) {
-+ delete I->second;
-+ }
-+}
-+
-+template<class PassT>
-+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
-+ const AMDGPURegisterInfo * tri) {
-+ passRep = &pass;
-+ funcRep = &func;
-+ TRI = tri;
-+
-+ bool changed = false;
-+
-+ //FIXME: if the flow graph is not reducible, make it so ???
-+
-+ if (DEBUGME) {
-+ errs() << "AMDGPUCFGStructurizer::prepare\n";
-+ }
-+
-+ loopInfo = CFGTraits::getLoopInfo(pass);
-+ if (DEBUGME) {
-+ errs() << "LoopInfo:\n";
-+ PrintLoopinfo(*loopInfo, errs());
-+ }
-+
-+ orderBlocks();
-+ if (DEBUGME) {
-+ errs() << "Ordered blocks:\n";
-+ printOrderedBlocks(errs());
-+ }
-+
-+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
-+
-+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
-+ iterEnd = loopInfo->end();
-+ iter != iterEnd; ++iter) {
-+ LoopT* loopRep = (*iter);
-+ BlockTSmallerVector exitingBlks;
-+ loopRep->getExitingBlocks(exitingBlks);
-+
-+ if (exitingBlks.size() == 0) {
-+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
-+ if (dummyExitBlk != NULL)
-+ retBlks.push_back(dummyExitBlk);
-+ }
-+ }
-+
-+ // Remove unconditional branch instr.
-+ // Add dummy exit block iff there are multiple returns.
-+
-+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
-+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
-+ iterBlk != iterEndBlk;
-+ ++iterBlk) {
-+ BlockT *curBlk = *iterBlk;
-+ removeUnconditionalBranch(curBlk);
-+ removeRedundantConditionalBranch(curBlk);
-+ if (CFGTraits::isReturnBlock(curBlk)) {
-+ retBlks.push_back(curBlk);
-+ }
-+ assert(curBlk->succ_size() <= 2);
-+ } //for
-+
-+ if (retBlks.size() >= 2) {
-+ addDummyExitBlock(retBlks);
-+ changed = true;
-+ }
-+
-+ return changed;
-+} //CFGStructurizer::prepare
-+
-+template<class PassT>
-+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
-+ const AMDGPURegisterInfo * tri) {
-+ passRep = &pass;
-+ funcRep = &func;
-+ TRI = tri;
-+
-+ //Assume reducible CFG...
-+ if (DEBUGME) {
-+ errs() << "AMDGPUCFGStructurizer::run\n";
-+ func.viewCFG();
-+ }
-+
-+ domTree = CFGTraits::getDominatorTree(pass);
-+ if (DEBUGME) {
-+ domTree->print(errs(), (const llvm::Module*)0);
-+ }
-+
-+ postDomTree = CFGTraits::getPostDominatorTree(pass);
-+ if (DEBUGME) {
-+ postDomTree->print(errs());
-+ }
-+
-+ loopInfo = CFGTraits::getLoopInfo(pass);
-+ if (DEBUGME) {
-+ errs() << "LoopInfo:\n";
-+ PrintLoopinfo(*loopInfo, errs());
-+ }
-+
-+ orderBlocks();
-+#ifdef STRESSTEST
-+ //Use the worst block ordering to test the algorithm.
-+ ReverseVector(orderedBlks);
-+#endif
-+
-+ if (DEBUGME) {
-+ errs() << "Ordered blocks:\n";
-+ printOrderedBlocks(errs());
-+ }
-+ int numIter = 0;
-+ bool finish = false;
-+ BlockT *curBlk;
-+ bool makeProgress = false;
-+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
-+ orderedBlks.end());
-+
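-+ // Iterate pattern matching to a fixed point: keep sweeping the ordered
-+ // blocks until the CFG collapses to a single block or a full sweep stops
-+ // reducing the number of active blocks.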
-+ do {
-+ ++numIter;
-+ if (DEBUGME) {
-+ errs() << "numIter = " << numIter
-+ << ", numRemainedBlk = " << numRemainedBlk << "\n";
-+ }
-+
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
-+ iterBlk = orderedBlks.begin();
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
-+ iterBlkEnd = orderedBlks.end();
-+
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
-+ sccBeginIter = iterBlk;
-+ BlockT *sccBeginBlk = NULL;
-+ int sccNumBlk = 0; // The number of active blocks, init to a
-+ // maximum possible number.
-+ int sccNumIter; // Number of iterations in this SCC.
-+
-+ while (iterBlk != iterBlkEnd) {
-+ curBlk = *iterBlk;
-+
-+ if (sccBeginBlk == NULL) {
-+ sccBeginIter = iterBlk;
-+ sccBeginBlk = curBlk;
-+ sccNumIter = 0;
-+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
-+ if (DEBUGME) {
-+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
-+ errs() << "\n";
-+ }
-+ }
-+
-+ if (!isRetiredBlock(curBlk)) {
-+ patternMatch(curBlk);
-+ }
-+
-+ ++iterBlk;
-+
-+ bool contNextScc = true;
-+ if (iterBlk == iterBlkEnd
-+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
-+ // Just finished one SCC.
-+ ++sccNumIter;
-+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
-+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
-+ if (DEBUGME) {
-+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
-+ << ", sccNumIter = " << sccNumIter;
-+ errs() << ", doesn't make any progress\n";
-+ }
-+ contNextScc = true;
-+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
-+ sccNumBlk = sccRemainedNumBlk;
-+ iterBlk = sccBeginIter;
-+ contNextScc = false;
-+ if (DEBUGME) {
-+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
-+ << ", sccNumIter = " << sccNumIter << "\n";
-+ func.viewCFG();
-+ }
-+ } else {
-+ // Finish the current scc.
-+ contNextScc = true;
-+ }
-+ } else {
-+ // Continue on next component in the current scc.
-+ contNextScc = false;
-+ }
-+
-+ if (contNextScc) {
-+ sccBeginBlk = NULL;
-+ }
-+ } //while, "one iteration" over the function.
-+
-+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
-+ if (entryBlk->succ_size() == 0) {
-+ finish = true;
-+ if (DEBUGME) {
-+ errs() << "Reduce to one block\n";
-+ }
-+ } else {
-+ int newnumRemainedBlk
-+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
-+ // TODO: consider cloned blocks?
-+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
-+ makeProgress = true;
-+ numRemainedBlk = newnumRemainedBlk;
-+ } else {
-+ makeProgress = false;
-+ if (DEBUGME) {
-+ errs() << "No progress\n";
-+ }
-+ }
-+ }
-+ } while (!finish && makeProgress);
-+
-+ // Misc wrap up to maintain the consistency of the Function representation.
-+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
-+
-+ // Detach retired Block, release memory.
-+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
-+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
-+ if ((*iterMap).second && (*iterMap).second->isRetired) {
-+ assert(((*iterMap).first)->getNumber() != -1);
-+ if (DEBUGME) {
-+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
-+ }
-+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
-+ }
-+ delete (*iterMap).second;
-+ }
-+ blockInfoMap.clear();
-+
-+ // clear loopLandInfoMap
-+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
-+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
-+ delete (*iterMap).second;
-+ }
-+ loopLandInfoMap.clear();
-+
-+ if (DEBUGME) {
-+ func.viewCFG();
-+ }
-+
-+ if (!finish) {
-+ assert(!"IRREDUCIBLE_CF");
-+ }
-+
-+ return true;
-+} //CFGStructurizer::run
-+
-+/// Print the ordered Blocks.
-+///
-+template<class PassT>
-+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
-+ size_t i = 0;
-+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
-+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
-+ iterBlk != iterBlkEnd;
-+ ++iterBlk, ++i) {
-+ os << "BB" << (*iterBlk)->getNumber();
-+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
-+ if (i != 0 && i % 10 == 0) {
-+ os << "\n";
-+ } else {
-+ os << " ";
-+ }
-+ }
-+} //printOrderedBlocks
-+
-+/// Compute the reversed DFS post order of Blocks
-+///
-+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
-+ int sccNum = 0;
-+ BlockT *bb;
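-+ // scc_iterator enumerates the SCCs of the CFG in reverse topological
-+ // order, so appending each SCC's blocks yields the reversed DFS post
-+ // order the reduction works on.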
-+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
-+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
-+ std::vector<BlockT *> &sccNext = *sccIter;
-+ for (typename std::vector<BlockT *>::const_iterator
-+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
-+ blockIter != blockEnd; ++blockIter) {
-+ bb = *blockIter;
-+ orderedBlks.push_back(bb);
-+ recordSccnum(bb, sccNum);
-+ }
-+ }
-+
-+ //Walk through all the blocks in the function to check for unreachable blocks.
-+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
-+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
-+ blockIter1 != blockEnd1; ++blockIter1) {
-+ BlockT *bb = &(*blockIter1);
-+ sccNum = getSCCNum(bb);
-+ if (sccNum == INVALIDSCCNUM) {
-+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
-+ }
-+ }
-+} //orderBlocks
-+
-+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
-+ int numMatch = 0;
-+ int curMatch;
-+
-+ if (DEBUGME) {
-+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
-+ }
-+
-+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
-+ numMatch += curMatch;
-+ }
-+
-+ if (DEBUGME) {
-+ errs() << "End patternMatch BB" << curBlk->getNumber()
-+ << ", numMatch = " << numMatch << "\n";
-+ }
-+
-+ return numMatch;
-+} //patternMatch
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
-+ int numMatch = 0;
-+ numMatch += serialPatternMatch(curBlk);
-+ numMatch += ifPatternMatch(curBlk);
-+ numMatch += loopendPatternMatch(curBlk);
-+ numMatch += loopPatternMatch(curBlk);
-+ return numMatch;
-+}//patternMatchGroup
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
-+ if (curBlk->succ_size() != 1) {
-+ return 0;
-+ }
-+
-+ BlockT *childBlk = *curBlk->succ_begin();
-+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
-+ return 0;
-+ }
-+
-+ mergeSerialBlock(curBlk, childBlk);
-+ ++numSerialPatternMatch;
-+ return 1;
-+} //serialPatternMatch
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
-+ //two edges
-+ if (curBlk->succ_size() != 2) {
-+ return 0;
-+ }
-+
-+ if (hasBackEdge(curBlk)) {
-+ return 0;
-+ }
-+
-+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
-+ if (branchInstr == NULL) {
-+ return 0;
-+ }
-+
-+ assert(CFGTraits::isCondBranch(branchInstr));
-+
-+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
-+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
-+ BlockT *landBlk;
-+ int cloned = 0;
-+
-+ // TODO: Simplify
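-+ // Recognized shapes: a diamond (both arms join at a common successor), a
-+ // double return (neither arm has successors), a triangle (one arm falls
-+ // through to the other), and detached continue/break arms within the same
-+ // loop; anything else is a jump into the if, handled below.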
-+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
-+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
-+ landBlk = *trueBlk->succ_begin();
-+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
-+ landBlk = NULL;
-+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
-+ landBlk = falseBlk;
-+ falseBlk = NULL;
-+ } else if (falseBlk->succ_size() == 1
-+ && *falseBlk->succ_begin() == trueBlk) {
-+ landBlk = trueBlk;
-+ trueBlk = NULL;
-+ } else if (falseBlk->succ_size() == 1
-+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
-+ landBlk = *falseBlk->succ_begin();
-+ } else if (trueBlk->succ_size() == 1
-+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
-+ landBlk = *trueBlk->succ_begin();
-+ } else {
-+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
-+ }
-+
-+ // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but
-+ // the new BB created for landBlk == NULL may introduce a new challenge to
-+ // the reduction process.
-+ if (landBlk != NULL &&
-+ ((trueBlk && trueBlk->pred_size() > 1)
-+ || (falseBlk && falseBlk->pred_size() > 1))) {
-+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
-+ }
-+
-+ if (trueBlk && trueBlk->pred_size() > 1) {
-+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
-+ ++cloned;
-+ }
-+
-+ if (falseBlk && falseBlk->pred_size() > 1) {
-+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
-+ ++cloned;
-+ }
-+
-+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
-+
-+ ++numIfPatternMatch;
-+
-+ numClonedBlock += cloned;
-+
-+ return 1 + cloned;
-+} //ifPatternMatch
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
-+ return 0;
-+} //switchPatternMatch
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
-+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
-+ typename std::vector<LoopT *> nestedLoops;
-+ while (loopRep) {
-+ nestedLoops.push_back(loopRep);
-+ loopRep = loopRep->getParentLoop();
-+ }
-+
-+ if (nestedLoops.size() == 0) {
-+ return 0;
-+ }
-+
-+ // Process nested loops outside->inside, so a "continue" to an outer loop
-+ // won't be mistaken for a "break" of the current loop.
-+ int num = 0;
-+ for (typename std::vector<LoopT *>::reverse_iterator
-+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
-+ iter != iterEnd; ++iter) {
-+ loopRep = *iter;
-+
-+ if (getLoopLandBlock(loopRep) != NULL) {
-+ continue;
-+ }
-+
-+ BlockT *loopHeader = loopRep->getHeader();
-+
-+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
-+
-+ if (numBreak == -1) {
-+ break;
-+ }
-+
-+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
-+ num += numBreak + numCont;
-+ }
-+
-+ return num;
-+} //loopendPatternMatch
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
-+ if (curBlk->succ_size() != 0) {
-+ return 0;
-+ }
-+
-+ int numLoop = 0;
-+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
-+ while (loopRep && loopRep->getHeader() == curBlk) {
-+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
-+ if (loopLand) {
-+ BlockT *landBlk = loopLand->landBlk;
-+ assert(landBlk);
-+ if (!isRetiredBlock(landBlk)) {
-+ mergeLooplandBlock(curBlk, loopLand);
-+ ++numLoop;
-+ }
-+ }
-+ loopRep = loopRep->getParentLoop();
-+ }
-+
-+ numLoopPatternMatch += numLoop;
-+
-+ return numLoop;
-+} //loopPatternMatch
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
-+ BlockT *loopHeader) {
-+ BlockTSmallerVector exitingBlks;
-+ loopRep->getExitingBlocks(exitingBlks);
-+
-+ if (DEBUGME) {
-+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
-+ }
-+
-+ if (exitingBlks.size() == 0) {
-+ setLoopLandBlock(loopRep);
-+ return 0;
-+ }
-+
-+ // Compute the corresponding exitBlks and exit block set.
-+ BlockTSmallerVector exitBlks;
-+ std::set<BlockT *> exitBlkSet;
-+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
-+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
-+ BlockT *exitingBlk = *iter;
-+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
-+ exitBlks.push_back(exitBlk);
-+ exitBlkSet.insert(exitBlk); //non-duplicate insert
-+ }
-+
-+ assert(exitBlkSet.size() > 0);
-+ assert(exitBlks.size() == exitingBlks.size());
-+
-+ if (DEBUGME) {
-+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
-+ }
-+
-+ // Find exitLandBlk.
-+ BlockT *exitLandBlk = NULL;
-+ int numCloned = 0;
-+ int numSerial = 0;
-+
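-+ // A single exit block becomes the land block directly; otherwise the land
-+ // block is the nearest common post-dominator of all exit blocks, and every
-+ // exit block must reach it along a single path.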
-+ if (exitBlkSet.size() == 1) {
-+ exitLandBlk = *exitBlkSet.begin();
-+ } else {
-+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
-+
-+ if (exitLandBlk == NULL) {
-+ return -1;
-+ }
-+
-+ bool allInPath = true;
-+ bool allNotInPath = true;
-+ for (typename std::set<BlockT*>::const_iterator
-+ iter = exitBlkSet.begin(),
-+ iterEnd = exitBlkSet.end();
-+ iter != iterEnd; ++iter) {
-+ BlockT *exitBlk = *iter;
-+
-+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
-+ if (DEBUGME) {
-+ errs() << "BB" << exitBlk->getNumber()
-+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
-+ << pathKind << "\n";
-+ }
-+
-+ allInPath = allInPath && (pathKind == SinglePath_InPath);
-+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
-+
-+ if (!allInPath && !allNotInPath) {
-+ if (DEBUGME) {
-+ errs() << "singlePath check fail\n";
-+ }
-+ return -1;
-+ }
-+ } // check all exit blocks
-+
-+ if (allNotInPath) {
-+
-+ // TODO: Simplify, maybe separate function?
-+ LoopT *parentLoopRep = loopRep->getParentLoop();
-+ BlockT *parentLoopHeader = NULL;
-+ if (parentLoopRep)
-+ parentLoopHeader = parentLoopRep->getHeader();
-+
-+ if (exitLandBlk == parentLoopHeader &&
-+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
-+ loopRep,
-+ exitBlkSet,
-+ exitLandBlk)) != NULL) {
-+ if (DEBUGME) {
-+ errs() << "relocateLoopcontBlock success\n";
-+ }
-+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
-+ exitingBlks,
-+ exitBlks)) != NULL) {
-+ if (DEBUGME) {
-+ errs() << "insertEndbranchBlock success\n";
-+ }
-+ } else {
-+ if (DEBUGME) {
-+ errs() << "loop exit fail\n";
-+ }
-+ return -1;
-+ }
-+ }
-+
-+ // Handle side entry to exit path.
-+ exitBlks.clear();
-+ exitBlkSet.clear();
-+ for (typename BlockTSmallerVector::iterator iterExiting =
-+ exitingBlks.begin(),
-+ iterExitingEnd = exitingBlks.end();
-+ iterExiting != iterExitingEnd; ++iterExiting) {
-+ BlockT *exitingBlk = *iterExiting;
-+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
-+ BlockT *newExitBlk = exitBlk;
-+
-+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
-+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
-+ ++numCloned;
-+ }
-+
-+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
-+
-+ exitBlks.push_back(newExitBlk);
-+ exitBlkSet.insert(newExitBlk);
-+ }
-+
-+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
-+ iterExitEnd = exitBlks.end();
-+ iterExit != iterExitEnd; ++iterExit) {
-+ BlockT *exitBlk = *iterExit;
-+ numSerial += serialPatternMatch(exitBlk);
-+ }
-+
-+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
-+ iterExitEnd = exitBlks.end();
-+ iterExit != iterExitEnd; ++iterExit) {
-+ BlockT *exitBlk = *iterExit;
-+ if (exitBlk->pred_size() > 1) {
-+ if (exitBlk != exitLandBlk) {
-+ return -1;
-+ }
-+ } else {
-+ if (exitBlk != exitLandBlk &&
-+ (exitBlk->succ_size() != 1 ||
-+ *exitBlk->succ_begin() != exitLandBlk)) {
-+ return -1;
-+ }
-+ }
-+ }
-+ } // else
-+
-+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
-+
-+ // Fold the break into the breaking block; this also handles breaks that
-+ // cross loop levels.
-+ assert(exitingBlks.size() == exitBlks.size());
-+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
-+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
-+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
-+ BlockT *exitBlk = *iterExit;
-+ BlockT *exitingBlk = *iterExiting;
-+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
-+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
-+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
-+ }
-+
-+ int numBreak = static_cast<int>(exitingBlks.size());
-+ numLoopbreakPatternMatch += numBreak;
-+ numClonedBlock += numCloned;
-+ return numBreak + numSerial + numCloned;
-+} //loopbreakPatternMatch
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
-+ BlockT *loopHeader) {
-+ int numCont = 0;
-+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
-+ for (typename InvBlockGTraits::ChildIteratorType iter =
-+ InvBlockGTraits::child_begin(loopHeader),
-+ iterEnd = InvBlockGTraits::child_end(loopHeader);
-+ iter != iterEnd; ++iter) {
-+ BlockT *curBlk = *iter;
-+ if (loopRep->contains(curBlk)) {
-+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
-+ loopHeader, loopRep);
-+ contBlk.push_back(curBlk);
-+ ++numCont;
-+ }
-+ }
-+
-+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
-+ iter = contBlk.begin(), iterEnd = contBlk.end();
-+ iter != iterEnd; ++iter) {
-+ (*iter)->removeSuccessor(loopHeader);
-+ }
-+
-+ numLoopcontPatternMatch += numCont;
-+
-+ return numCont;
-+} //loopcontPatternMatch
-+
-+
-+template<class PassT>
-+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
-+ BlockT *src2Blk) {
-+ // Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
-+ // the same loop, and that loop has LoopLandInfo recorded. Without
-+ // explicitly keeping track of loopContBlks and loopBreakBlks, this is a
-+ // way to get that information.
-+ //
-+ if (src1Blk->succ_size() == 0) {
-+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
-+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+ if (theEntry != NULL) {
-+ if (DEBUGME) {
-+ errs() << "isLoopContBreakBlock yes src1 = BB"
-+ << src1Blk->getNumber()
-+ << " src2 = BB" << src2Blk->getNumber() << "\n";
-+ }
-+ return true;
-+ }
-+ }
-+ }
-+ return false;
-+} //isSameloopDetachedContbreak
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
-+ BlockT *trueBlk,
-+ BlockT *falseBlk) {
-+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
-+ if (num == 0) {
-+ if (DEBUGME) {
-+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
-+ }
-+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
-+ }
-+ return num;
-+}
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
-+ BlockT *trueBlk,
-+ BlockT *falseBlk) {
-+ int num = 0;
-+ BlockT *downBlk;
-+
-+ //trueBlk could be the common post dominator
-+ downBlk = trueBlk;
-+
-+ if (DEBUGME) {
-+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
-+ << " true = BB" << trueBlk->getNumber()
-+ << ", numSucc=" << trueBlk->succ_size()
-+ << " false = BB" << falseBlk->getNumber() << "\n";
-+ }
-+
-+ while (downBlk) {
-+ if (DEBUGME) {
-+ errs() << "check down = BB" << downBlk->getNumber();
-+ }
-+
-+ if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
-+ if (DEBUGME) {
-+ errs() << " working\n";
-+ }
-+
-+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
-+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
-+
-+ numClonedBlock += num;
-+ num += serialPatternMatch(*headBlk->succ_begin());
-+ num += serialPatternMatch(*(++headBlk->succ_begin()));
-+ num += ifPatternMatch(headBlk);
-+ assert(num > 0);
-+
-+ break;
-+ }
-+ if (DEBUGME) {
-+ errs() << " not working\n";
-+ }
-+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
-+ } // walk down the postDomTree
-+
-+ return num;
-+} //handleJumpintoIf
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
-+ BlockT *trueBlk,
-+ BlockT *falseBlk,
-+ BlockT *landBlk,
-+ bool detail) {
-+ errs() << "head = BB" << headBlk->getNumber()
-+ << " size = " << headBlk->size();
-+ if (detail) {
-+ errs() << "\n";
-+ headBlk->print(errs());
-+ errs() << "\n";
-+ }
-+
-+ if (trueBlk) {
-+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
-+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
-+ if (detail) {
-+ errs() << "\n";
-+ trueBlk->print(errs());
-+ errs() << "\n";
-+ }
-+ }
-+ if (falseBlk) {
-+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
-+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
-+ if (detail) {
-+ errs() << "\n";
-+ falseBlk->print(errs());
-+ errs() << "\n";
-+ }
-+ }
-+ if (landBlk) {
-+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
-+ << landBlk->size() << " numPred = " << landBlk->pred_size();
-+ if (detail) {
-+ errs() << "\n";
-+ landBlk->print(errs());
-+ errs() << "\n";
-+ }
-+ }
-+
-+ errs() << "\n";
-+} //showImproveSimpleJumpintoIf
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
-+ BlockT *trueBlk,
-+ BlockT *falseBlk,
-+ BlockT **plandBlk) {
-+ bool migrateTrue = false;
-+ bool migrateFalse = false;
-+
-+ BlockT *landBlk = *plandBlk;
-+
-+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
-+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
-+
-+ if (trueBlk == falseBlk) {
-+ return 0;
-+ }
-+
-+ migrateTrue = needMigrateBlock(trueBlk);
-+ migrateFalse = needMigrateBlock(falseBlk);
-+
-+ if (!migrateTrue && !migrateFalse) {
-+ return 0;
-+ }
-+
-+ // If we need to migrate either trueBlk or falseBlk, also migrate whichever
-+ // of them has more than one predecessor. Without doing this, a predecessor
-+ // other than headBlk may leave an undefined value in initReg.
-+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
-+ migrateTrue = true;
-+ }
-+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
-+ migrateFalse = true;
-+ }
-+
-+ if (DEBUGME) {
-+ errs() << "before improveSimpleJumpintoIf: ";
-+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
-+ }
-+
-+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
-+ //
-+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
-+ // {initReg = 0; org falseBlk branch }
-+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
-+ // => org landBlk
-+ // if landBlk->pred_size() > 2, put the above if-else inside
-+ // if (initReg != 2) {...}
-+ //
-+ // add initReg = initVal to headBlk
-+
-+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-+ unsigned initReg =
-+ funcRep->getRegInfo().createVirtualRegister(I32RC);
-+ if (!migrateTrue || !migrateFalse) {
-+ int initVal = migrateTrue ? 0 : 1;
-+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
-+ }
-+
-+ int numNewBlk = 0;
-+
-+ if (landBlk == NULL) {
-+ landBlk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(landBlk); //insert to function
-+
-+ if (trueBlk) {
-+ trueBlk->addSuccessor(landBlk);
-+ } else {
-+ headBlk->addSuccessor(landBlk);
-+ }
-+
-+ if (falseBlk) {
-+ falseBlk->addSuccessor(landBlk);
-+ } else {
-+ headBlk->addSuccessor(landBlk);
-+ }
-+
-+ numNewBlk++;
-+ }
-+
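-+ // landBlk may have predecessors besides the two arms; in that case the
-+ // rebuilt if-else below must be guarded by initReg != 2, as sketched in
-+ // the comment above.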
-+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
-+
-+ //Insert AMDGPU::ENDIF to avoid the special case "input landBlk == NULL".
-+ typename BlockT::iterator insertPos =
-+ CFGTraits::getInstrPos
-+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
-+
-+ if (landBlkHasOtherPred) {
-+ unsigned immReg =
-+ funcRep->getRegInfo().createVirtualRegister(I32RC);
-+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
-+ unsigned cmpResReg =
-+ funcRep->getRegInfo().createVirtualRegister(I32RC);
-+
-+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
-+ initReg, immReg);
-+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
-+ AMDGPU::IF_PREDICATE_SET, passRep,
-+ cmpResReg, DebugLoc());
-+ }
-+
-+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
-+ passRep, initReg, DebugLoc());
-+
-+ if (migrateTrue) {
-+ migrateInstruction(trueBlk, landBlk, insertPos);
-+ // Need to unconditionally insert the assignment to ensure a path from a
-+ // predecessor other than headBlk has a valid value in initReg if
-+ // (initVal != 1).
-+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
-+ }
-+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
-+
-+ if (migrateFalse) {
-+ migrateInstruction(falseBlk, landBlk, insertPos);
-+ // Need to unconditionally insert the assignment to ensure a path from a
-+ // predecessor other than headBlk has a valid value in initReg if
-+ // (initVal != 0).
-+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
-+ }
-+
-+ if (landBlkHasOtherPred) {
-+ // add endif
-+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
-+
-+ // put initReg = 2 to other predecessors of landBlk
-+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
-+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
-+ ++predIter) {
-+ BlockT *curBlk = *predIter;
-+ if (curBlk != trueBlk && curBlk != falseBlk) {
-+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
-+ }
-+ } //for
-+ }
-+ if (DEBUGME) {
-+ errs() << "result from improveSimpleJumpintoIf: ";
-+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
-+ }
-+
-+ // update landBlk
-+ *plandBlk = landBlk;
-+
-+ return numNewBlk;
-+} //improveSimpleJumpintoIf
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
-+ LoopT *exitingLoop,
-+ BlockT *exitBlk,
-+ LoopT *exitLoop,
-+ BlockT *landBlk) {
-+ if (DEBUGME) {
-+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
-+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
-+ }
-+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-+
-+ RegiT initReg = INVALIDREGNUM;
-+ if (exitingLoop != exitLoop) {
-+ initReg = static_cast<int>
-+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
-+ assert(initReg != INVALIDREGNUM);
-+ addLoopBreakInitReg(exitLoop, initReg);
-+ while (exitingLoop != exitLoop && exitingLoop) {
-+ addLoopBreakOnReg(exitingLoop, initReg);
-+ exitingLoop = exitingLoop->getParentLoop();
-+ }
-+ assert(exitingLoop == exitLoop);
-+ }
-+
-+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
-+
-+} //handleLoopbreak
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
-+ LoopT *contingLoop,
-+ BlockT *contBlk,
-+ LoopT *contLoop) {
-+ if (DEBUGME) {
-+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
-+ << " header = BB" << contBlk->getNumber() << "\n";
-+
-+ errs() << "Trying to continue loop-depth = "
-+ << getLoopDepth(contLoop)
-+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
-+ }
-+
-+ RegiT initReg = INVALIDREGNUM;
-+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-+ if (contingLoop != contLoop) {
-+ initReg = static_cast<int>
-+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
-+ assert(initReg != INVALIDREGNUM);
-+ addLoopContInitReg(contLoop, initReg);
-+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
-+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
-+ contingLoop = contingLoop->getParentLoop();
-+ }
-+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
-+ addLoopContOnReg(contingLoop, initReg);
-+ }
-+
-+ settleLoopcontBlock(contingBlk, contBlk, initReg);
-+} //handleLoopcontBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
-+ if (DEBUGME) {
-+ errs() << "serialPattern BB" << dstBlk->getNumber()
-+ << " <= BB" << srcBlk->getNumber() << "\n";
-+ }
-+ dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
-+
-+ dstBlk->removeSuccessor(srcBlk);
-+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
-+
-+ removeSuccessor(srcBlk);
-+ retireBlock(dstBlk, srcBlk);
-+} //mergeSerialBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
-+ BlockT *curBlk,
-+ BlockT *trueBlk,
-+ BlockT *falseBlk,
-+ BlockT *landBlk) {
-+ if (DEBUGME) {
-+ errs() << "ifPattern BB" << curBlk->getNumber();
-+ errs() << "{ ";
-+ if (trueBlk) {
-+ errs() << "BB" << trueBlk->getNumber();
-+ }
-+ errs() << " } else ";
-+ errs() << "{ ";
-+ if (falseBlk) {
-+ errs() << "BB" << falseBlk->getNumber();
-+ }
-+ errs() << " }\n ";
-+ errs() << "landBlock: ";
-+ if (landBlk == NULL) {
-+ errs() << "NULL";
-+ } else {
-+ errs() << "BB" << landBlk->getNumber();
-+ }
-+ errs() << "\n";
-+ }
-+
-+ int oldOpcode = branchInstr->getOpcode();
-+ DebugLoc branchDL = branchInstr->getDebugLoc();
-+
-+// transform to
-+// if cond
-+// trueBlk
-+// else
-+// falseBlk
-+// endif
-+// landBlk
-+
-+ typename BlockT::iterator branchInstrPos =
-+ CFGTraits::getInstrPos(curBlk, branchInstr);
-+ CFGTraits::insertCondBranchBefore(branchInstrPos,
-+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
-+ passRep,
-+ branchDL);
-+
-+ if (trueBlk) {
-+ curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
-+ curBlk->removeSuccessor(trueBlk);
-+ if (landBlk && trueBlk->succ_size()!=0) {
-+ trueBlk->removeSuccessor(landBlk);
-+ }
-+ retireBlock(curBlk, trueBlk);
-+ }
-+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
-+
-+ if (falseBlk) {
-+ curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
-+ falseBlk->end());
-+ curBlk->removeSuccessor(falseBlk);
-+ if (landBlk && falseBlk->succ_size() != 0) {
-+ falseBlk->removeSuccessor(landBlk);
-+ }
-+ retireBlock(curBlk, falseBlk);
-+ }
-+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
-+
-+ branchInstr->eraseFromParent();
-+
-+ if (landBlk && trueBlk && falseBlk) {
-+ curBlk->addSuccessor(landBlk);
-+ }
-+
-+} //mergeIfthenelseBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
-+ LoopLandInfo *loopLand) {
-+ BlockT *landBlk = loopLand->landBlk;
-+
-+ if (DEBUGME) {
-+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
-+ << " land = BB" << landBlk->getNumber() << "\n";
-+ }
-+
-+ // Loop contInitRegs are init at the beginning of the loop.
-+ for (typename std::set<RegiT>::const_iterator iter =
-+ loopLand->contInitRegs.begin(),
-+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
-+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
-+ }
-+
-+ /* We last inserted the DebugLoc in the
-+ * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
-+ * Search for the DebugLoc in that statement.
-+ * If not found, we have to insert the empty/default DebugLoc. */
-+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
-+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
-+
-+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
-+ // Loop breakInitRegs are init before entering the loop.
-+ for (typename std::set<RegiT>::const_iterator iter =
-+ loopLand->breakInitRegs.begin(),
-+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
-+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
-+ }
-+ // Loop endbranchInitRegs are init before entering the loop.
-+ for (typename std::set<RegiT>::const_iterator iter =
-+ loopLand->endbranchInitRegs.begin(),
-+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
-+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
-+ }
-+
-+ /* We last inserted the DebugLoc in the continue statement in the current
-+ * dstBlk. Search for the DebugLoc in the continue statement.
-+ * If not found, we have to insert the empty/default DebugLoc. */
-+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
-+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
-+
-+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
-+ // Loop breakOnRegs are checked after the ENDLOOP: break out of the loop
-+ // enclosing this loop.
-+ for (typename std::set<RegiT>::const_iterator iter =
-+ loopLand->breakOnRegs.begin(),
-+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
-+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
-+ *iter);
-+ }
-+
-+ // Loop contOnRegs are checked after the ENDLOOP: continue the loop
-+ // enclosing this loop.
-+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
-+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
-+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
-+ passRep, *iter);
-+ }
-+
-+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
-+
-+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
-+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
-+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
-+ }
-+
-+ removeSuccessor(landBlk);
-+ retireBlock(dstBlk, landBlk);
-+} //mergeLooplandBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
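-+ // Walk backwards from I to the PRED_X that defines the predicate and flip
-+ // its comparison immediate (zero <-> not-zero), reversing branch polarity.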
-+ while (I--) {
-+ if (I->getOpcode() == AMDGPU::PRED_X) {
-+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
-+ case OPCODE_IS_ZERO_INT:
-+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
-+ return;
-+ case OPCODE_IS_NOT_ZERO_INT:
-+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
-+ return;
-+ case OPCODE_IS_ZERO:
-+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
-+ return;
-+ case OPCODE_IS_NOT_ZERO:
-+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
-+ return;
-+ default:
-+ assert(0 && "PRED_X Opcode invalid!");
-+ }
-+ }
-+ }
-+}
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
-+ BlockT *exitBlk,
-+ BlockT *exitLandBlk,
-+ RegiT setReg) {
-+ if (DEBUGME) {
-+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
-+ << " exit = BB" << exitBlk->getNumber()
-+ << " land = BB" << exitLandBlk->getNumber() << "\n";
-+ }
-+
-+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
-+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
-+
-+ DebugLoc DL = branchInstr->getDebugLoc();
-+
-+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
-+
-+ // transform exitingBlk to
-+ // if ( ) {
-+ // exitBlk (if exitBlk != exitLandBlk)
-+ // setReg = 1
-+ // break
-+ // }endif
-+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
-+
-+ typename BlockT::iterator branchInstrPos =
-+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
-+
-+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
-+ //break_logical
-+
-+ if (trueBranch != exitBlk) {
-+ reversePredicateSetter(branchInstrPos);
-+ }
-+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
-+ } else {
-+ if (trueBranch != exitBlk) {
-+ reversePredicateSetter(branchInstr);
-+ }
-+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
-+ if (exitBlk != exitLandBlk) {
-+ //splice is insert-before ...
-+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
-+ exitBlk->end());
-+ }
-+ if (setReg != INVALIDREGNUM) {
-+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
-+ }
-+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
-+ } //if_logical
-+
-+ //Now branchInstr can be erased safely.
-+ branchInstr->eraseFromParent();
-+
-+ //now take care of successors, retire blocks
-+ exitingBlk->removeSuccessor(exitBlk);
-+ if (exitBlk != exitLandBlk) {
-+ exitBlk->removeSuccessor(exitLandBlk);
-+ retireBlock(exitingBlk, exitBlk);
-+ }
-+
-+} //mergeLoopbreakBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
-+ BlockT *contBlk,
-+ RegiT setReg) {
-+ if (DEBUGME) {
-+ errs() << "settleLoopcontBlock conting = BB"
-+ << contingBlk->getNumber()
-+ << ", cont = BB" << contBlk->getNumber() << "\n";
-+ }
-+
-+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
-+ if (branchInstr) {
-+ assert(CFGTraits::isCondBranch(branchInstr));
-+ typename BlockT::iterator branchInstrPos =
-+ CFGTraits::getInstrPos(contingBlk, branchInstr);
-+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
-+ int oldOpcode = branchInstr->getOpcode();
-+ DebugLoc DL = branchInstr->getDebugLoc();
-+
-+ // transform contingBlk to
-+ // if () {
-+ // move instr after branchInstr
-+ // continue
-+ // or
-+ // setReg = 1
-+ // break
-+ // }endif
-+ // successor = {orgSuccessor(contingBlk) - loopHeader}
-+
-+ bool useContinueLogical =
-+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
-+
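-+ // The continue-logical form folds the continue into the branch itself; it
-+ // only applies when no break register needs setting and the branch is the
-+ // last instruction of the block.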
-+ if (useContinueLogical == false) {
-+ int branchOpcode =
-+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
-+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
-+
-+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
-+
-+ if (setReg != INVALIDREGNUM) {
-+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
-+ // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
-+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
-+ } else {
-+ // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
-+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
-+ }
-+
-+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
-+ } else {
-+ int branchOpcode =
-+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
-+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
-+
-+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
-+ }
-+
-+ branchInstr->eraseFromParent();
-+ } else {
-+ // If we've arrived here then we've already erased the branch instruction.
-+ // Travel back up the basic block to find the last reference of our debug
-+ // location; we've just inserted that reference here, so it should be
-+ // representative.
-+ if (setReg != INVALIDREGNUM) {
-+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
-+ // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
-+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
-+ } else {
-+ // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
-+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
-+ }
-+ } //else
-+
-+} //settleLoopcontBlock
-+
-+// The BBs in exitBlkSet lie on the break-path of loopRep. Before we can
-+// treat those BBs as part of the loop body of loopRep, check whether any
-+// of them was earlier marked as a continue-BB of parentLoopRep.
-+// If so, generate a new BB newBlk:
-+// (1) make newBlk the common successor of the BBs in exitBlkSet,
-+// (2) change the continue-instr in those BBs into a break-instr,
-+// (3) generate a continue-instr in newBlk.
-+//
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
-+ LoopT *loopRep,
-+ std::set<BlockT *> &exitBlkSet,
-+ BlockT *exitLandBlk) {
-+ std::set<BlockT *> endBlkSet;
-+
-+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
-+ iterEnd = exitBlkSet.end();
-+ iter != iterEnd; ++iter) {
-+ BlockT *exitBlk = *iter;
-+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
-+
-+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
-+ return NULL;
-+
-+ endBlkSet.insert(endBlk);
-+ }
-+
-+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(newBlk); //insert to function
-+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
-+ SHOWNEWBLK(newBlk, "New continue block: ");
-+
-+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
-+ iterEnd = endBlkSet.end();
-+ iter != iterEnd; ++iter) {
-+ BlockT *endBlk = *iter;
-+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
-+ if (contInstr) {
-+ contInstr->eraseFromParent();
-+ }
-+ endBlk->addSuccessor(newBlk);
-+ if (DEBUGME) {
-+ errs() << "Add new continue Block to BB"
-+ << endBlk->getNumber() << " successors\n";
-+ }
-+ }
-+
-+ return newBlk;
-+} //relocateLoopcontBlock
-+
-+
-+// A LoopEndbranchBlock is a BB created by the CFGStructurizer to use as the
-+// LoopLandBlock. This BB branches on the loop endBranchInit register to the
-+// paths corresponding to the loop's exiting branches.
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
-+ BlockTSmallerVector &exitingBlks,
-+ BlockTSmallerVector &exitBlks) {
-+ const AMDGPUInstrInfo *tii =
-+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
-+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-+
-+ RegiT endBranchReg = static_cast<int>
-+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
-+ assert(endBranchReg >= 0);
-+
-+ // reg = 0 before entering the loop
-+ addLoopEndbranchInitReg(loopRep, endBranchReg);
-+
-+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
-+ assert(numBlks >= 2 && numBlks == exitBlks.size());
-+
-+ BlockT *preExitingBlk = exitingBlks[0];
-+ BlockT *preExitBlk = exitBlks[0];
-+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(preBranchBlk); //insert to function
-+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
-+
-+ BlockT *newLandBlk = preBranchBlk;
-+
-+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
-+ newLandBlk);
-+ preExitingBlk->removeSuccessor(preExitBlk);
-+ preExitingBlk->addSuccessor(newLandBlk);
-+
-+ //it would be redundant to add "reg = 0" to exitingBlks[0], since reg is
-+ //already initialized to 0 before the loop
-+
-+ // For the 1..n-th exiting paths (the last iteration handles two paths),
-+ // create the branch to the previous path and the current path.
-+ for (uint32_t i = 1; i < numBlks; ++i) {
-+ BlockT *curExitingBlk = exitingBlks[i];
-+ BlockT *curExitBlk = exitBlks[i];
-+ BlockT *curBranchBlk;
-+
-+ if (i == numBlks - 1) {
-+ curBranchBlk = curExitBlk;
-+ } else {
-+ curBranchBlk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(curBranchBlk); //insert to function
-+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
-+ }
-+
-+ // Add reg = i to exitingBlks[i].
-+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
-+ endBranchReg, i);
-+
-+ // Remove the edge (exitingBlks[i], exitBlks[i]) and add the new edge
-+ // (exitingBlks[i], newLandBlk).
-+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
-+ newLandBlk);
-+ curExitingBlk->removeSuccessor(curExitBlk);
-+ curExitingBlk->addSuccessor(newLandBlk);
-+
-+ // add to preBranchBlk the branch instruction:
-+ // if (endBranchReg == preVal)
-+ // preExitBlk
-+ // else
-+ // curBranchBlk
-+ //
-+ // preValReg = i - 1
-+
-+ DebugLoc DL;
-+ RegiT preValReg = static_cast<int>
-+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
-+
-+ preBranchBlk->insert(preBranchBlk->begin(),
-+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
-+ i - 1));
-+
-+ // condResReg = (endBranchReg == preValReg)
-+ RegiT condResReg = static_cast<int>
-+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
-+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
-+ .addReg(endBranchReg).addReg(preValReg);
-+
-+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
-+ .addMBB(preExitBlk).addReg(condResReg);
-+
-+ preBranchBlk->addSuccessor(preExitBlk);
-+ preBranchBlk->addSuccessor(curBranchBlk);
-+
-+ // Update preExitingBlk, preExitBlk, preBranchBlk.
-+ preExitingBlk = curExitingBlk;
-+ preExitBlk = curExitBlk;
-+ preBranchBlk = curBranchBlk;
-+
-+ } //end for 1 .. n blocks
-+
-+ return newLandBlk;
-+} //addLoopEndbranchBlock
-+
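-+
-+// Classify the path from srcBlk to dstBlk by following single-successor
-+// chains: SinglePath_InPath if the chain reaches dstBlk,
-+// SinglePath_NotInPath if the chain ends without meeting dstBlk, and
-+// Not_SinglePath otherwise. Unless allowSideEntry is set, a block with
-+// multiple predecessors on the chain also yields Not_SinglePath.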
-+template<class PassT>
-+typename CFGStructurizer<PassT>::PathToKind
-+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
-+ bool allowSideEntry) {
-+ assert(dstBlk);
-+
-+ if (srcBlk == dstBlk) {
-+ return SinglePath_InPath;
-+ }
-+
-+ while (srcBlk && srcBlk->succ_size() == 1) {
-+ srcBlk = *srcBlk->succ_begin();
-+ if (srcBlk == dstBlk) {
-+ return SinglePath_InPath;
-+ }
-+
-+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
-+ return Not_SinglePath;
-+ }
-+ }
-+
-+ if (srcBlk && srcBlk->succ_size()==0) {
-+ return SinglePath_NotInPath;
-+ }
-+
-+ return Not_SinglePath;
-+} //singlePathTo
-+
-+// If there is a single path from srcBlk to dstBlk, return the last block
-+// before dstBlk. If there is a single path from srcBlk to the function end
-+// that does not pass through dstBlk, return the last block in that path.
-+// Otherwise, return NULL.
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
-+ bool allowSideEntry) {
-+ assert(dstBlk);
-+
-+ if (srcBlk == dstBlk) {
-+ return srcBlk;
-+ }
-+
-+ if (srcBlk->succ_size() == 0) {
-+ return srcBlk;
-+ }
-+
-+ while (srcBlk && srcBlk->succ_size() == 1) {
-+ BlockT *preBlk = srcBlk;
-+
-+ srcBlk = *srcBlk->succ_begin();
-+ if (srcBlk == NULL) {
-+ return preBlk;
-+ }
-+
-+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
-+ return NULL;
-+ }
-+ }
-+
-+ if (srcBlk && srcBlk->succ_size()==0) {
-+ return srcBlk;
-+ }
-+
-+ return NULL;
-+
-+} //singlePathEnd
-+
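-+
-+// Walk the single-successor chain from srcBlk to dstBlk, cloning every
-+// block on the path that has side entries (multiple predecessors) so the
-+// path from preBlk becomes private. Returns the number of blocks cloned.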
-+template<class PassT>
-+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
-+ BlockT *dstBlk) {
-+ int cloned = 0;
-+ assert(preBlk->isSuccessor(srcBlk));
-+ while (srcBlk && srcBlk != dstBlk) {
-+ assert(srcBlk->succ_size() == 1);
-+ if (srcBlk->pred_size() > 1) {
-+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
-+ ++cloned;
-+ }
-+
-+ preBlk = srcBlk;
-+ srcBlk = *srcBlk->succ_begin();
-+ }
-+
-+ return cloned;
-+} //cloneOnSideEntryTo
-+
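-+
-+// Clone curBlk for predBlk: redirect predBlk's branch and successor edge
-+// to the clone, and give the clone all of curBlk's successors.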
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
-+ BlockT *predBlk) {
-+ assert(predBlk->isSuccessor(curBlk) &&
-+ "succBlk is not a prececessor of curBlk");
-+
-+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
-+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
-+ //srcBlk, oldBlk, newBlk
-+
-+ predBlk->removeSuccessor(curBlk);
-+ predBlk->addSuccessor(cloneBlk);
-+
-+ // add all of curBlk's successors to cloneBlk
-+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
-+
-+ numClonedInstr += curBlk->size();
-+
-+ if (DEBUGME) {
-+ errs() << "Cloned block: " << "BB"
-+ << curBlk->getNumber() << " size " << curBlk->size() << "\n";
-+ }
-+
-+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
-+
-+ return cloneBlk;
-+} //cloneBlockForPredecessor
-+
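-+
-+// Return the unique successor of exitingBlk that lies outside loopRep.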
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
-+ BlockT *exitingBlk) {
-+ BlockT *exitBlk = NULL;
-+
-+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
-+ iterSuccEnd = exitingBlk->succ_end();
-+ iterSucc != iterSuccEnd; ++iterSucc) {
-+ BlockT *curBlk = *iterSucc;
-+ if (!loopRep->contains(curBlk)) {
-+ assert(exitBlk == NULL);
-+ exitBlk = curBlk;
-+ }
-+ }
-+
-+ assert(exitBlk != NULL);
-+
-+ return exitBlk;
-+} //exitingBlock2ExitBlock
-+
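-+
-+// Move the instructions of srcBlk, up to but not including its branch
-+// instruction (if any), into dstBlk before insertPos.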
-+template<class PassT>
-+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
-+ BlockT *dstBlk,
-+ InstrIterator insertPos) {
-+ InstrIterator spliceEnd;
-+ //look for the input branchinstr, not the AMDGPU branchinstr
-+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
-+ if (branchInstr == NULL) {
-+ if (DEBUGME) {
-+ errs() << "migrateInstruction don't see branch instr\n" ;
-+ }
-+ spliceEnd = srcBlk->end();
-+ } else {
-+ if (DEBUGME) {
-+ errs() << "migrateInstruction see branch instr\n" ;
-+ branchInstr->dump();
-+ }
-+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
-+ }
-+ if (DEBUGME) {
-+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
-+ << "srcSize = " << srcBlk->size() << "\n";
-+ }
-+
-+ //splice insert before insertPos
-+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
-+
-+ if (DEBUGME) {
-+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
-+ << "srcSize = " << srcBlk->size() << "\n";
-+ }
-+} //migrateInstruction
-+
-+// normalizeInfiniteLoopExit changes
-+// B1:
-+// uncond_br LoopHeader
-+//
-+// to
-+// B1:
-+// cond_br 1 LoopHeader dummyExit
-+// and returns the newly added dummy exit block.
-+//
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
-+ BlockT *loopHeader;
-+ BlockT *loopLatch;
-+ loopHeader = LoopRep->getHeader();
-+ loopLatch = LoopRep->getLoopLatch();
-+ BlockT *dummyExitBlk = NULL;
-+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-+ if (loopHeader!=NULL && loopLatch!=NULL) {
-+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
-+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
-+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(dummyExitBlk); //insert to function
-+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
-+
-+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
-+
-+ typename BlockT::iterator insertPos =
-+ CFGTraits::getInstrPos(loopLatch, branchInstr);
-+ unsigned immReg =
-+ funcRep->getRegInfo().createVirtualRegister(I32RC);
-+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
-+ InstrT *newInstr =
-+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
-+ MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
-+
-+ SHOWNEWINSTR(newInstr);
-+
-+ branchInstr->eraseFromParent();
-+ loopLatch->addSuccessor(dummyExitBlk);
-+ }
-+ }
-+
-+ return dummyExitBlk;
-+} //normalizeInfiniteLoopExit
-+
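-+
-+// Erase every trailing unconditional branch of srcBlk.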
-+template<class PassT>
-+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
-+ InstrT *branchInstr;
-+
-+ // Two unconditional branches have been seen in one basic block (e.g. in
-+ // test_fc_do_while_or.c); the upstream producer needs fixing so this loop
-+ // can be removed.
-+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
-+ && CFGTraits::isUncondBranch(branchInstr)) {
-+ if (DEBUGME) {
-+ errs() << "Removing unconditional branch instruction" ;
-+ branchInstr->dump();
-+ }
-+ branchInstr->eraseFromParent();
-+ }
-+} //removeUnconditionalBranch
-+
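-+
-+// If both successors of srcBlk are the same block, the conditional branch
-+// is redundant: erase it and drop the duplicate successor edge.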
-+template<class PassT>
-+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
-+ if (srcBlk->succ_size() == 2) {
-+ BlockT *blk1 = *srcBlk->succ_begin();
-+ BlockT *blk2 = *(++srcBlk->succ_begin());
-+
-+ if (blk1 == blk2) {
-+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
-+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
-+ if (DEBUGME) {
-+ errs() << "Removing unneeded conditional branch instruction" ;
-+ branchInstr->dump();
-+ }
-+ branchInstr->eraseFromParent();
-+ SHOWNEWBLK(blk1, "Removing redundant successor");
-+ srcBlk->removeSuccessor(blk1);
-+ }
-+ }
-+} //removeRedundantConditionalBranch
-+
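-+
-+// Create a common dummy exit block ending in RETURN, and redirect every
-+// block in retBlks to it (erasing their own RETURN instructions).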
-+template<class PassT>
-+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
-+ DEFAULT_VEC_SLOTS> &retBlks) {
-+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(dummyExitBlk); //insert to function
-+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
-+
-+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
-+ retBlks.begin(),
-+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
-+ BlockT *curBlk = *iter;
-+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
-+ if (curInstr) {
-+ curInstr->eraseFromParent();
-+ }
-+ curBlk->addSuccessor(dummyExitBlk);
-+ if (DEBUGME) {
-+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
-+ << " successors\n";
-+ }
-+ } //for
-+
-+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
-+} //addDummyExitBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
-+ while (srcBlk->succ_size()) {
-+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
-+ }
-+}
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
-+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
-+
-+ if (srcBlkInfo == NULL) {
-+ srcBlkInfo = new BlockInfo();
-+ }
-+
-+ srcBlkInfo->sccNum = sccNum;
-+}
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
-+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
-+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
-+}
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
-+ if (DEBUGME) {
-+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
-+ }
-+
-+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
-+
-+ if (srcBlkInfo == NULL) {
-+ srcBlkInfo = new BlockInfo();
-+ }
-+
-+ srcBlkInfo->isRetired = true;
-+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
-+ && "can't retire block yet");
-+}
-+
-+template<class PassT>
-+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
-+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
-+ return (srcBlkInfo && srcBlkInfo->isRetired);
-+}
-+
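-+
-+// curBlk is an active loop head if some loop headed by it has no landing
-+// block recorded yet, or has a landing block that is not retired.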
-+template<class PassT>
-+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
-+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
-+ while (loopRep && loopRep->getHeader() == curBlk) {
-+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
-+
-+ if(loopLand == NULL)
-+ return true;
-+
-+ BlockT *landBlk = loopLand->landBlk;
-+ assert(landBlk);
-+ if (!isRetiredBlock(landBlk)) {
-+ return true;
-+ }
-+
-+ loopRep = loopRep->getParentLoop();
-+ }
-+
-+ return false;
-+} //isActiveLoophead
-+
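-+
-+// Decide whether blk is too expensive to duplicate: migration is needed
-+// when the block exceeds blockSizeThreshold instructions and duplicating
-+// it into its extra predecessors would cost more than cloneInstrThreshold
-+// instructions.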
-+template<class PassT>
-+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
-+ const unsigned blockSizeThreshold = 30;
-+ const unsigned cloneInstrThreshold = 100;
-+
-+ bool multiplePreds = blk && (blk->pred_size() > 1);
-+
-+ if(!multiplePreds)
-+ return false;
-+
-+ unsigned blkSize = blk->size();
-+ return ((blkSize > blockSizeThreshold)
-+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
-+} //needMigrateBlock
-+
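-+
-+// Record landBlk as the landing block of loopRep. If landBlk also has
-+// predecessors outside the loop and its exit path, insert a fresh block
-+// in front of it so the recorded landing block is reached only from the
-+// loop side, and update exitBlks accordingly.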
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
-+ BlockTSmallerVector &exitBlks,
-+ std::set<BlockT *> &exitBlkSet) {
-+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
-+
-+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
-+ predIterEnd = landBlk->pred_end();
-+ predIter != predIterEnd; ++predIter) {
-+ BlockT *curBlk = *predIter;
-+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
-+ inpathBlks.push_back(curBlk);
-+ }
-+ } //for
-+
-+ //if landBlk has predecessors that are not in the given loop,
-+ //create a new block
-+ BlockT *newLandBlk = landBlk;
-+ if (inpathBlks.size() != landBlk->pred_size()) {
-+ newLandBlk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(newLandBlk); //insert to function
-+ newLandBlk->addSuccessor(landBlk);
-+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
-+ inpathBlks.begin(),
-+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
-+ BlockT *curBlk = *iter;
-+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
-+ //srcBlk, oldBlk, newBlk
-+ curBlk->removeSuccessor(landBlk);
-+ curBlk->addSuccessor(newLandBlk);
-+ }
-+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
-+ if (exitBlks[i] == landBlk) {
-+ exitBlks[i] = newLandBlk;
-+ }
-+ }
-+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
-+ }
-+
-+ setLoopLandBlock(loopRep, newLandBlk);
-+
-+ return newLandBlk;
-+} // recordLoopLandBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ if (theEntry == NULL) {
-+ theEntry = new LoopLandInfo();
-+ }
-+ assert(theEntry->landBlk == NULL);
-+
-+ if (blk == NULL) {
-+ blk = funcRep->CreateMachineBasicBlock();
-+ funcRep->push_back(blk); //insert to function
-+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
-+ }
-+
-+ theEntry->landBlk = blk;
-+
-+ if (DEBUGME) {
-+ errs() << "setLoopLandBlock loop-header = BB"
-+ << loopRep->getHeader()->getNumber()
-+ << " landing-block = BB" << blk->getNumber() << "\n";
-+ }
-+} // setLoopLandBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ if (theEntry == NULL) {
-+ theEntry = new LoopLandInfo();
-+ }
-+
-+ theEntry->breakOnRegs.insert(regNum);
-+
-+ if (DEBUGME) {
-+ errs() << "addLoopBreakOnReg loop-header = BB"
-+ << loopRep->getHeader()->getNumber()
-+ << " regNum = " << regNum << "\n";
-+ }
-+} // addLoopBreakOnReg
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ if (theEntry == NULL) {
-+ theEntry = new LoopLandInfo();
-+ }
-+ theEntry->contOnRegs.insert(regNum);
-+
-+ if (DEBUGME) {
-+ errs() << "addLoopContOnReg loop-header = BB"
-+ << loopRep->getHeader()->getNumber()
-+ << " regNum = " << regNum << "\n";
-+ }
-+} // addLoopContOnReg
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ if (theEntry == NULL) {
-+ theEntry = new LoopLandInfo();
-+ }
-+ theEntry->breakInitRegs.insert(regNum);
-+
-+ if (DEBUGME) {
-+ errs() << "addLoopBreakInitReg loop-header = BB"
-+ << loopRep->getHeader()->getNumber()
-+ << " regNum = " << regNum << "\n";
-+ }
-+} // addLoopBreakInitReg
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ if (theEntry == NULL) {
-+ theEntry = new LoopLandInfo();
-+ }
-+ theEntry->contInitRegs.insert(regNum);
-+
-+ if (DEBUGME) {
-+ errs() << "addLoopContInitReg loop-header = BB"
-+ << loopRep->getHeader()->getNumber()
-+ << " regNum = " << regNum << "\n";
-+ }
-+} // addLoopContInitReg
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
-+ RegiT regNum) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ if (theEntry == NULL) {
-+ theEntry = new LoopLandInfo();
-+ }
-+ theEntry->endbranchInitRegs.insert(regNum);
-+
-+ if (DEBUGME) {
-+ errs() << "addLoopEndbranchInitReg loop-header = BB"
-+ << loopRep->getHeader()->getNumber()
-+ << " regNum = " << regNum << "\n";
-+ }
-+} // addLoopEndbranchInitReg
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::LoopLandInfo *
-+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ return theEntry;
-+} // getLoopLandInfo
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
-+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-+
-+ return theEntry ? theEntry->landBlk : NULL;
-+} // getLoopLandBlock
-+
-+
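-+// curBlk has a back edge if it is a direct predecessor of the header of
-+// its enclosing loop.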
-+template<class PassT>
-+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
-+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
-+ if (loopRep == NULL)
-+ return false;
-+
-+ BlockT *loopHeader = loopRep->getHeader();
-+
-+ return curBlk->isSuccessor(loopHeader);
-+
-+} //hasBackEdge
-+
-+template<class PassT>
-+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
-+ return loopRep ? loopRep->getLoopDepth() : 0;
-+} //getLoopDepth
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::countActiveBlock
-+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
-+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
-+ int count = 0;
-+ while (iterStart != iterEnd) {
-+ if (!isRetiredBlock(*iterStart)) {
-+ ++count;
-+ }
-+ ++iterStart;
-+ }
-+
-+ return count;
-+} //countActiveBlock
-+
-+// This is a workaround for findNearestCommonDominator not being available
-+// for post-dominators; a proper fix should go into Dominators.h.
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT*
-+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
-+
-+ if (postDomTree->dominates(blk1, blk2)) {
-+ return blk1;
-+ }
-+ if (postDomTree->dominates(blk2, blk1)) {
-+ return blk2;
-+ }
-+
-+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
-+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
-+
-+ // Handle newly cloned node.
-+ if (node1 == NULL && blk1->succ_size() == 1) {
-+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
-+ }
-+ if (node2 == NULL && blk2->succ_size() == 1) {
-+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
-+ }
-+
-+ if (node1 == NULL || node2 == NULL) {
-+ return NULL;
-+ }
-+
-+ node1 = node1->getIDom();
-+ while (node1) {
-+ if (postDomTree->dominates(node1, node2)) {
-+ return node1->getBlock();
-+ }
-+ node1 = node1->getIDom();
-+ }
-+
-+ return NULL;
-+}
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::findNearestCommonPostDom
-+(typename std::set<BlockT *> &blks) {
-+ BlockT *commonDom;
-+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
-+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
-+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
-+ BlockT *curBlk = *iter;
-+ if (curBlk != commonDom) {
-+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
-+ }
-+ }
-+
-+ if (DEBUGME) {
-+ errs() << "Common post dominator for exit blocks is ";
-+ if (commonDom) {
-+ errs() << "BB" << commonDom->getNumber() << "\n";
-+ } else {
-+ errs() << "NULL\n";
-+ }
-+ }
-+
-+ return commonDom;
-+} //findNearestCommonPostDom
-+
-+} //end namespace llvmCFGStruct
-+
-+//todo: move-end
-+
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGStructurizer for AMDGPU
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+using namespace llvmCFGStruct;
-+
-+namespace llvm {
-+class AMDGPUCFGStructurizer : public MachineFunctionPass {
-+public:
-+ typedef MachineInstr InstructionType;
-+ typedef MachineFunction FunctionType;
-+ typedef MachineBasicBlock BlockType;
-+ typedef MachineLoopInfo LoopinfoType;
-+ typedef MachineDominatorTree DominatortreeType;
-+ typedef MachinePostDominatorTree PostDominatortreeType;
-+ typedef MachineDomTreeNode DomTreeNodeType;
-+ typedef MachineLoop LoopType;
-+
-+protected:
-+ TargetMachine &TM;
-+ const TargetInstrInfo *TII;
-+ const AMDGPURegisterInfo *TRI;
-+
-+public:
-+ AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
-+ const TargetInstrInfo *getTargetInstrInfo() const;
-+
-+private:
-+
-+};
-+
-+} //end of namespace llvm
-+AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
-+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
-+ TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
-+}
-+
-+const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
-+ return TII;
-+}
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGPrepare
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+using namespace llvmCFGStruct;
-+
-+namespace llvm {
-+class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
-+public:
-+ static char ID;
-+
-+public:
-+ AMDGPUCFGPrepare(TargetMachine &tm);
-+
-+ virtual const char *getPassName() const;
-+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-+
-+ bool runOnMachineFunction(MachineFunction &F);
-+
-+private:
-+
-+};
-+
-+char AMDGPUCFGPrepare::ID = 0;
-+} //end of namespace llvm
-+
-+AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
-+ : AMDGPUCFGStructurizer(ID, tm ) {
-+}
-+const char *AMDGPUCFGPrepare::getPassName() const {
-+ return "AMD IL Control Flow Graph Preparation Pass";
-+}
-+
-+void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
-+ AU.addPreserved<MachineFunctionAnalysis>();
-+ AU.addRequired<MachineFunctionAnalysis>();
-+ AU.addRequired<MachineDominatorTree>();
-+ AU.addRequired<MachinePostDominatorTree>();
-+ AU.addRequired<MachineLoopInfo>();
-+}
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGPerform
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+using namespace llvmCFGStruct;
-+
-+namespace llvm {
-+class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
-+public:
-+ static char ID;
-+
-+public:
-+ AMDGPUCFGPerform(TargetMachine &tm);
-+ virtual const char *getPassName() const;
-+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-+ bool runOnMachineFunction(MachineFunction &F);
-+
-+private:
-+
-+};
-+
-+char AMDGPUCFGPerform::ID = 0;
-+} //end of namespace llvm
-+
-+AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
-+: AMDGPUCFGStructurizer(ID, tm) {
-+}
-+
-+const char *AMDGPUCFGPerform::getPassName() const {
-+ return "AMD IL Control Flow Graph structurizer Pass";
-+}
-+
-+void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
-+ AU.addPreserved<MachineFunctionAnalysis>();
-+ AU.addRequired<MachineFunctionAnalysis>();
-+ AU.addRequired<MachineDominatorTree>();
-+ AU.addRequired<MachinePostDominatorTree>();
-+ AU.addRequired<MachineLoopInfo>();
-+}
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGStructTraits<AMDGPUCFGStructurizer>
-+//
-+//===----------------------------------------------------------------------===//
-+
-+namespace llvmCFGStruct {
-+// This class is tailored to the AMDGPU backend.
-+template<>
-+struct CFGStructTraits<AMDGPUCFGStructurizer> {
-+ typedef int RegiT;
-+
-+ static int getBranchNzeroOpcode(int oldOpcode) {
-+ switch(oldOpcode) {
-+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
-+ case AMDGPU::BRANCH_COND_i32:
-+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
-+ default:
-+ assert(0 && "internal error");
-+ }
-+ return -1;
-+ }
-+
-+ static int getBranchZeroOpcode(int oldOpcode) {
-+ switch(oldOpcode) {
-+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
-+ case AMDGPU::BRANCH_COND_i32:
-+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
-+ default:
-+ assert(0 && "internal error");
-+ }
-+ return -1;
-+ }
-+
-+ static int getContinueNzeroOpcode(int oldOpcode) {
-+ switch(oldOpcode) {
-+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
-+ default:
-+ assert(0 && "internal error");
-+ }
-+ return -1;
-+ }
-+
-+ static int getContinueZeroOpcode(int oldOpcode) {
-+ switch(oldOpcode) {
-+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
-+ default:
-+ assert(0 && "internal error");
-+ }
-+ return -1;
-+ }
-+
-+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
-+ return instr->getOperand(0).getMBB();
-+ }
-+
-+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
-+ instr->getOperand(0).setMBB(blk);
-+ }
-+
-+ static MachineBasicBlock *
-+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
-+ assert(blk->succ_size() == 2);
-+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
-+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
-+ MachineBasicBlock::succ_iterator iterNext = iter;
-+ ++iterNext;
-+
-+ return (*iter == trueBranch) ? *iterNext : *iter;
-+ }
-+
-+ static bool isCondBranch(MachineInstr *instr) {
-+ switch (instr->getOpcode()) {
-+ case AMDGPU::JUMP:
-+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
-+ case AMDGPU::BRANCH_COND_i32:
-+ case AMDGPU::BRANCH_COND_f32:
-+ break;
-+ default:
-+ return false;
-+ }
-+ return true;
-+ }
-+
-+ static bool isUncondBranch(MachineInstr *instr) {
-+ switch (instr->getOpcode()) {
-+ case AMDGPU::JUMP:
-+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
-+ case AMDGPU::BRANCH:
-+ return true;
-+ default:
-+ return false;
-+ }
-+ return true;
-+ }
-+
-+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
-+ //get the DebugLoc of the last instruction in the block that has debug info
-+ DebugLoc DL;
-+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
-+ MachineInstr *instr = &(*iter);
-+ if (instr->getDebugLoc().isUnknown() == false) {
-+ DL = instr->getDebugLoc();
-+ }
-+ }
-+ return DL;
-+ }
-+
-+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
-+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
-+ MachineInstr *instr = &*iter;
-+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
-+ return instr;
-+ }
-+ return NULL;
-+ }
-+
-+ // A more accurate name for this is getPossibleLoopendBlockBranchInstr.
-+ //
-+ // A BB with a backward edge can have move instructions after the branch
-+ // instruction; such move instructions "belong to" the loop backward edge.
-+ //
-+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
-+ const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
-+ blk->getParent()->getTarget().getInstrInfo());
-+
-+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
-+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
-+ // FIXME: Simplify
-+ MachineInstr *instr = &*iter;
-+ if (instr) {
-+ if (isCondBranch(instr) || isUncondBranch(instr)) {
-+ return instr;
-+ } else if (!TII->isMov(instr->getOpcode())) {
-+ break;
-+ }
-+ }
-+ }
-+ return NULL;
-+ }
-+
-+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
-+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
-+ if (iter != blk->rend()) {
-+ MachineInstr *instr = &(*iter);
-+ if (instr->getOpcode() == AMDGPU::RETURN) {
-+ return instr;
-+ }
-+ }
-+ return NULL;
-+ }
-+
-+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
-+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
-+ if (iter != blk->rend()) {
-+ MachineInstr *instr = &(*iter);
-+ if (instr->getOpcode() == AMDGPU::CONTINUE) {
-+ return instr;
-+ }
-+ }
-+ return NULL;
-+ }
-+
-+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
-+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
-+ MachineInstr *instr = &(*iter);
-+ if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
-+ return instr;
-+ }
-+ }
-+ return NULL;
-+ }
-+
-+ static bool isReturnBlock(MachineBasicBlock *blk) {
-+ MachineInstr *instr = getReturnInstr(blk);
-+ bool isReturn = (blk->succ_size() == 0);
-+ if (instr) {
-+ assert(isReturn);
-+ } else if (isReturn) {
-+ if (DEBUGME) {
-+ errs() << "BB" << blk->getNumber()
-+ <<" is return block without RETURN instr\n";
-+ }
-+ }
-+
-+ return isReturn;
-+ }
-+
-+ static MachineBasicBlock::iterator
-+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
-+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
-+ MachineBasicBlock::iterator iter = blk->begin();
-+ MachineBasicBlock::iterator iterEnd = blk->end();
-+ while (&(*iter) != instr && iter != iterEnd) {
-+ ++iter;
-+ }
-+
-+ assert(iter != iterEnd);
-+ return iter;
-+ }//getInstrPos
-+
-+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
-+ AMDGPUCFGStructurizer *passRep) {
-+ return insertInstrBefore(blk, newOpcode, passRep, DebugLoc());
-+ } //insertInstrBefore
-+
-+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
-+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
-+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-+ MachineInstr *newInstr =
-+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
-+
-+ if (blk->begin() != blk->end()) {
-+ blk->insert(blk->begin(), newInstr);
-+ } else {
-+ blk->push_back(newInstr);
-+ }
-+
-+ SHOWNEWINSTR(newInstr);
-+
-+ return newInstr;
-+ } //insertInstrBefore
-+
-+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
-+ AMDGPUCFGStructurizer *passRep) {
-+ insertInstrEnd(blk, newOpcode, passRep, DebugLoc());
-+ } //insertInstrEnd
-+
-+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
-+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
-+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-+ MachineInstr *newInstr = blk->getParent()
-+ ->CreateMachineInstr(tii->get(newOpcode), DL);
-+
-+ blk->push_back(newInstr);
-+ //assume the instruction doesn't take any reg operand ...
-+
-+ SHOWNEWINSTR(newInstr);
-+ } //insertInstrEnd
-+
-+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
-+ int newOpcode,
-+ AMDGPUCFGStructurizer *passRep) {
-+ MachineInstr *oldInstr = &(*instrPos);
-+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-+ MachineBasicBlock *blk = oldInstr->getParent();
-+ MachineInstr *newInstr =
-+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
-+ DebugLoc());
-+
-+ blk->insert(instrPos, newInstr);
-+ //assume the instruction doesn't take any reg operand ...
-+
-+ SHOWNEWINSTR(newInstr);
-+ return newInstr;
-+ } //insertInstrBefore
-+
-+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
-+ int newOpcode,
-+ AMDGPUCFGStructurizer *passRep,
-+ DebugLoc DL) {
-+ MachineInstr *oldInstr = &(*instrPos);
-+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-+ MachineBasicBlock *blk = oldInstr->getParent();
-+ MachineInstr *newInstr =
-+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
-+ DL);
-+
-+ blk->insert(instrPos, newInstr);
-+ MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
-+ false);
-+
-+ SHOWNEWINSTR(newInstr);
-+ //oldInstr is erased later by the caller
-+ } //insertCondBranchBefore
-+
-+ static void insertCondBranchBefore(MachineBasicBlock *blk,
-+ MachineBasicBlock::iterator insertPos,
-+ int newOpcode,
-+ AMDGPUCFGStructurizer *passRep,
-+ RegiT regNum,
-+ DebugLoc DL) {
-+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-+
-+ MachineInstr *newInstr =
-+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
-+
-+ //insert before
-+ blk->insert(insertPos, newInstr);
-+ MachineInstrBuilder(newInstr).addReg(regNum, false);
-+
-+ SHOWNEWINSTR(newInstr);
-+ } //insertCondBranchBefore
-+
-+ static void insertCondBranchEnd(MachineBasicBlock *blk,
-+ int newOpcode,
-+ AMDGPUCFGStructurizer *passRep,
-+ RegiT regNum) {
-+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-+ MachineInstr *newInstr =
-+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
-+
-+ blk->push_back(newInstr);
-+ MachineInstrBuilder(newInstr).addReg(regNum, false);
-+
-+ SHOWNEWINSTR(newInstr);
-+ } //insertCondBranchEnd
-+
-+
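-+ // Insert "regNum = regVal" (an immediate move) immediately before instrPos.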
-+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
-+ AMDGPUCFGStructurizer *passRep,
-+ RegiT regNum, int regVal) {
-+ MachineInstr *oldInstr = &(*instrPos);
-+ const AMDGPUInstrInfo *tii =
-+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
-+ MachineBasicBlock *blk = oldInstr->getParent();
-+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
-+ regVal);
-+ blk->insert(instrPos, newInstr);
-+
-+ SHOWNEWINSTR(newInstr);
-+ } //insertAssignInstrBefore
-+
-+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
-+ AMDGPUCFGStructurizer *passRep,
-+ RegiT regNum, int regVal) {
-+ const AMDGPUInstrInfo *tii =
-+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
-+
-+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
-+ regVal);
-+ if (blk->begin() != blk->end()) {
-+ blk->insert(blk->begin(), newInstr);
-+ } else {
-+ blk->push_back(newInstr);
-+ }
-+
-+ SHOWNEWINSTR(newInstr);
-+
-+ } //insertAssignInstrBefore
-+
-+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
-+ MachineBasicBlock::iterator instrPos,
-+ AMDGPUCFGStructurizer *passRep,
-+ RegiT dstReg, RegiT src1Reg,
-+ RegiT src2Reg) {
-+ const AMDGPUInstrInfo *tii =
-+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
-+ MachineInstr *newInstr =
-+ blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
-+
-+ MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
-+ MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
-+ MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
-+
-+ blk->insert(instrPos, newInstr);
-+ SHOWNEWINSTR(newInstr);
-+
-+ } //insertCompareInstrBefore
-+
-+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
-+ MachineBasicBlock *srcBlk) {
-+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
-+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
-+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
-+ }
-+ } //cloneSuccessorList
-+
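-+
-+ // Deep-copy srcBlk: create a new block in the function and clone every
-+ // instruction into it. Successor edges are not copied; see
-+ // cloneSuccessorList.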
-+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
-+ MachineFunction *func = srcBlk->getParent();
-+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
-+ func->push_back(newBlk); //insert to function
-+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
-+ iterEnd = srcBlk->end();
-+ iter != iterEnd; ++iter) {
-+ MachineInstr *instr = func->CloneMachineInstr(iter);
-+ newBlk->push_back(instr);
-+ }
-+ return newBlk;
-+ }
-+
-+ //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose here
-+ //because the AMDGPU branch instruction is not recognized as a terminator.
-+ //Fix that and then retire this routine.
-+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
-+ MachineBasicBlock *oldBlk,
-+ MachineBasicBlock *newBlk) {
-+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
-+ if (branchInstr && isCondBranch(branchInstr) &&
-+ getTrueBranch(branchInstr) == oldBlk) {
-+ setTrueBranch(branchInstr, newBlk);
-+ }
-+ }
-+
-+ static void wrapup(MachineBasicBlock *entryBlk) {
-+ assert((!entryBlk->getParent()->getJumpTableInfo()
-+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
-+ && "found a jump table");
-+
-+ //collect continue right before endloop
-+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
-+ MachineBasicBlock::iterator pre = entryBlk->begin();
-+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
-+ MachineBasicBlock::iterator iter = pre;
-+ while (iter != iterEnd) {
-+ if (pre->getOpcode() == AMDGPU::CONTINUE
-+ && iter->getOpcode() == AMDGPU::ENDLOOP) {
-+ contInstr.push_back(pre);
-+ }
-+ pre = iter;
-+ ++iter;
-+ } //end while
-+
-+ //delete continue right before endloop
-+ for (unsigned i = 0; i < contInstr.size(); ++i) {
-+ contInstr[i]->eraseFromParent();
-+ }
-+
-+ // TODO: fix up the jump table so later phases won't be confused. If
-+ // jumpTableInfo->isEmpty() is false, the jump table needs to be cleaned,
-+ // but there is no such interface yet; alternatively, replace all the
-+ // other blocks in the jump table with the entryBlk.
-+
-+ } //wrapup
-+
-+ static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
-+ return &pass.getAnalysis<MachineDominatorTree>();
-+ }
-+
-+ static MachinePostDominatorTree*
-+ getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
-+ return &pass.getAnalysis<MachinePostDominatorTree>();
-+ }
-+
-+ static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
-+ return &pass.getAnalysis<MachineLoopInfo>();
-+ }
-+}; // struct CFGStructTraits<AMDGPUCFGStructurizer>
-+} //end of namespace llvmCFGStruct
-+
-+// createAMDGPUCFGPreparationPass - Returns a pass that prepares the CFG
-+// for structurization.
-+FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
-+ return new AMDGPUCFGPrepare(tm);
-+}
-+
-+bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
-+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
-+ *this,
-+ TRI);
-+}
-+
-+// createAMDGPUCFGStructurizerPass - Returns a pass that structurizes the CFG.
-+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
-+ return new AMDGPUCFGPerform(tm);
-+}
-+
-+bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
-+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
-+ *this,
-+ TRI);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDevice.cpp llvm-r600/lib/Target/R600/AMDILDevice.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILDevice.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILDevice.cpp 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,124 @@
-+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+#include "AMDILDevice.h"
-+#include "AMDGPUSubtarget.h"
-+
-+using namespace llvm;
-+// Default implementation for all of the classes.
-+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) {
-+ mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
-+ mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
-+ setCaps();
-+ DeviceFlag = OCL_DEVICE_ALL;
-+}
-+
-+AMDGPUDevice::~AMDGPUDevice() {
-+ mHWBits.clear();
-+ mSWBits.clear();
-+}
-+
-+size_t AMDGPUDevice::getMaxGDSSize() const {
-+ return 0;
-+}
-+
-+uint32_t
-+AMDGPUDevice::getDeviceFlag() const {
-+ return DeviceFlag;
-+}
-+
-+size_t AMDGPUDevice::getMaxNumCBs() const {
-+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
-+ return HW_MAX_NUM_CB;
-+ }
-+
-+ return 0;
-+}
-+
-+size_t AMDGPUDevice::getMaxCBSize() const {
-+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
-+ return MAX_CB_SIZE;
-+ }
-+
-+ return 0;
-+}
-+
-+size_t AMDGPUDevice::getMaxScratchSize() const {
-+ return 65536;
-+}
-+
-+uint32_t AMDGPUDevice::getStackAlignment() const {
-+ return 16;
-+}
-+
-+void AMDGPUDevice::setCaps() {
-+ mSWBits.set(AMDGPUDeviceInfo::HalfOps);
-+ mSWBits.set(AMDGPUDeviceInfo::ByteOps);
-+ mSWBits.set(AMDGPUDeviceInfo::ShortOps);
-+ mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
-+ mSWBits.set(AMDGPUDeviceInfo::NoInline);
-+ }
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
-+ mSWBits.set(AMDGPUDeviceInfo::MacroDB);
-+ }
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
-+ mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
-+ } else {
-+ mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
-+ }
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
-+ mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
-+ } else {
-+ mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
-+ }
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
-+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
-+ }
-+ mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
-+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
-+}
-+
-+AMDGPUDeviceInfo::ExecutionMode
-+AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const {
-+ if (mHWBits[Caps]) {
-+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
-+ return AMDGPUDeviceInfo::Hardware;
-+ }
-+
-+ if (mSWBits[Caps]) {
-+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
-+ return AMDGPUDeviceInfo::Software;
-+ }
-+
-+ return AMDGPUDeviceInfo::Unsupported;
-+
-+}
-+
-+bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const {
-+ return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
-+}
-+
-+bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const {
-+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
-+}
-+
-+bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
-+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
-+}
-+
-+std::string
-+AMDGPUDevice::getDataLayout() const {
-+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
-+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
-+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
-+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
-+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
-+ "-n8:16:32:64");
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDevice.h llvm-r600/lib/Target/R600/AMDILDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDILDevice.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILDevice.h 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,117 @@
-+//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+//
-+/// This file will define the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDILDEVICEIMPL_H
-+#define AMDILDEVICEIMPL_H
-+#include "AMDIL.h"
-+#include "llvm/ADT/BitVector.h"
-+
-+namespace llvm {
-+ class AMDGPUSubtarget;
-+ class MCStreamer;
-+//===----------------------------------------------------------------------===//
-+// Interface for data that is specific to a single device
-+//===----------------------------------------------------------------------===//
-+class AMDGPUDevice {
-+public:
-+ AMDGPUDevice(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPUDevice();
-+
-+ // Enum values for the various memory types.
-+ enum {
-+ RAW_UAV_ID = 0,
-+ ARENA_UAV_ID = 1,
-+ LDS_ID = 2,
-+ GDS_ID = 3,
-+ SCRATCH_ID = 4,
-+ CONSTANT_ID = 5,
-+ GLOBAL_ID = 6,
-+ MAX_IDS = 7
-+ } IO_TYPE_IDS;
-+
-+ /// \returns The max LDS size that the hardware supports. Size is in
-+ /// bytes.
-+ virtual size_t getMaxLDSSize() const = 0;
-+
-+ /// \returns The max GDS size that the hardware supports if the GDS is
-+ /// supported by the hardware. Size is in bytes.
-+ virtual size_t getMaxGDSSize() const;
-+
-+ /// \returns The max number of hardware constant address spaces that
-+ /// are supported by this device.
-+ virtual size_t getMaxNumCBs() const;
-+
-+ /// \returns The max number of bytes a single hardware constant buffer
-+ /// can support. Size is in bytes.
-+ virtual size_t getMaxCBSize() const;
-+
-+ /// \returns The max number of bytes allowed by the hardware scratch
-+ /// buffer. Size is in bytes.
-+ virtual size_t getMaxScratchSize() const;
-+
-+ /// \brief Get the flag that corresponds to the device.
-+ virtual uint32_t getDeviceFlag() const;
-+
-+ /// \returns The number of work-items that exist in a single hardware
-+ /// wavefront.
-+ virtual size_t getWavefrontSize() const = 0;
-+
-+ /// \brief Get the generational name of this specific device.
-+ virtual uint32_t getGeneration() const = 0;
-+
-+ /// \brief Get the stack alignment of this specific device.
-+ virtual uint32_t getStackAlignment() const;
-+
-+ /// \brief Get the resource ID for this specific device.
-+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
-+
-+ /// \brief Get the max number of UAV's for this device.
-+ virtual uint32_t getMaxNumUAVs() const = 0;
-+
-+
-+ // API exposing the more detailed capabilities of each family of
-+ // cards. If a capability is supported, then either usesHardware or
-+ // usesSoftware returns true. If usesHardware returns true, then
-+ // usesSoftware must return false for the same capability. Hardware
-+ // execution means that the feature is done natively by the hardware
-+ // and is not emulated in software. Software execution means that the
-+ // feature could be done in the hardware, but is instead emulated in
-+ // software, possibly with hardware support, because the hardware does
-+ // not fully comply with the OpenCL spec.
-+
-+ bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
-+ bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
-+ bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
-+ virtual std::string getDataLayout() const;
-+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
-+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
-+ static const unsigned int WavefrontSize = 64;
-+ static const unsigned int HalfWavefrontSize = 32;
-+ static const unsigned int QuarterWavefrontSize = 16;
-+protected:
-+ virtual void setCaps();
-+ llvm::BitVector mHWBits;
-+ llvm::BitVector mSWBits;
-+ AMDGPUSubtarget *mSTM;
-+ uint32_t DeviceFlag;
-+private:
-+ AMDGPUDeviceInfo::ExecutionMode
-+ getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
-+};
-+
-+} // namespace llvm
-+#endif // AMDILDEVICEIMPL_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.cpp llvm-r600/lib/Target/R600/AMDILDeviceInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILDeviceInfo.cpp 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,94 @@
-+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Function that creates DeviceInfo from a device name and other information.
-+//
-+//==-----------------------------------------------------------------------===//
-+#include "AMDILDevices.h"
-+#include "AMDGPUSubtarget.h"
-+
-+using namespace llvm;
-+namespace llvm {
-+namespace AMDGPUDeviceInfo {
-+
-+AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
-+ AMDGPUSubtarget *ptr,
-+ bool is64bit, bool is64on32bit) {
-+ if (deviceName.c_str()[2] == '7') {
-+ switch (deviceName.c_str()[3]) {
-+ case '1':
-+ return new AMDGPU710Device(ptr);
-+ case '7':
-+ return new AMDGPU770Device(ptr);
-+ default:
-+ return new AMDGPU7XXDevice(ptr);
-+ }
-+ } else if (deviceName == "cypress") {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPUCypressDevice(ptr);
-+ } else if (deviceName == "juniper") {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPUEvergreenDevice(ptr);
-+ } else if (deviceName == "redwood") {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPURedwoodDevice(ptr);
-+ } else if (deviceName == "cedar") {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPUCedarDevice(ptr);
-+ } else if (deviceName == "barts" || deviceName == "turks") {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPUNIDevice(ptr);
-+ } else if (deviceName == "cayman") {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPUCaymanDevice(ptr);
-+ } else if (deviceName == "caicos") {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPUNIDevice(ptr);
-+ } else if (deviceName == "SI") {
-+ return new AMDGPUSIDevice(ptr);
-+ } else {
-+#if DEBUG
-+ assert(!is64bit && "This device does not support 64bit pointers!");
-+ assert(!is64on32bit && "This device does not support 64bit"
-+ " on 32bit pointers!");
-+#endif
-+ return new AMDGPU7XXDevice(ptr);
-+ }
-+}
-+} // End namespace AMDGPUDeviceInfo
-+} // End namespace llvm
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.h llvm-r600/lib/Target/R600/AMDILDeviceInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILDeviceInfo.h 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,88 @@
-+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+#ifndef AMDILDEVICEINFO_H
-+#define AMDILDEVICEINFO_H
-+
-+
-+#include <string>
-+
-+namespace llvm {
-+ class AMDGPUDevice;
-+ class AMDGPUSubtarget;
-+ namespace AMDGPUDeviceInfo {
-+ /// Each capability can be executed using a hardware instruction,
-+ /// emulated with a sequence of software instructions, or not
-+ /// supported at all.
-+ enum ExecutionMode {
-+ Unsupported = 0, ///< Unsupported feature on the card(Default value)
-+ /// This is the execution mode that is set if the feature is emulated in
-+ /// software.
-+ Software,
-+ /// This execution mode is set if the feature exists natively in hardware
-+ Hardware
-+ };
-+
-+ enum Caps {
-+ HalfOps = 0x1, ///< Half float is supported or not.
-+ DoubleOps = 0x2, ///< Double is supported or not.
-+ ByteOps = 0x3, ///< Byte(char) is supported or not.
-+ ShortOps = 0x4, ///< Short is supported or not.
-+ LongOps = 0x5, ///< Long is supported or not.
-+ Images = 0x6, ///< Images are supported or not.
-+ ByteStores = 0x7, ///< ByteStores available(!HD4XXX).
-+ ConstantMem = 0x8, ///< Constant/CB memory.
-+ LocalMem = 0x9, ///< Local/LDS memory.
-+ PrivateMem = 0xA, ///< Scratch/Private/Stack memory.
-+ RegionMem = 0xB, ///< OCL GDS Memory Extension.
-+ FMA = 0xC, ///< Use HW FMA or SW FMA.
-+ ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023.
-+ MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7.
-+ Reserved0 = 0xF, ///< ReservedFlag
-+ NoAlias = 0x10, ///< Cached loads.
-+ Signed24BitOps = 0x11, ///< Peephole Optimization.
-+ /// Debug mode implies that no hardware features or optimizations
-+ /// are performed and that all memory accesses go through a single
-+ /// uav (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
-+ Debug = 0x12,
-+ CachedMem = 0x13, ///< Cached mem is available or not.
-+ BarrierDetect = 0x14, ///< Detect duplicate barriers.
-+ Reserved1 = 0x15, ///< Reserved flag
-+ ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available.
-+ ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work.
-+ TmrReg = 0x18, ///< Flag to specify if Tmr register is supported.
-+ NoInline = 0x19, ///< Flag to specify that no inlining should occur.
-+ MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb.
-+ HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod.
-+ ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported.
-+ PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's.
-+ /// If more capabilities are required, then
-+ /// this number needs to be increased.
-+ /// All capabilities must come before this
-+ /// number.
-+ MaxNumberCapabilities = 0x20
-+ };
-+ /// These must be kept in order, with older generations
-+ /// having lower enumeration values.
-+ enum Generation {
-+ HD4XXX = 0, ///< 7XX based devices.
-+ HD5XXX, ///< Evergreen based devices.
-+ HD6XXX, ///< NI/Evergreen+ based devices.
-+ HD7XXX, ///< Southern Islands based devices.
-+ HDTEST, ///< Experimental feature testing device.
-+ HDNUMGEN
-+ };
-+
-+
-+ AMDGPUDevice*
-+ getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
-+ bool is64bit = false, bool is64on32bit = false);
-+ } // namespace AMDGPUDeviceInfo
-+} // namespace llvm
-+#endif // AMDILDEVICEINFO_H
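Editor's note: a minimal usage sketch for the factory declared above, not part of the patch. It assumes the AMDGPUSubtarget pointer comes from the target machine setup elsewhere in this series; "redwood" is just an example device string.

#include "AMDILDeviceInfo.h"
#include "AMDILDevices.h"

llvm::AMDGPUDevice *createExampleDevice(llvm::AMDGPUSubtarget *ST) {
  // 32-bit pointers assumed; unknown names fall back to a 7XX device.
  return llvm::AMDGPUDeviceInfo::getDeviceFromName("redwood", ST,
                                                   /*is64bit=*/false,
                                                   /*is64on32bit=*/false);
}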
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDevices.h llvm-r600/lib/Target/R600/AMDILDevices.h
---- llvm-3.2.src/lib/Target/R600/AMDILDevices.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILDevices.h 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,19 @@
-+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+#ifndef AMDIL_DEVICES_H
-+#define AMDIL_DEVICES_H
-+// Include all of the device specific header files
-+#include "AMDIL7XXDevice.h"
-+#include "AMDILDevice.h"
-+#include "AMDILEvergreenDevice.h"
-+#include "AMDILNIDevice.h"
-+#include "AMDILSIDevice.h"
-+
-+#endif // AMDIL_DEVICES_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.cpp llvm-r600/lib/Target/R600/AMDILEvergreenDevice.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILEvergreenDevice.cpp 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,169 @@
-+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+#include "AMDILEvergreenDevice.h"
-+
-+using namespace llvm;
-+
-+AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
-+: AMDGPUDevice(ST) {
-+ setCaps();
-+ std::string name = ST->getDeviceName();
-+ if (name == "cedar") {
-+ DeviceFlag = OCL_DEVICE_CEDAR;
-+ } else if (name == "redwood") {
-+ DeviceFlag = OCL_DEVICE_REDWOOD;
-+ } else if (name == "cypress") {
-+ DeviceFlag = OCL_DEVICE_CYPRESS;
-+ } else {
-+ DeviceFlag = OCL_DEVICE_JUNIPER;
-+ }
-+}
-+
-+AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
-+}
-+
-+size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return MAX_LDS_SIZE_800;
-+ } else {
-+ return 0;
-+ }
-+}
-+size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
-+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
-+ return MAX_LDS_SIZE_800;
-+ } else {
-+ return 0;
-+ }
-+}
-+uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
-+ return 12;
-+}
-+
-+uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
-+ switch(id) {
-+ default:
-+ assert(0 && "ID type passed in is unknown!");
-+ break;
-+ case CONSTANT_ID:
-+ case RAW_UAV_ID:
-+ return GLOBAL_RETURN_RAW_UAV_ID;
-+ case GLOBAL_ID:
-+ case ARENA_UAV_ID:
-+ return DEFAULT_ARENA_UAV_ID;
-+ case LDS_ID:
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return DEFAULT_LDS_ID;
-+ } else {
-+ return DEFAULT_ARENA_UAV_ID;
-+ }
-+ case GDS_ID:
-+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
-+ return DEFAULT_GDS_ID;
-+ } else {
-+ return DEFAULT_ARENA_UAV_ID;
-+ }
-+ case SCRATCH_ID:
-+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
-+ return DEFAULT_SCRATCH_ID;
-+ } else {
-+ return DEFAULT_ARENA_UAV_ID;
-+ }
-+ }
-+ return 0;
-+}
-+
-+size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
-+ return AMDGPUDevice::WavefrontSize;
-+}
-+
-+uint32_t AMDGPUEvergreenDevice::getGeneration() const {
-+ return AMDGPUDeviceInfo::HD5XXX;
-+}
-+
-+void AMDGPUEvergreenDevice::setCaps() {
-+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
-+ mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
-+ mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
-+ mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
-+ mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
-+ mHWBits.set(AMDGPUDeviceInfo::ByteStores);
-+ }
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
-+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-+ mSWBits.set(AMDGPUDeviceInfo::RegionMem);
-+ } else {
-+ mHWBits.set(AMDGPUDeviceInfo::LocalMem);
-+ mHWBits.set(AMDGPUDeviceInfo::RegionMem);
-+ }
-+ mHWBits.set(AMDGPUDeviceInfo::Images);
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
-+ mHWBits.set(AMDGPUDeviceInfo::NoAlias);
-+ }
-+ mHWBits.set(AMDGPUDeviceInfo::CachedMem);
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
-+ mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
-+ }
-+ mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
-+ mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
-+ mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
-+ mHWBits.set(AMDGPUDeviceInfo::LongOps);
-+ mSWBits.reset(AMDGPUDeviceInfo::LongOps);
-+ mHWBits.set(AMDGPUDeviceInfo::TmrReg);
-+}
-+
-+AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
-+ : AMDGPUEvergreenDevice(ST) {
-+ setCaps();
-+}
-+
-+AMDGPUCypressDevice::~AMDGPUCypressDevice() {
-+}
-+
-+void AMDGPUCypressDevice::setCaps() {
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
-+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
-+ mHWBits.set(AMDGPUDeviceInfo::FMA);
-+ }
-+}
-+
-+
-+AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
-+ : AMDGPUEvergreenDevice(ST) {
-+ setCaps();
-+}
-+
-+AMDGPUCedarDevice::~AMDGPUCedarDevice() {
-+}
-+
-+void AMDGPUCedarDevice::setCaps() {
-+ mSWBits.set(AMDGPUDeviceInfo::FMA);
-+}
-+
-+size_t AMDGPUCedarDevice::getWavefrontSize() const {
-+ return AMDGPUDevice::QuarterWavefrontSize;
-+}
-+
-+AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
-+ : AMDGPUEvergreenDevice(ST) {
-+ setCaps();
-+}
-+
-+AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {
-+}
-+
-+void AMDGPURedwoodDevice::setCaps() {
-+ mSWBits.set(AMDGPUDeviceInfo::FMA);
-+}
-+
-+size_t AMDGPURedwoodDevice::getWavefrontSize() const {
-+ return AMDGPUDevice::HalfWavefrontSize;
-+}
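Editor's note: the setCaps() overrides above populate the mHWBits/mSWBits capability sets that drive the ExecutionMode queries from AMDILDeviceInfo.h. A minimal sketch of that lookup, assuming the bitset-backed representation the patch implies (the real query lives in AMDILDevice.cpp, which is not in this hunk):

#include <bitset>

enum ExecutionMode { Unsupported, Software, Hardware };

// MaxNumberCapabilities is 0x20, so 32 bits cover every capability flag.
ExecutionMode getExecutionMode(const std::bitset<32> &HWBits,
                               const std::bitset<32> &SWBits,
                               unsigned Cap) {
  if (HWBits[Cap])
    return Hardware;    // feature exists natively in hardware
  if (SWBits[Cap])
    return Software;    // feature is emulated in software
  return Unsupported;   // feature unavailable on this device
}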
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.h llvm-r600/lib/Target/R600/AMDILEvergreenDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILEvergreenDevice.h 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,93 @@
-+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+///
-+/// This file defines the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDILEVERGREENDEVICE_H
-+#define AMDILEVERGREENDEVICE_H
-+#include "AMDILDevice.h"
-+#include "AMDGPUSubtarget.h"
-+
-+namespace llvm {
-+ class AMDGPUSubtarget;
-+//===----------------------------------------------------------------------===//
-+// Evergreen generation of devices and their respective sub classes
-+//===----------------------------------------------------------------------===//
-+
-+
-+/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
-+/// series of cards.
-+///
-+/// This class contains information required to differentiate
-+/// the Evergreen device from the generic AMDGPUDevice. This device represents
-+/// the capabilities of the 'Juniper' cards, also known as the HD57XX.
-+class AMDGPUEvergreenDevice : public AMDGPUDevice {
-+public:
-+ AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPUEvergreenDevice();
-+ virtual size_t getMaxLDSSize() const;
-+ virtual size_t getMaxGDSSize() const;
-+ virtual size_t getWavefrontSize() const;
-+ virtual uint32_t getGeneration() const;
-+ virtual uint32_t getMaxNumUAVs() const;
-+ virtual uint32_t getResourceID(uint32_t) const;
-+protected:
-+ virtual void setCaps();
-+};
-+
-+/// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has
-+/// support for double precision operations. This device is used to represent
-+/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
-+/// and HD59XX cards.
-+class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
-+public:
-+ AMDGPUCypressDevice(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPUCypressDevice();
-+private:
-+ virtual void setCaps();
-+};
-+
-+
-+/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
-+/// devices.
-+///
-+/// This class differs from the base AMDGPUEvergreenDevice in that the
-+/// device is roughly a quarter of a 'Juniper'. These are commercially known as the
-+/// HD54XX and HD53XX series of cards.
-+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
-+public:
-+ AMDGPUCedarDevice(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPUCedarDevice();
-+ virtual size_t getWavefrontSize() const;
-+private:
-+ virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based
-+/// devices.
-+///
-+/// This class differs from the base class in that these devices are
-+/// considered about half of a 'Juniper' device. These are commercially known as
-+/// the HD55XX and HD56XX series of cards.
-+class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
-+public:
-+ AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPURedwoodDevice();
-+ virtual size_t getWavefrontSize() const;
-+private:
-+ virtual void setCaps();
-+};
-+
-+} // namespace llvm
-+#endif // AMDILEVERGREENDEVICE_H
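Editor's note: the wavefront-size overrides in the Cedar and Redwood classes above amount to scaling the base Evergreen ('Juniper') wavefront. A sketch of the relationship, assuming the conventional 64-lane base (the actual constants live in AMDILDevice.h, outside this hunk):

unsigned wavefrontSizeFor(bool IsCedar, bool IsRedwood) {
  const unsigned BaseWavefront = 64;        // assumed WavefrontSize
  if (IsCedar)   return BaseWavefront / 4;  // QuarterWavefrontSize
  if (IsRedwood) return BaseWavefront / 2;  // HalfWavefrontSize
  return BaseWavefront;                     // Juniper, Cypress, etc.
}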
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.cpp llvm-r600/lib/Target/R600/AMDILFrameLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILFrameLowering.cpp 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,47 @@
-+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface to describe the layout of a stack frame on an AMDGPU target
-+/// machine.
-+//
-+//===----------------------------------------------------------------------===//
-+#include "AMDILFrameLowering.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+
-+using namespace llvm;
-+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
-+ int LAO, unsigned TransAl)
-+ : TargetFrameLowering(D, StackAl, LAO, TransAl) {
-+}
-+
-+AMDGPUFrameLowering::~AMDGPUFrameLowering() {
-+}
-+
-+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
-+ int FI) const {
-+ const MachineFrameInfo *MFI = MF.getFrameInfo();
-+ return MFI->getObjectOffset(FI);
-+}
-+
-+const TargetFrameLowering::SpillSlot *
-+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-+ NumEntries = 0;
-+ return 0;
-+}
-+void
-+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
-+}
-+void
-+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
-+}
-+bool
-+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
-+ return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.h llvm-r600/lib/Target/R600/AMDILFrameLowering.h
---- llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILFrameLowering.h 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,40 @@
-+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface to describe the layout of a stack frame on an AMDIL target
-+/// machine.
-+//
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDILFRAME_LOWERING_H
-+#define AMDILFRAME_LOWERING_H
-+
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/Target/TargetFrameLowering.h"
-+
-+namespace llvm {
-+
-+/// \brief Information about the stack frame layout on the AMDGPU targets.
-+///
-+/// It holds the direction of the stack growth, the known stack alignment on
-+/// entry to each function, and the offset to the locals area.
-+/// See TargetFrameInfo for more comments.
-+class AMDGPUFrameLowering : public TargetFrameLowering {
-+public:
-+ AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
-+ unsigned TransAl = 1);
-+ virtual ~AMDGPUFrameLowering();
-+ virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-+ virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
-+ virtual void emitPrologue(MachineFunction &MF) const;
-+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-+ virtual bool hasFP(const MachineFunction &MF) const;
-+};
-+} // namespace llvm
-+#endif // AMDILFRAME_LOWERING_H
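Editor's note: a hypothetical instantiation showing what the constructor parameters above mean. The values are illustrative only, not the ones the R600 target actually passes (those appear elsewhere in this series):

#include "AMDILFrameLowering.h"

// Stack grows up, 16-byte alignment on function entry, locals start at
// offset 0, transient stack objects aligned to 1 byte.
llvm::AMDGPUFrameLowering ExampleFL(
    llvm::TargetFrameLowering::StackGrowsUp,
    /*StackAl=*/16, /*LAO=*/0, /*TransAl=*/1);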
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL.h llvm-r600/lib/Target/R600/AMDIL.h
---- llvm-3.2.src/lib/Target/R600/AMDIL.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL.h 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,122 @@
-+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// This file contains the entry points for global functions defined in the LLVM
-+/// AMDGPU back-end.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDIL_H
-+#define AMDIL_H
-+
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/Target/TargetMachine.h"
-+
-+#define ARENA_SEGMENT_RESERVED_UAVS 12
-+#define DEFAULT_ARENA_UAV_ID 8
-+#define DEFAULT_RAW_UAV_ID 7
-+#define GLOBAL_RETURN_RAW_UAV_ID 11
-+#define HW_MAX_NUM_CB 8
-+#define MAX_NUM_UNIQUE_UAVS 8
-+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
-+#define OPENCL_MAX_READ_IMAGES 128
-+#define OPENCL_MAX_WRITE_IMAGES 8
-+#define OPENCL_MAX_SAMPLERS 16
-+
-+// The next three values can never be zero, as zero is the ID that is
-+// used to assert against.
-+#define DEFAULT_LDS_ID 1
-+#define DEFAULT_GDS_ID 1
-+#define DEFAULT_SCRATCH_ID 1
-+#define DEFAULT_VEC_SLOTS 8
-+
-+#define OCL_DEVICE_RV710 0x0001
-+#define OCL_DEVICE_RV730 0x0002
-+#define OCL_DEVICE_RV770 0x0004
-+#define OCL_DEVICE_CEDAR 0x0008
-+#define OCL_DEVICE_REDWOOD 0x0010
-+#define OCL_DEVICE_JUNIPER 0x0020
-+#define OCL_DEVICE_CYPRESS 0x0040
-+#define OCL_DEVICE_CAICOS 0x0080
-+#define OCL_DEVICE_TURKS 0x0100
-+#define OCL_DEVICE_BARTS 0x0200
-+#define OCL_DEVICE_CAYMAN 0x0400
-+#define OCL_DEVICE_ALL 0x3FFF
-+
-+/// The number of function IDs that are reserved for
-+/// internal compiler usage.
-+const unsigned int RESERVED_FUNCS = 1024;
-+
-+namespace llvm {
-+class AMDGPUInstrPrinter;
-+class FunctionPass;
-+class MCAsmInfo;
-+class raw_ostream;
-+class Target;
-+class TargetMachine;
-+
-+// Instruction selection passes.
-+FunctionPass*
-+ createAMDGPUISelDag(TargetMachine &TM);
-+FunctionPass*
-+ createAMDGPUPeepholeOpt(TargetMachine &TM);
-+
-+// Pre emit passes.
-+FunctionPass*
-+ createAMDGPUCFGPreparationPass(TargetMachine &TM);
-+FunctionPass*
-+ createAMDGPUCFGStructurizerPass(TargetMachine &TM);
-+
-+extern Target TheAMDGPUTarget;
-+} // end namespace llvm;
-+
-+// Include device information enumerations
-+#include "AMDILDeviceInfo.h"
-+
-+namespace llvm {
-+/// OpenCL uses address spaces to differentiate between
-+/// various memory regions on the hardware. On the CPU
-+/// all of the address spaces point to the same memory;
-+/// on the GPU, however, each address space points to
-+/// a separate piece of memory that is distinct from other
-+/// memory locations.
-+namespace AMDGPUAS {
-+enum AddressSpaces {
-+ PRIVATE_ADDRESS = 0, ///< Address space for private memory.
-+ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
-+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory
-+ LOCAL_ADDRESS = 3, ///< Address space for local memory.
-+ REGION_ADDRESS = 4, ///< Address space for region memory.
-+ ADDRESS_NONE = 5, ///< Address space for unknown memory.
-+ PARAM_D_ADDRESS = 6, ///< Address space for directly addressable parameter memory (CONST0)
-+ PARAM_I_ADDRESS = 7, ///< Address space for indirectly addressable parameter memory (VTX1)
-+ USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
-+ CONSTANT_BUFFER_0 = 9,
-+ CONSTANT_BUFFER_1 = 10,
-+ CONSTANT_BUFFER_2 = 11,
-+ CONSTANT_BUFFER_3 = 12,
-+ CONSTANT_BUFFER_4 = 13,
-+ CONSTANT_BUFFER_5 = 14,
-+ CONSTANT_BUFFER_6 = 15,
-+ CONSTANT_BUFFER_7 = 16,
-+ CONSTANT_BUFFER_8 = 17,
-+ CONSTANT_BUFFER_9 = 18,
-+ CONSTANT_BUFFER_10 = 19,
-+ CONSTANT_BUFFER_11 = 20,
-+ CONSTANT_BUFFER_12 = 21,
-+ CONSTANT_BUFFER_13 = 22,
-+ CONSTANT_BUFFER_14 = 23,
-+ CONSTANT_BUFFER_15 = 24,
-+ LAST_ADDRESS = 25
-+};
-+
-+} // namespace AMDGPUAS
-+
-+} // end namespace llvm
-+#endif // AMDIL_H
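Editor's note: a sketch of how a front-end might map OpenCL address-space qualifiers onto the AMDGPUAS enumeration above. The mapping follows the enum comments; it is an illustration, not code from this patch:

#include "AMDIL.h"
#include <string>

unsigned addrSpaceForQualifier(const std::string &Qual) {
  if (Qual == "__global")   return llvm::AMDGPUAS::GLOBAL_ADDRESS;
  if (Qual == "__constant") return llvm::AMDGPUAS::CONSTANT_ADDRESS;
  if (Qual == "__local")    return llvm::AMDGPUAS::LOCAL_ADDRESS;
  return llvm::AMDGPUAS::PRIVATE_ADDRESS; // __private and the default
}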
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILInstrInfo.td llvm-r600/lib/Target/R600/AMDILInstrInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDILInstrInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILInstrInfo.td 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,208 @@
-+//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+// This file describes the AMDIL instructions in TableGen format.
-+//
-+//===----------------------------------------------------------------------===//
-+// AMDIL Instruction Predicate Definitions
-+// Predicate that is set to true if the hardware supports double precision
-+// divide
-+def HasHWDDiv : Predicate<"Subtarget.device()"
-+ "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
-+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
-+
-+// Predicate that is set to true if the hardware supports double precision
-+// but not double precision divide in hardware
-+def HasSWDDiv : Predicate<"Subtarget.device()"
-+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
-+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
-+
-+// Predicate that is set to true if the hardware supports 24-bit signed
-+// math ops. Otherwise a software expansion to 32-bit math ops is used.
-+def HasHWSign24Bit : Predicate<"Subtarget.device()"
-+ "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
-+
-+// Predicates for whether 64-bit operations are handled in hardware or in software
-+def HasHW64Bit : Predicate<"Subtarget.device()"
-+ "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
-+def HasSW64Bit : Predicate<"Subtarget.device()"
-+ "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
-+
-+// Predicate that is set to true if the timer register is supported
-+def HasTmrRegister : Predicate<"Subtarget.device()"
-+ "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
-+// Predicate that is true if we are at least the Evergreen series
-+def HasDeviceIDInst : Predicate<"Subtarget.device()"
-+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
-+
-+// Predicate that is true if we have region address space.
-+def hasRegionAS : Predicate<"Subtarget.device()"
-+ "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
-+
-+// Predicate that is true if we don't have the region address space.
-+def noRegionAS : Predicate<"!Subtarget.device()"
-+ "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
-+
-+
-+// Predicates for whether 64-bit Mul is done in hardware or in software in the IL
-+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
-+ ">= CAL_VERSION_SC_139"
-+ "&& Subtarget.device()"
-+ "->getGeneration() >="
-+ "AMDGPUDeviceInfo::HD5XXX">;
-+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
-+ "< CAL_VERSION_SC_139">;
-+// Predicates for whether 64-bit Div/Mod is done in hardware or in software
-+def HasHW64DivMod : Predicate<"Subtarget.device()"
-+ "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
-+def HasSW64DivMod : Predicate<"Subtarget.device()"
-+ "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
-+
-+// Predicate that is set to true if 64-bit pointers are used.
-+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
-+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
-+//===--------------------------------------------------------------------===//
-+// Custom Operands
-+//===--------------------------------------------------------------------===//
-+def brtarget : Operand<OtherVT>;
-+
-+//===--------------------------------------------------------------------===//
-+// Custom Selection DAG Type Profiles
-+//===--------------------------------------------------------------------===//
-+//===----------------------------------------------------------------------===//
-+// Generic Profile Types
-+//===----------------------------------------------------------------------===//
-+
-+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
-+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-+ ]>;
-+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
-+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
-+ ]>;
-+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
-+ SDTCisEltOfVec<1, 0>
-+ ]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Flow Control Profile Types
-+//===----------------------------------------------------------------------===//
-+// Branch instruction where the first operand is the target basic block
-+def SDTIL_BRCond : SDTypeProfile<0, 2, [
-+ SDTCisVT<0, OtherVT>
-+ ]>;
-+
-+//===--------------------------------------------------------------------===//
-+// Custom Selection DAG Nodes
-+//===--------------------------------------------------------------------===//
-+//===----------------------------------------------------------------------===//
-+// Flow Control DAG Nodes
-+//===----------------------------------------------------------------------===//
-+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Call/Return DAG Nodes
-+//===----------------------------------------------------------------------===//
-+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
-+ [SDNPHasChain, SDNPOptInGlue]>;
-+
-+//===--------------------------------------------------------------------===//
-+// Instructions
-+//===--------------------------------------------------------------------===//
-+// Floating point math functions
-+def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-+def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
-+
-+//===----------------------------------------------------------------------===//
-+// Integer functions
-+//===----------------------------------------------------------------------===//
-+def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]>;
-+
-+//===--------------------------------------------------------------------===//
-+// Custom Pattern DAG Nodes
-+//===--------------------------------------------------------------------===//
-+def global_store : PatFrag<(ops node:$val, node:$ptr),
-+ (store node:$val, node:$ptr), [{
-+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
-+}]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Load pattern fragments
-+//===----------------------------------------------------------------------===//
-+// Global address space loads
-+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-+}]>;
-+// Constant address space loads
-+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-+}]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Complex addressing mode patterns
-+//===----------------------------------------------------------------------===//
-+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
-+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
-+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
-+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
-+
-+//===----------------------------------------------------------------------===//
-+// Instruction format classes
-+//===----------------------------------------------------------------------===//
-+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-+: Instruction {
-+
-+ let Namespace = "AMDGPU";
-+ dag OutOperandList = outs;
-+ dag InOperandList = ins;
-+ let Pattern = pattern;
-+ let AsmString = !strconcat(asmstr, "\n");
-+ let isPseudo = 1;
-+ let Itinerary = NullALU;
-+ bit hasIEEEFlag = 0;
-+ bit hasZeroOpFlag = 0;
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+//===--------------------------------------------------------------------===//
-+// Multiclass Instruction formats
-+//===--------------------------------------------------------------------===//
-+// Multiclass that handles branch instructions
-+multiclass BranchConditional<SDNode Op> {
-+ def _i32 : ILFormat<(outs),
-+ (ins brtarget:$target, GPRI32:$src0),
-+ "; i32 Pseudo branch instruction",
-+ [(Op bb:$target, GPRI32:$src0)]>;
-+ def _f32 : ILFormat<(outs),
-+ (ins brtarget:$target, GPRF32:$src0),
-+ "; f32 Pseudo branch instruction",
-+ [(Op bb:$target, GPRF32:$src0)]>;
-+}
-+
-+// Only scalar types should generate flow control
-+multiclass BranchInstr<string name> {
-+ def _i32 : ILFormat<(outs), (ins GPRI32:$src),
-+ !strconcat(name, " $src"), []>;
-+ def _f32 : ILFormat<(outs), (ins GPRF32:$src),
-+ !strconcat(name, " $src"), []>;
-+}
-+// Only scalar types should generate flow control
-+multiclass BranchInstr2<string name> {
-+ def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
-+ !strconcat(name, " $src0, $src1"), []>;
-+ def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
-+ !strconcat(name, " $src0, $src1"), []>;
-+}
-+
-+//===--------------------------------------------------------------------===//
-+// Intrinsics support
-+//===--------------------------------------------------------------------===//
-+include "AMDILIntrinsics.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.cpp llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.cpp 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,79 @@
-+//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU Implementation of the IntrinsicInfo class.
-+//
-+//===-----------------------------------------------------------------------===//
-+
-+#include "AMDILIntrinsicInfo.h"
-+#include "AMDIL.h"
-+#include "AMDGPUSubtarget.h"
-+#include "llvm/DerivedTypes.h"
-+#include "llvm/Intrinsics.h"
-+#include "llvm/Module.h"
-+
-+using namespace llvm;
-+
-+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-+
-+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
-+ : TargetIntrinsicInfo() {
-+}
-+
-+std::string
-+AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
-+ unsigned int numTys) const {
-+ static const char* const names[] = {
-+#define GET_INTRINSIC_NAME_TABLE
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_INTRINSIC_NAME_TABLE
-+ };
-+
-+ if (IntrID < Intrinsic::num_intrinsics) {
-+ // Returning a null char* here would invoke undefined behavior when
-+ // constructing the std::string result, so return an empty name instead.
-+ return "";
-+ }
-+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
-+ && "Invalid intrinsic ID");
-+
-+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
-+ return Result;
-+}
-+
-+unsigned int
-+AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
-+#define GET_FUNCTION_RECOGNIZER
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_FUNCTION_RECOGNIZER
-+ AMDGPUIntrinsic::ID IntrinsicID
-+ = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
-+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
-+
-+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
-+ return IntrinsicID;
-+ }
-+ return 0;
-+}
-+
-+bool
-+AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
-+ // Overload Table
-+#define GET_INTRINSIC_OVERLOAD_TABLE
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_INTRINSIC_OVERLOAD_TABLE
-+}
-+
-+Function*
-+AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
-+ Type **Tys,
-+ unsigned numTys) const {
-+ assert(!"Not implemented");
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.h llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.h 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,49 @@
-+//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
-+//
-+//===-----------------------------------------------------------------------===//
-+#ifndef AMDIL_INTRINSICS_H
-+#define AMDIL_INTRINSICS_H
-+
-+#include "llvm/Intrinsics.h"
-+#include "llvm/Target/TargetIntrinsicInfo.h"
-+
-+namespace llvm {
-+class TargetMachine;
-+
-+namespace AMDGPUIntrinsic {
-+enum ID {
-+ last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
-+#define GET_INTRINSIC_ENUM_VALUES
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_INTRINSIC_ENUM_VALUES
-+ , num_AMDGPU_intrinsics
-+};
-+
-+} // end namespace AMDGPUIntrinsic
-+
-+class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
-+public:
-+ AMDGPUIntrinsicInfo(TargetMachine *tm);
-+ std::string getName(unsigned int IntrId, Type **Tys = 0,
-+ unsigned int numTys = 0) const;
-+ unsigned int lookupName(const char *Name, unsigned int Len) const;
-+ bool isOverloaded(unsigned int IID) const;
-+ Function *getDeclaration(Module *M, unsigned int ID,
-+ Type **Tys = 0,
-+ unsigned int numTys = 0) const;
-+};
-+
-+} // end namespace llvm
-+
-+#endif // AMDIL_INTRINSICS_H
-+
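Editor's note: target intrinsic IDs are allocated immediately after the core LLVM intrinsics, which is what the enum above encodes. A small sketch of the resulting classification, using the boundary names from that enum (not part of the patch):

#include "AMDILIntrinsicInfo.h"

bool isAMDGPUIntrinsicID(unsigned ID) {
  return ID > llvm::AMDGPUIntrinsic::last_non_AMDGPU_intrinsic &&
         ID < llvm::AMDGPUIntrinsic::num_AMDGPU_intrinsics;
}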
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILIntrinsics.td llvm-r600/lib/Target/R600/AMDILIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/AMDILIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILIntrinsics.td 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,242 @@
-+//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+// This file defines all of the AMDIL-specific intrinsics.
-+//
-+//===---------------------------------------------------------------===//
-+//===--------------------------------------------------------------------===//
-+// Intrinsic classes
-+// Generic helper classes for target-specific intrinsics,
-+// used in place of SDNode patterns.
-+//===--------------------------------------------------------------------===//
-+let TargetPrefix = "AMDIL", isTarget = 1 in {
-+ class VoidIntLong :
-+ Intrinsic<[llvm_i64_ty], [], []>;
-+ class VoidIntInt :
-+ Intrinsic<[llvm_i32_ty], [], []>;
-+ class VoidIntBool :
-+ Intrinsic<[llvm_i32_ty], [], []>;
-+ class UnaryIntInt :
-+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-+ class UnaryIntFloat :
-+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-+ class ConvertIntFTOI :
-+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
-+ class ConvertIntITOF :
-+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
-+ class UnaryIntNoRetInt :
-+ Intrinsic<[], [llvm_anyint_ty], []>;
-+ class UnaryIntNoRetFloat :
-+ Intrinsic<[], [llvm_anyfloat_ty], []>;
-+ class BinaryIntInt :
-+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+ class BinaryIntFloat :
-+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+ class BinaryIntNoRetInt :
-+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
-+ class BinaryIntNoRetFloat :
-+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
-+ class TernaryIntInt :
-+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
-+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+ class TernaryIntFloat :
-+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
-+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+ class QuaternaryIntInt :
-+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
-+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+ class UnaryAtomicInt :
-+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+ class BinaryAtomicInt :
-+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+ class TernaryAtomicInt :
-+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
-+ class UnaryAtomicIntNoRet :
-+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+ class BinaryAtomicIntNoRet :
-+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+ class TernaryAtomicIntNoRet :
-+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+}
-+
-+let TargetPrefix = "AMDIL", isTarget = 1 in {
-+ def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
-+
-+ def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
-+ TernaryIntInt;
-+ def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
-+ TernaryIntInt;
-+ def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
-+ UnaryIntInt;
-+ def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
-+ UnaryIntInt;
-+ def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
-+ UnaryIntInt;
-+ def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
-+ UnaryIntInt;
-+ def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
-+ UnaryIntInt;
-+ def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
-+ TernaryIntInt;
-+ def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
-+ TernaryIntInt;
-+ def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
-+ QuaternaryIntInt;
-+ def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
-+ TernaryIntInt;
-+ def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
-+ BinaryIntInt;
-+ def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
-+ TernaryIntInt;
-+ def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
-+ TernaryIntInt;
-+ def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
-+ TernaryIntFloat;
-+ def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
-+ BinaryIntInt;
-+ def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
-+ BinaryIntInt;
-+ def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
-+ BinaryIntInt;
-+ def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
-+ BinaryIntInt;
-+ def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
-+ BinaryIntInt;
-+ def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
-+ BinaryIntInt;
-+ def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
-+ TernaryIntInt;
-+ def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
-+ TernaryIntInt;
-+ def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
-+ BinaryIntInt;
-+ def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
-+ BinaryIntInt;
-+ def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
-+ BinaryIntInt;
-+ def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
-+ BinaryIntInt;
-+ def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
-+ BinaryIntFloat;
-+ def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
-+ BinaryIntInt;
-+ def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
-+ BinaryIntInt;
-+ def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
-+ BinaryIntFloat;
-+ def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
-+ TernaryIntInt;
-+ def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
-+ TernaryIntInt;
-+ def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
-+ TernaryIntInt;
-+ def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
-+ UnaryIntFloat;
-+ def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
-+ TernaryIntFloat;
-+ def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
-+ UnaryIntFloat;
-+ def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
-+ UnaryIntFloat;
-+ def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
-+ UnaryIntFloat;
-+ def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
-+ UnaryIntFloat;
-+ def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
-+ UnaryIntFloat;
-+ def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
-+ UnaryIntFloat;
-+ def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
-+ UnaryIntFloat;
-+ def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
-+ UnaryIntFloat;
-+ def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
-+ UnaryIntFloat;
-+ def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
-+ UnaryIntFloat;
-+ def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
-+ UnaryIntFloat;
-+ def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
-+ UnaryIntFloat;
-+ def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
-+ def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
-+ def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
-+ def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
-+ UnaryIntFloat;
-+ def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
-+ UnaryIntFloat;
-+ def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
-+ UnaryIntFloat;
-+ def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
-+ UnaryIntFloat;
-+ def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
-+ UnaryIntFloat;
-+ def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
-+ UnaryIntFloat;
-+ def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
-+ UnaryIntFloat;
-+ def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
-+ UnaryIntFloat;
-+ def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
-+ TernaryIntFloat;
-+ def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
-+ UnaryIntFloat;
-+ def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
-+ UnaryIntFloat;
-+ def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
-+ UnaryIntFloat;
-+ def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
-+ TernaryIntFloat;
-+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
-+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
-+ llvm_v4i32_ty, llvm_i32_ty], []>;
-+
-+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
-+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
-+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
-+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
-+ def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
-+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
-+ def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
-+ ConvertIntITOF;
-+ def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
-+ ConvertIntFTOI;
-+ def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
-+ ConvertIntFTOI;
-+ def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
-+ ConvertIntFTOI;
-+ def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
-+ ConvertIntFTOI;
-+ def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
-+ ConvertIntFTOI;
-+ def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
-+ ConvertIntFTOI;
-+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
-+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
-+ def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
-+ ConvertIntITOF;
-+ def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
-+ ConvertIntITOF;
-+ def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
-+ ConvertIntITOF;
-+ def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
-+ ConvertIntITOF;
-+ def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
-+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
-+ llvm_v2f32_ty, llvm_float_ty], []>;
-+ def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
-+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
-+ llvm_v2f32_ty], []>;
-+ def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
-+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
-+ llvm_v4f32_ty], []>;
-+ def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
-+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
-+ llvm_v4f32_ty], []>;
-+}
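Editor's note: the GCCBuiltin names above are what front-ends use to reach these intrinsics; lookupName() in AMDILIntrinsicInfo.cpp resolves such names through the generated recognizer. A sketch of a resolution helper under that assumption (the builtin name is taken from the table above; the helper itself is hypothetical):

#include "AMDILIntrinsicInfo.h"
#include <cstring>

unsigned resolveMadBuiltin(const llvm::AMDGPUIntrinsicInfo &Info) {
  const char *Name = "__amdil_mad"; // maps to int_AMDIL_mad above
  return Info.lookupName(Name, std::strlen(Name));
}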
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILISelDAGToDAG.cpp llvm-r600/lib/Target/R600/AMDILISelDAGToDAG.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILISelDAGToDAG.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILISelDAGToDAG.cpp 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,567 @@
-+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Defines an instruction selector for the AMDGPU target.
-+//
-+//===----------------------------------------------------------------------===//
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPUISelLowering.h" // For AMDGPUISD
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDILDevices.h"
-+#include "R600InstrInfo.h"
-+#include "llvm/ADT/ValueMap.h"
-+#include "llvm/CodeGen/PseudoSourceValue.h"
-+#include "llvm/CodeGen/SelectionDAGISel.h"
-+#include "llvm/Support/Compiler.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include <list>
-+#include <queue>
-+
-+using namespace llvm;
-+
-+//===----------------------------------------------------------------------===//
-+// Instruction Selector Implementation
-+//===----------------------------------------------------------------------===//
-+
-+namespace {
-+/// AMDGPU specific code to select AMDGPU machine instructions for
-+/// SelectionDAG operations.
-+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
-+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
-+ // make the right decision when generating code for different targets.
-+ const AMDGPUSubtarget &Subtarget;
-+public:
-+ AMDGPUDAGToDAGISel(TargetMachine &TM);
-+ virtual ~AMDGPUDAGToDAGISel();
-+
-+ SDNode *Select(SDNode *N);
-+ virtual const char *getPassName() const;
-+
-+private:
-+ inline SDValue getSmallIPtrImm(unsigned Imm);
-+ bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
-+
-+ // Complex pattern selectors
-+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
-+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
-+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
-+
-+ static bool checkType(const Value *ptr, unsigned int addrspace);
-+ static const Value *getBasePointerValue(const Value *V);
-+
-+ static bool isGlobalStore(const StoreSDNode *N);
-+ static bool isPrivateStore(const StoreSDNode *N);
-+ static bool isLocalStore(const StoreSDNode *N);
-+ static bool isRegionStore(const StoreSDNode *N);
-+
-+ static bool isCPLoad(const LoadSDNode *N);
-+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
-+ static bool isGlobalLoad(const LoadSDNode *N);
-+ static bool isParamLoad(const LoadSDNode *N);
-+ static bool isPrivateLoad(const LoadSDNode *N);
-+ static bool isLocalLoad(const LoadSDNode *N);
-+ static bool isRegionLoad(const LoadSDNode *N);
-+
-+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
-+ bool SelectGlobalValueVariableOffset(SDValue Addr,
-+ SDValue &BaseReg, SDValue& Offset);
-+ bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
-+ bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
-+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-+
-+ // Include the pieces autogenerated from the target description.
-+#include "AMDGPUGenDAGISel.inc"
-+};
-+} // end anonymous namespace
-+
-+/// \brief This pass converts a legalized DAG into an AMDGPU-specific
-+/// DAG, ready for instruction scheduling.
-+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
-+ ) {
-+ return new AMDGPUDAGToDAGISel(TM);
-+}
-+
-+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
-+ )
-+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
-+}
-+
-+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
-+}
-+
-+SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
-+ return CurDAG->getTargetConstant(Imm, MVT::i32);
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDRParam(
-+ SDValue Addr, SDValue& R1, SDValue& R2) {
-+
-+ if (Addr.getOpcode() == ISD::FrameIndex) {
-+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
-+ } else {
-+ R1 = Addr;
-+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
-+ }
-+ } else if (Addr.getOpcode() == ISD::ADD) {
-+ R1 = Addr.getOperand(0);
-+ R2 = Addr.getOperand(1);
-+ } else {
-+ R1 = Addr;
-+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
-+ }
-+ return true;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
-+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
-+ return false;
-+ }
-+ return SelectADDRParam(Addr, R1, R2);
-+}
-+
-+
-+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
-+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
-+ return false;
-+ }
-+
-+ if (Addr.getOpcode() == ISD::FrameIndex) {
-+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
-+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
-+ } else {
-+ R1 = Addr;
-+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
-+ }
-+ } else if (Addr.getOpcode() == ISD::ADD) {
-+ R1 = Addr.getOperand(0);
-+ R2 = Addr.getOperand(1);
-+ } else {
-+ R1 = Addr;
-+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
-+ }
-+ return true;
-+}
-+
-+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
-+ unsigned int Opc = N->getOpcode();
-+ if (N->isMachineOpcode()) {
-+ return NULL; // Already selected.
-+ }
-+ switch (Opc) {
-+ default: break;
-+ case ISD::FrameIndex: {
-+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
-+ unsigned int FI = FIN->getIndex();
-+ EVT OpVT = N->getValueType(0);
-+ unsigned int NewOpc = AMDGPU::COPY;
-+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
-+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
-+ }
-+ break;
-+ }
-+ case ISD::ConstantFP:
-+ case ISD::Constant: {
-+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-+ // XXX: Custom immediate lowering not implemented yet. Instead we use
-+ // pseudo instructions defined in SIInstructions.td
-+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ break;
-+ }
-+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
-+
-+ uint64_t ImmValue = 0;
-+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
-+
-+ if (N->getOpcode() == ISD::ConstantFP) {
-+ // XXX: 64-bit Immediates not supported yet
-+ assert(N->getValueType(0) != MVT::f64);
-+
-+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
-+ APFloat Value = C->getValueAPF();
-+ float FloatValue = Value.convertToFloat();
-+ if (FloatValue == 0.0) {
-+ ImmReg = AMDGPU::ZERO;
-+ } else if (FloatValue == 0.5) {
-+ ImmReg = AMDGPU::HALF;
-+ } else if (FloatValue == 1.0) {
-+ ImmReg = AMDGPU::ONE;
-+ } else {
-+ ImmValue = Value.bitcastToAPInt().getZExtValue();
-+ }
-+ } else {
-+ // XXX: 64-bit Immediates not supported yet
-+ assert(N->getValueType(0) != MVT::i64);
-+
-+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
-+ if (C->getZExtValue() == 0) {
-+ ImmReg = AMDGPU::ZERO;
-+ } else if (C->getZExtValue() == 1) {
-+ ImmReg = AMDGPU::ONE_INT;
-+ } else {
-+ ImmValue = C->getZExtValue();
-+ }
-+ }
-+
-+ for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
-+ Use != SDNode::use_end(); Use = Next) {
-+ Next = llvm::next(Use);
-+ std::vector<SDValue> Ops;
-+ for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
-+ Ops.push_back(Use->getOperand(i));
-+ }
-+
-+ if (!Use->isMachineOpcode()) {
-+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
-+ // We can only use literal constants (e.g. AMDGPU::ZERO,
-+ // AMDGPU::ONE, etc) in machine opcodes.
-+ continue;
-+ }
-+ } else {
-+ if (!TII->isALUInstr(Use->getMachineOpcode())) {
-+ continue;
-+ }
-+
-+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
-+ assert(ImmIdx != -1);
-+
-+ // subtract one from ImmIdx, because the DST operand is usually index
-+ // 0 for MachineInstrs, but we have no DST in the Ops vector.
-+ ImmIdx--;
-+
-+ // Check that we aren't already using an immediate.
-+ // XXX: It's possible for an instruction to have more than one
-+ // immediate operand, but this is not supported yet.
-+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
-+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
-+ assert(C);
-+
-+ if (C->getZExtValue() != 0) {
-+ // This instruction is already using an immediate.
-+ continue;
-+ }
-+
-+ // Set the immediate value
-+ Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
-+ }
-+ }
-+ // Set the immediate register
-+ Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
-+
-+ CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
-+ }
-+ break;
-+ }
-+ }
-+ SDNode *Result = SelectCode(N);
-+
-+ // Fold operands of selected node
-+
-+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+ const R600InstrInfo *TII =
-+ static_cast<const R600InstrInfo*>(TM.getInstrInfo());
-+ if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
-+ bool IsModified = false;
-+ do {
-+ std::vector<SDValue> Ops;
-+ for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
-+ I != E; ++I)
-+ Ops.push_back(*I);
-+ IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
-+ if (IsModified) {
-+ Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(),
-+ Result->getVTList(), Ops.data(), Ops.size());
-+ }
-+ } while (IsModified);
-+ }
-+ }
-+
-+ return Result;
-+}
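Editor's note: the constant handling in Select() above prefers R600's inline constant registers for common values and falls back to the ALU_LITERAL_X slot. In isolation the choice looks like this (register names from the patch; a standalone sketch, not the selector code itself):

enum R600ImmReg { ALU_LITERAL_X, ZERO, HALF, ONE };

R600ImmReg classifyFPImmediate(float V) {
  if (V == 0.0f) return ZERO;   // inline 0.0
  if (V == 0.5f) return HALF;   // inline 0.5
  if (V == 1.0f) return ONE;    // inline 1.0
  return ALU_LITERAL_X;         // emit a 32-bit literal operand
}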
-+
-+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
-+ const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
-+ int OperandIdx[] = {
-+ TII->getOperandIdx(Opcode, R600Operands::SRC0),
-+ TII->getOperandIdx(Opcode, R600Operands::SRC1),
-+ TII->getOperandIdx(Opcode, R600Operands::SRC2)
-+ };
-+ int SelIdx[] = {
-+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
-+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
-+ TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
-+ };
-+ for (unsigned i = 0; i < 3; i++) {
-+ if (OperandIdx[i] < 0)
-+ return false;
-+ SDValue Operand = Ops[OperandIdx[i] - 1];
-+ switch (Operand.getOpcode()) {
-+ case AMDGPUISD::CONST_ADDRESS: {
-+ SDValue CstOffset;
-+ if (!Operand.getValueType().isVector() &&
-+ SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
-+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-+ Ops[SelIdx[i] - 1] = CstOffset;
-+ return true;
-+ }
-+ }
-+ break;
-+ default:
-+ break;
-+ }
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
-+ if (!ptr) {
-+ return false;
-+ }
-+ Type *ptrType = ptr->getType();
-+ return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
-+}
-+
-+const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
-+ if (!V) {
-+ return NULL;
-+ }
-+ const Value *ret = NULL;
-+ ValueMap<const Value *, bool> ValueBitMap;
-+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
-+ ValueQueue.push(V);
-+ while (!ValueQueue.empty()) {
-+ V = ValueQueue.front();
-+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
-+ ValueBitMap[V] = true;
-+ if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
-+ ret = V;
-+ break;
-+ } else if (dyn_cast<GlobalVariable>(V)) {
-+ ret = V;
-+ break;
-+ } else if (dyn_cast<Constant>(V)) {
-+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
-+ if (CE) {
-+ ValueQueue.push(CE->getOperand(0));
-+ }
-+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
-+ ret = AI;
-+ break;
-+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
-+ uint32_t numOps = I->getNumOperands();
-+ for (uint32_t x = 0; x < numOps; ++x) {
-+ ValueQueue.push(I->getOperand(x));
-+ }
-+ } else {
-+ assert(!"Found a Value that we didn't know how to handle!");
-+ }
-+ }
-+ ValueQueue.pop();
-+ }
-+ return ret;
-+}
-+
-+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
-+ return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
-+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
-+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
-+}
-+
-+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
-+ if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
-+ return true;
-+ }
-+ // Guard against a missing memory operand before dereferencing it, and
-+ // compute the base pointer value only once.
-+ MachineMemOperand *MMO = N->getMemOperand();
-+ if (!MMO || !MMO->getValue()) {
-+ return false;
-+ }
-+ const Value *V = MMO->getValue();
-+ const Value *BV = getBasePointerValue(V);
-+ if (dyn_cast<GlobalValue>(V) || (BV && dyn_cast<GlobalValue>(BV))) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
-+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
-+ MachineMemOperand *MMO = N->getMemOperand();
-+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
-+ if (MMO) {
-+ const Value *V = MMO->getValue();
-+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
-+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
-+ return true;
-+ }
-+ }
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
-+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
-+ // Check to make sure we are not a constant pool load or a constant load
-+ // that is marked as a private load
-+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
-+ return false;
-+ }
-+ }
-+ if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
-+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
-+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
-+ && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
-+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
-+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
-+ return true;
-+ }
-+ return false;
-+}
-+
-+const char *AMDGPUDAGToDAGISel::getPassName() const {
-+ return "AMDGPU DAG->DAG Pattern Instruction Selection";
-+}
-+
-+#ifdef DEBUGTMP
-+#undef INT64_C
-+#endif
-+#undef DEBUGTMP
-+
-+///==== AMDGPU Functions ====///
-+
-+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
-+ SDValue& IntPtr) {
-+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
-+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
-+ return true;
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
-+ SDValue& BaseReg, SDValue &Offset) {
-+ if (!isa<ConstantSDNode>(Addr)) {
-+ BaseReg = Addr;
-+ Offset = CurDAG->getIntPtrConstant(0, true);
-+ return true;
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
-+ SDValue& Offset) {
-+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
-+ return false;
-+ }
-+
-+ if (Addr.getOpcode() == ISD::ADD) {
-+ bool Match = false;
-+
-+ // Find the base ptr and the offset
-+ for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
-+ SDValue Arg = Addr.getOperand(i);
-+ ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
-+ // This arg isn't a constant so it must be the base pointer.
-+ if (!OffsetNode) {
-+ Base = Addr.getOperand(i);
-+ continue;
-+ }
-+ // Check if the constant argument fits in 8-bits. The offset is in bytes
-+ // so we need to convert it to dwords.
-+ if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
-+ Match = true;
-+ Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
-+ MVT::i32);
-+ }
-+ }
-+ return Match;
-+ }
-+
-+ // Default case, no offset
-+ Base = Addr;
-+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
-+ return true;
-+}
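-+
-+// Worked example (illustrative only): for a (ptr + 1020) address, the offset
-+// node holds 1020 bytes; 1020 >> 2 == 255 dwords fits in 8 bits, so the match
-+// succeeds with Base = ptr and Offset = 255. An offset of 1024 bytes
-+// (256 dwords) no longer fits and the ADD fails to match.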
-+
-+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-+ SDValue &Offset) {
-+ ConstantSDNode * IMMOffset;
-+
-+ if (Addr.getOpcode() == ISD::ADD
-+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-+ && isInt<16>(IMMOffset->getZExtValue())) {
-+
-+ Base = Addr.getOperand(0);
-+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
-+ return true;
-+ // If the pointer address is constant, we can move it to the offset field.
-+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
-+ && isInt<16>(IMMOffset->getZExtValue())) {
-+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-+ CurDAG->getEntryNode().getDebugLoc(),
-+ AMDGPU::ZERO, MVT::i32);
-+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
-+ return true;
-+ }
-+
-+ // Default case, no offset
-+ Base = Addr;
-+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
-+ return true;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
-+ SDValue& Offset) {
-+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
-+ Addr.getOpcode() != ISD::ADD) {
-+ return false;
-+ }
-+
-+ Base = Addr.getOperand(0);
-+ Offset = Addr.getOperand(1);
-+
-+ return true;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILISelLowering.cpp llvm-r600/lib/Target/R600/AMDILISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILISelLowering.cpp 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,651 @@
-+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief TargetLowering functions borrowed from AMDIL.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUISelLowering.h"
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDILDevices.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "AMDGPUSubtarget.h"
-+#include "llvm/CallingConv.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/PseudoSourceValue.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include "llvm/CodeGen/SelectionDAGNodes.h"
-+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-+#include "llvm/DerivedTypes.h"
-+#include "llvm/Instructions.h"
-+#include "llvm/Intrinsics.h"
-+#include "llvm/Support/raw_ostream.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+#include "llvm/Target/TargetOptions.h"
-+
-+using namespace llvm;
-+//===----------------------------------------------------------------------===//
-+// Calling Convention Implementation
-+//===----------------------------------------------------------------------===//
-+#include "AMDGPUGenCallingConv.inc"
-+
-+//===----------------------------------------------------------------------===//
-+// TargetLowering Implementation Help Functions End
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// TargetLowering Class Implementation Begins
-+//===----------------------------------------------------------------------===//
-+void AMDGPUTargetLowering::InitAMDILLowering() {
-+ int types[] = {
-+ (int)MVT::i8,
-+ (int)MVT::i16,
-+ (int)MVT::i32,
-+ (int)MVT::f32,
-+ (int)MVT::f64,
-+ (int)MVT::i64,
-+ (int)MVT::v2i8,
-+ (int)MVT::v4i8,
-+ (int)MVT::v2i16,
-+ (int)MVT::v4i16,
-+ (int)MVT::v4f32,
-+ (int)MVT::v4i32,
-+ (int)MVT::v2f32,
-+ (int)MVT::v2i32,
-+ (int)MVT::v2f64,
-+ (int)MVT::v2i64
-+ };
-+
-+ int IntTypes[] = {
-+ (int)MVT::i8,
-+ (int)MVT::i16,
-+ (int)MVT::i32,
-+ (int)MVT::i64
-+ };
-+
-+ int FloatTypes[] = {
-+ (int)MVT::f32,
-+ (int)MVT::f64
-+ };
-+
-+ int VectorTypes[] = {
-+ (int)MVT::v2i8,
-+ (int)MVT::v4i8,
-+ (int)MVT::v2i16,
-+ (int)MVT::v4i16,
-+ (int)MVT::v4f32,
-+ (int)MVT::v4i32,
-+ (int)MVT::v2f32,
-+ (int)MVT::v2i32,
-+ (int)MVT::v2f64,
-+ (int)MVT::v2i64
-+ };
-+ size_t NumTypes = sizeof(types) / sizeof(*types);
-+ size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
-+ size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
-+ size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
-+
-+ const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
-+ // These are the current register classes that are
-+ // supported
-+
-+ for (unsigned int x = 0; x < NumTypes; ++x) {
-+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
-+
-+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
-+ // We cannot sextinreg, expand to shifts
-+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
-+ setOperationAction(ISD::SUBE, VT, Expand);
-+ setOperationAction(ISD::SUBC, VT, Expand);
-+ setOperationAction(ISD::ADDE, VT, Expand);
-+ setOperationAction(ISD::ADDC, VT, Expand);
-+ setOperationAction(ISD::BRCOND, VT, Custom);
-+ setOperationAction(ISD::BR_JT, VT, Expand);
-+ setOperationAction(ISD::BRIND, VT, Expand);
-+ // TODO: Implement custom UREM/SREM routines
-+ setOperationAction(ISD::SREM, VT, Expand);
-+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-+ if (VT != MVT::i64 && VT != MVT::v2i64) {
-+ setOperationAction(ISD::SDIV, VT, Custom);
-+ }
-+ }
-+ for (unsigned int x = 0; x < NumFloatTypes; ++x) {
-+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
-+
-+ // IL does not have these operations for floating point types
-+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
-+ setOperationAction(ISD::SETOLT, VT, Expand);
-+ setOperationAction(ISD::SETOGE, VT, Expand);
-+ setOperationAction(ISD::SETOGT, VT, Expand);
-+ setOperationAction(ISD::SETOLE, VT, Expand);
-+ setOperationAction(ISD::SETULT, VT, Expand);
-+ setOperationAction(ISD::SETUGE, VT, Expand);
-+ setOperationAction(ISD::SETUGT, VT, Expand);
-+ setOperationAction(ISD::SETULE, VT, Expand);
-+ }
-+
-+ for (unsigned int x = 0; x < NumIntTypes; ++x) {
-+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
-+
-+ // GPU also does not have divrem function for signed or unsigned
-+ setOperationAction(ISD::SDIVREM, VT, Expand);
-+
-+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
-+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-+
-+ // GPU doesn't have a rotl, rotr, or byteswap instruction
-+ setOperationAction(ISD::ROTR, VT, Expand);
-+ setOperationAction(ISD::BSWAP, VT, Expand);
-+
-+ // GPU doesn't have any counting operators
-+ setOperationAction(ISD::CTPOP, VT, Expand);
-+ setOperationAction(ISD::CTTZ, VT, Expand);
-+ setOperationAction(ISD::CTLZ, VT, Expand);
-+ }
-+
-+ for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
-+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
-+
-+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
-+ setOperationAction(ISD::SDIVREM, VT, Expand);
-+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-+ // setOperationAction(ISD::VSETCC, VT, Expand);
-+ setOperationAction(ISD::SELECT_CC, VT, Expand);
-+ }
-+ if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
-+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
-+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
-+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
-+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
-+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
-+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
-+ setOperationAction(ISD::Constant, MVT::i64, Legal);
-+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
-+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
-+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
-+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
-+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
-+ }
-+ if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
-+ // we support loading/storing v2f64 but not operations on the type
-+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
-+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
-+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
-+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
-+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
-+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-+ // We want to expand vector conversions into their scalar
-+ // counterparts.
-+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
-+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
-+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
-+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
-+ setOperationAction(ISD::FABS, MVT::f64, Expand);
-+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
-+ }
-+ // TODO: Fix the UDIV24 algorithm so it works correctly for these
-+ // types; this requires vector comparisons.
-+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
-+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
-+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
-+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
-+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
-+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
-+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
-+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
-+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
-+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
-+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
-+
-+ // Use the default implementation.
-+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
-+ setOperationAction(ISD::Constant, MVT::i32, Legal);
-+
-+ setSchedulingPreference(Sched::RegPressure);
-+ setPow2DivIsCheap(false);
-+ setSelectIsExpensive(true);
-+ setJumpIsExpensive(true);
-+
-+ maxStoresPerMemcpy = 4096;
-+ maxStoresPerMemmove = 4096;
-+ maxStoresPerMemset = 4096;
-+}
-+
-+bool
-+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-+ const CallInst &I, unsigned Intrinsic) const {
-+ return false;
-+}
-+
-+// The backend supports 32- and 64-bit floating point immediates.
-+bool
-+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-+ MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
-+ return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
-+}
-+
-+bool
-+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
-+ MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
-+ return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
-+}
-+
-+// computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known to
-+// be zero or one. Op is expected to be a target-specific node. Used by the
-+// DAG combiner.
-+
-+void
-+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
-+ const SDValue Op,
-+ APInt &KnownZero,
-+ APInt &KnownOne,
-+ const SelectionDAG &DAG,
-+ unsigned Depth) const {
-+ APInt KnownZero2;
-+ APInt KnownOne2;
-+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
-+ switch (Op.getOpcode()) {
-+ default: break;
-+ case ISD::SELECT_CC:
-+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth + 1);
-+ // Pass Depth + 1 for both operands so the recursion depth stays bounded.
-+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
-+ assert((KnownZero & KnownOne) == 0
-+ && "Bits known to be one AND zero?");
-+ assert((KnownZero2 & KnownOne2) == 0
-+ && "Bits known to be one AND zero?");
-+ // Only known if known in both the LHS and RHS
-+ KnownOne &= KnownOne2;
-+ KnownZero &= KnownZero2;
-+ break;
-+ }
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Other Lowering Hooks
-+//===----------------------------------------------------------------------===//
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
-+ EVT OVT = Op.getValueType();
-+ SDValue DST;
-+ if (OVT.getScalarType() == MVT::i64) {
-+ DST = LowerSDIV64(Op, DAG);
-+ } else if (OVT.getScalarType() == MVT::i32) {
-+ DST = LowerSDIV32(Op, DAG);
-+ } else if (OVT.getScalarType() == MVT::i16
-+ || OVT.getScalarType() == MVT::i8) {
-+ DST = LowerSDIV24(Op, DAG);
-+ } else {
-+ DST = SDValue(Op.getNode(), 0);
-+ }
-+ return DST;
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
-+ EVT OVT = Op.getValueType();
-+ SDValue DST;
-+ if (OVT.getScalarType() == MVT::i64) {
-+ DST = LowerSREM64(Op, DAG);
-+ } else if (OVT.getScalarType() == MVT::i32) {
-+ DST = LowerSREM32(Op, DAG);
-+ } else if (OVT.getScalarType() == MVT::i16) {
-+ DST = LowerSREM16(Op, DAG);
-+ } else if (OVT.getScalarType() == MVT::i8) {
-+ DST = LowerSREM8(Op, DAG);
-+ } else {
-+ DST = SDValue(Op.getNode(), 0);
-+ }
-+ return DST;
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
-+ SDValue Data = Op.getOperand(0);
-+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT DVT = Data.getValueType();
-+ EVT BVT = BaseType->getVT();
-+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
-+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
-+ unsigned shiftBits = srcBits - baseBits;
-+ if (srcBits < 32) {
-+ // If the op is less than 32 bits, then it needs to extend to 32 bits
-+ // so it can properly keep the upper bits valid.
-+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
-+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
-+ shiftBits = 32 - baseBits;
-+ DVT = IVT;
-+ }
-+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
-+ // Shift left by 'Shift' bits.
-+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
-+ // Signed shift Right by 'Shift' bits.
-+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
-+ if (srcBits < 32) {
-+ // Once the sign extension is done, the op needs to be converted to
-+ // its original type.
-+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
-+ }
-+ return Data;
-+}
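-+
-+// Worked example (illustrative, not part of the lowering itself):
-+// sign-extending the low 8 bits of a 32-bit value uses
-+// shiftBits = 32 - 8 = 24, so Data = 0x000000FF becomes
-+// (0x000000FF << 24) == 0xFF000000, and the arithmetic shift right by 24
-+// then yields 0xFFFFFFFF, i.e. -1 as expected.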
-+EVT
-+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
-+ int iSize = (size * numEle);
-+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
-+ if (!vEle) {
-+ vEle = 1;
-+ }
-+ if (size == 64) {
-+ if (vEle == 1) {
-+ return EVT(MVT::i64);
-+ } else {
-+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
-+ }
-+ } else {
-+ if (vEle == 1) {
-+ return EVT(MVT::i32);
-+ } else {
-+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
-+ }
-+ }
-+}
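-+
-+// For example (illustrative): genIntType(32, 2) returns v2i32 and
-+// genIntType(64, 1) returns i64; a request whose total size rounds below one
-+// 32-bit element, such as genIntType(8, 2), clamps vEle to 1 and returns i32.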
-+
-+SDValue
-+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
-+ SDValue Chain = Op.getOperand(0);
-+ SDValue Cond = Op.getOperand(1);
-+ SDValue Jump = Op.getOperand(2);
-+ return DAG.getNode(AMDGPUISD::BRANCH_COND, Op.getDebugLoc(),
-+ Op.getValueType(), Chain, Jump, Cond);
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT OVT = Op.getValueType();
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ // Default to the scalar types so INTTY and FLTTY are never left
-+ // uninitialized; only scalar, v2 and v4 values reach this point.
-+ MVT INTTY = MVT::i32;
-+ MVT FLTTY = MVT::f32;
-+ if (OVT.isVector() && OVT.getVectorNumElements() == 2) {
-+ INTTY = MVT::v2i32;
-+ FLTTY = MVT::v2f32;
-+ } else if (OVT.isVector() && OVT.getVectorNumElements() == 4) {
-+ INTTY = MVT::v4i32;
-+ FLTTY = MVT::v4f32;
-+ }
-+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
-+ // char|short jq = ia ^ ib;
-+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-+
-+ // jq = jq >> (bitsize - 2)
-+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-+
-+ // jq = jq | 0x1
-+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-+
-+ // jq = (int)jq
-+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
-+
-+ // int ia = (int)LHS;
-+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
-+
-+ // int ib = (int)RHS;
-+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
-+
-+ // float fa = (float)ia;
-+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
-+
-+ // float fb = (float)ib;
-+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
-+
-+ // float fq = native_divide(fa, fb);
-+ SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
-+
-+ // fq = trunc(fq);
-+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
-+
-+ // float fqneg = -fq;
-+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
-+
-+ // float fr = mad(fqneg, fb, fa);
-+ SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
-+
-+ // int iq = (int)fq;
-+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
-+
-+ // fr = fabs(fr);
-+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
-+
-+ // fb = fabs(fb);
-+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
-+
-+ // int cv = fr >= fb;
-+ SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
-+ // jq = (cv ? jq : 0);
-+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
-+ DAG.getConstant(0, OVT));
-+ // dst = iq + jq;
-+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
-+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
-+ return iq;
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT OVT = Op.getValueType();
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ // The LowerSDIV32 function generates code equivalent to the following IL.
-+ // mov r0, LHS
-+ // mov r1, RHS
-+ // ilt r10, r0, 0
-+ // ilt r11, r1, 0
-+ // iadd r0, r0, r10
-+ // iadd r1, r1, r11
-+ // ixor r0, r0, r10
-+ // ixor r1, r1, r11
-+ // udiv r0, r0, r1
-+ // ixor r10, r10, r11
-+ // iadd r0, r0, r10
-+ // ixor DST, r0, r10
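-+ //
-+ // As a rough scalar sketch (not the generated DAG), the sequence above is:
-+ //   int sign_lhs = (lhs < 0) ? -1 : 0;              // ilt r10, r0, 0
-+ //   int sign_rhs = (rhs < 0) ? -1 : 0;              // ilt r11, r1, 0
-+ //   unsigned abs_lhs = (lhs + sign_lhs) ^ sign_lhs; // abs via add/xor
-+ //   unsigned abs_rhs = (rhs + sign_rhs) ^ sign_rhs;
-+ //   unsigned q = abs_lhs / abs_rhs;
-+ //   int sign_q = sign_lhs ^ sign_rhs;               // sign of the quotient
-+ //   return (q + sign_q) ^ sign_q;                   // reapply the sign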
-+
-+ // mov r0, LHS
-+ SDValue r0 = LHS;
-+
-+ // mov r1, RHS
-+ SDValue r1 = RHS;
-+
-+ // ilt r10, r0, 0
-+ SDValue r10 = DAG.getSelectCC(DL,
-+ r0, DAG.getConstant(0, OVT),
-+ DAG.getConstant(-1, MVT::i32),
-+ DAG.getConstant(0, MVT::i32),
-+ ISD::SETLT);
-+
-+ // ilt r11, r1, 0
-+ SDValue r11 = DAG.getSelectCC(DL,
-+ r1, DAG.getConstant(0, OVT),
-+ DAG.getConstant(-1, MVT::i32),
-+ DAG.getConstant(0, MVT::i32),
-+ ISD::SETLT);
-+
-+ // iadd r0, r0, r10
-+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-+
-+ // iadd r1, r1, r11
-+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-+
-+ // ixor r0, r0, r10
-+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-+
-+ // ixor r1, r1, r11
-+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-+
-+ // udiv r0, r0, r1
-+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
-+
-+ // ixor r10, r10, r11
-+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
-+
-+ // iadd r0, r0, r10
-+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-+
-+ // ixor DST, r0, r10
-+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-+ return DST;
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
-+ return SDValue(Op.getNode(), 0);
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT OVT = Op.getValueType();
-+ MVT INTTY = MVT::i32;
-+ if (OVT == MVT::v2i8) {
-+ INTTY = MVT::v2i32;
-+ } else if (OVT == MVT::v4i8) {
-+ INTTY = MVT::v4i32;
-+ }
-+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
-+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
-+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
-+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
-+ return LHS;
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT OVT = Op.getValueType();
-+ MVT INTTY = MVT::i32;
-+ if (OVT == MVT::v2i16) {
-+ INTTY = MVT::v2i32;
-+ } else if (OVT == MVT::v4i16) {
-+ INTTY = MVT::v4i32;
-+ }
-+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
-+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
-+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
-+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
-+ return LHS;
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT OVT = Op.getValueType();
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ // The LowerSREM32 function generates code equivalent to the following IL.
-+ // mov r0, LHS
-+ // mov r1, RHS
-+ // ilt r10, r0, 0
-+ // ilt r11, r1, 0
-+ // iadd r0, r0, r10
-+ // iadd r1, r1, r11
-+ // ixor r0, r0, r10
-+ // ixor r1, r1, r11
-+ // udiv r20, r0, r1
-+ // umul r20, r20, r1
-+ // sub r0, r0, r20
-+ // iadd r0, r0, r10
-+ // ixor DST, r0, r10
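-+ //
-+ // A rough scalar sketch of the same sequence (the remainder takes the sign
-+ // of the dividend, matching C semantics):
-+ //   int sign_lhs = (lhs < 0) ? -1 : 0;
-+ //   int sign_rhs = (rhs < 0) ? -1 : 0;
-+ //   unsigned abs_lhs = (lhs + sign_lhs) ^ sign_lhs;
-+ //   unsigned abs_rhs = (rhs + sign_rhs) ^ sign_rhs;
-+ //   unsigned r = abs_lhs - (abs_lhs / abs_rhs) * abs_rhs;
-+ //   return (r + sign_lhs) ^ sign_lhs;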
-+
-+ // mov r0, LHS
-+ SDValue r0 = LHS;
-+
-+ // mov r1, RHS
-+ SDValue r1 = RHS;
-+
-+ // ilt r10, r0, 0
-+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
-+
-+ // ilt r11, r1, 0
-+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
-+
-+ // iadd r0, r0, r10
-+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-+
-+ // iadd r1, r1, r11
-+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-+
-+ // ixor r0, r0, r10
-+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-+
-+ // ixor r1, r1, r11
-+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-+
-+ // udiv r20, r0, r1
-+ SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
-+
-+ // umul r20, r20, r1
-+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
-+
-+ // sub r0, r0, r20
-+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
-+
-+ // iadd r0, r0, r10
-+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-+
-+ // ixor DST, r0, r10
-+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-+ return DST;
-+}
-+
-+SDValue
-+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
-+ return SDValue(Op.getNode(), 0);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILNIDevice.cpp llvm-r600/lib/Target/R600/AMDILNIDevice.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILNIDevice.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILNIDevice.cpp 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,65 @@
-+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+#include "AMDILNIDevice.h"
-+#include "AMDILEvergreenDevice.h"
-+#include "AMDGPUSubtarget.h"
-+
-+using namespace llvm;
-+
-+AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
-+ : AMDGPUEvergreenDevice(ST) {
-+ std::string name = ST->getDeviceName();
-+ if (name == "caicos") {
-+ DeviceFlag = OCL_DEVICE_CAICOS;
-+ } else if (name == "turks") {
-+ DeviceFlag = OCL_DEVICE_TURKS;
-+ } else if (name == "cayman") {
-+ DeviceFlag = OCL_DEVICE_CAYMAN;
-+ } else {
-+ DeviceFlag = OCL_DEVICE_BARTS;
-+ }
-+}
-+AMDGPUNIDevice::~AMDGPUNIDevice() {
-+}
-+
-+size_t
-+AMDGPUNIDevice::getMaxLDSSize() const {
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return MAX_LDS_SIZE_900;
-+ } else {
-+ return 0;
-+ }
-+}
-+
-+uint32_t
-+AMDGPUNIDevice::getGeneration() const {
-+ return AMDGPUDeviceInfo::HD6XXX;
-+}
-+
-+AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
-+ : AMDGPUNIDevice(ST) {
-+ setCaps();
-+}
-+
-+AMDGPUCaymanDevice::~AMDGPUCaymanDevice() {
-+}
-+
-+void
-+AMDGPUCaymanDevice::setCaps() {
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
-+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
-+ mHWBits.set(AMDGPUDeviceInfo::FMA);
-+ }
-+ mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
-+ mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
-+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
-+}
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILNIDevice.h llvm-r600/lib/Target/R600/AMDILNIDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDILNIDevice.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILNIDevice.h 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,57 @@
-+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+///
-+/// This file will define the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===---------------------------------------------------------------------===//
-+#ifndef AMDILNIDEVICE_H
-+#define AMDILNIDEVICE_H
-+#include "AMDILEvergreenDevice.h"
-+#include "AMDGPUSubtarget.h"
-+
-+namespace llvm {
-+
-+class AMDGPUSubtarget;
-+//===---------------------------------------------------------------------===//
-+// NI generation of devices and their respective sub classes
-+//===---------------------------------------------------------------------===//
-+
-+/// \brief The AMDGPUNIDevice is the base class for all Northern Islands series
-+/// of cards.
-+///
-+/// It is very similar to the AMDGPUEvergreenDevice, with the major
-+/// exception being differences in wavefront size and hardware capabilities. The
-+/// NI devices all have 64-wide wavefronts and also add support for signed
-+/// 24-bit integer operations.
-+class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
-+public:
-+ AMDGPUNIDevice(AMDGPUSubtarget*);
-+ virtual ~AMDGPUNIDevice();
-+ virtual size_t getMaxLDSSize() const;
-+ virtual uint32_t getGeneration() const;
-+};
-+
-+/// Just as the AMDGPUCypressDevice is the double capable version of the
-+/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version
-+/// of the AMDGPUNIDevice. The other major difference is that the Cayman device
-+/// has 4-wide ALUs, whereas the rest of the NI family has 5-wide ALUs.
-+class AMDGPUCaymanDevice: public AMDGPUNIDevice {
-+public:
-+ AMDGPUCaymanDevice(AMDGPUSubtarget*);
-+ virtual ~AMDGPUCaymanDevice();
-+private:
-+ virtual void setCaps();
-+};
-+
-+static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
-+} // namespace llvm
-+#endif // AMDILNIDEVICE_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILPeepholeOptimizer.cpp llvm-r600/lib/Target/R600/AMDILPeepholeOptimizer.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILPeepholeOptimizer.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILPeepholeOptimizer.cpp 2013-01-25 19:43:57.450049721 +0100
-@@ -0,0 +1,1256 @@
-+//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+
-+#define DEBUG_TYPE "PeepholeOpt"
-+#ifdef DEBUG
-+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-+#else
-+#define DEBUGME 0
-+#endif
-+
-+#include "AMDILDevices.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/ADT/Statistic.h"
-+#include "llvm/ADT/StringExtras.h"
-+#include "llvm/ADT/StringRef.h"
-+#include "llvm/ADT/Twine.h"
-+#include "llvm/Constants.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-+#include "llvm/Function.h"
-+#include "llvm/Instructions.h"
-+#include "llvm/Module.h"
-+#include "llvm/Support/Debug.h"
-+#include "llvm/Support/MathExtras.h"
-+
-+#include <sstream>
-+
-+#if 0
-+STATISTIC(PointerAssignments, "Number of dynamic pointer "
-+ "assigments discovered");
-+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
-+#endif
-+
-+using namespace llvm;
-+// The Peephole optimization pass is used to do simple last-minute optimizations
-+// that are required for correct code or to remove redundant functions.
-+namespace {
-+
-+class OpaqueType;
-+
-+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
-+public:
-+ TargetMachine &TM;
-+ static char ID;
-+ AMDGPUPeepholeOpt(TargetMachine &tm);
-+ ~AMDGPUPeepholeOpt();
-+ const char *getPassName() const;
-+ bool runOnFunction(Function &F);
-+ bool doInitialization(Module &M);
-+ bool doFinalization(Module &M);
-+ void getAnalysisUsage(AnalysisUsage &AU) const;
-+private:
-+ // Function to initiate all of the instruction level optimizations.
-+ bool instLevelOptimizations(BasicBlock::iterator *inst);
-+ // Quick check to see if we need to dump all of the pointers into the
-+ // arena. If so, then we set all pointers to exist in the arena. This
-+ // is a workaround for aliasing of pointers in a struct/union.
-+ bool dumpAllIntoArena(Function &F);
-+ // Because we don't want to invalidate any pointers while in the
-+ // safeNestedForEach function, we push atomic conversions to a vector
-+ // and handle them later. This function does the conversions if required.
-+ void doAtomicConversionIfNeeded(Function &F);
-+ // Because __amdil_is_constant cannot be properly evaluated if
-+ // optimizations are disabled, the calls are placed in a vector
-+ // and evaluated after the __amdil_image* functions are evaluated,
-+ // which should allow the __amdil_is_constant function to be
-+ // evaluated correctly.
-+ void doIsConstCallConversionIfNeeded();
-+ bool mChanged;
-+ bool mDebug;
-+ bool mConvertAtomics;
-+ CodeGenOpt::Level optLevel;
-+ // Run a series of tests to see if we can optimize a CALL instruction.
-+ bool optimizeCallInst(BasicBlock::iterator *bbb);
-+ // A peephole optimization to optimize bit extract sequences.
-+ bool optimizeBitExtract(Instruction *inst);
-+ // A peephole optimization to optimize bit insert sequences.
-+ bool optimizeBitInsert(Instruction *inst);
-+ bool setupBitInsert(Instruction *base,
-+ Instruction *&src,
-+ Constant *&mask,
-+ Constant *&shift);
-+ // Expand the bit field insert instruction on versions of OpenCL that
-+ // don't support it.
-+ bool expandBFI(CallInst *CI);
-+ // Expand the bit field mask instruction on versions of OpenCL that
-+ // don't support it.
-+ bool expandBFM(CallInst *CI);
-+ // On 7XX and 8XX hardware, we do not have 24-bit signed operations, so in
-+ // this case we need to expand them. These functions check for 24-bit
-+ // operations and then expand them.
-+ bool isSigned24BitOps(CallInst *CI);
-+ void expandSigned24BitOps(CallInst *CI);
-+ // One optimization that can occur is that if the required workgroup size is
-+ // specified then the result of get_local_size is known at compile time and
-+ // can be returned accordingly.
-+ bool isRWGLocalOpt(CallInst *CI);
-+ // On Northern Islands cards, the division is slightly less accurate than on
-+ // previous generations, so we need to utilize a more accurate division. On
-+ // all other cards we can translate the accurate divide to a normal divide.
-+ bool convertAccurateDivide(CallInst *CI);
-+ void expandAccurateDivide(CallInst *CI);
-+ // If the alignment is set incorrectly, it can produce really inefficient
-+ // code. This checks for this scenario and fixes it if possible.
-+ bool correctMisalignedMemOp(Instruction *inst);
-+
-+ // If we are in no-opt mode, then we need to make sure that
-+ // local samplers are properly propagated, as constant propagation
-+ // doesn't occur and we need to know the value of kernel-defined
-+ // samplers at compile time.
-+ bool propagateSamplerInst(CallInst *CI);
-+
-+ // Helper functions
-+
-+ // Group of functions that recursively calculate the size of a structure based
-+ // on its sub-types.
-+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
-+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
-+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
-+ size_t getTypeSize(FunctionType * const FT, bool dereferencePtr = false);
-+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
-+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
-+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
-+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
-+
-+ LLVMContext *mCTX;
-+ Function *mF;
-+ const AMDGPUSubtarget *mSTM;
-+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
-+ SmallVector<CallInst *, 16> isConstVec;
-+}; // class AMDGPUPeepholeOpt
-+ char AMDGPUPeepholeOpt::ID = 0;
-+
-+// A template function that has two levels of looping before calling the
-+// function with a pointer to the current iterator.
-+template<class InputIterator, class SecondIterator, class Function>
-+Function safeNestedForEach(InputIterator First, InputIterator Last,
-+ SecondIterator S, Function F) {
-+ for ( ; First != Last; ++First) {
-+ SecondIterator sf, sl;
-+ for (sf = First->begin(), sl = First->end();
-+ sf != sl; ) {
-+ if (!F(&sf)) {
-+ ++sf;
-+ }
-+ }
-+ }
-+ return F;
-+}
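-+// Note that the inner loop only advances the iterator when F returns false;
-+// when F returns true, F is expected to have advanced the iterator itself
-+// (optimizeCallInst, for example, does ++(*bbb) before erasing an
-+// instruction), so erasing never leaves a dangling iterator.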
-+
-+} // anonymous namespace
-+
-+namespace llvm {
-+ FunctionPass *
-+ createAMDGPUPeepholeOpt(TargetMachine &tm) {
-+ return new AMDGPUPeepholeOpt(tm);
-+ }
-+} // llvm namespace
-+
-+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
-+ : FunctionPass(ID), TM(tm) {
-+ mDebug = DEBUGME;
-+ optLevel = TM.getOptLevel();
-+}
-+
-+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
-+}
-+
-+const char *
-+AMDGPUPeepholeOpt::getPassName() const {
-+ return "AMDGPU PeepHole Optimization Pass";
-+}
-+
-+bool
-+containsPointerType(Type *Ty) {
-+ if (!Ty) {
-+ return false;
-+ }
-+ switch(Ty->getTypeID()) {
-+ default:
-+ return false;
-+ case Type::StructTyID: {
-+ const StructType *ST = dyn_cast<StructType>(Ty);
-+ for (StructType::element_iterator stb = ST->element_begin(),
-+ ste = ST->element_end(); stb != ste; ++stb) {
-+ if (containsPointerType(*stb)) {
-+ return true;
-+ }
-+ }
-+ break;
-+ }
-+ case Type::VectorTyID:
-+ case Type::ArrayTyID:
-+ return containsPointerType(cast<SequentialType>(Ty)->getElementType());
-+ case Type::PointerTyID:
-+ return true;
-+ }
-+ return false;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
-+ bool dumpAll = false;
-+ for (Function::const_arg_iterator cab = F.arg_begin(),
-+ cae = F.arg_end(); cab != cae; ++cab) {
-+ const Argument *arg = cab;
-+ const PointerType *PT = dyn_cast<PointerType>(arg->getType());
-+ if (!PT) {
-+ continue;
-+ }
-+ Type *DereferencedType = PT->getElementType();
-+ if (!isa<StructType>(DereferencedType)) {
-+ continue;
-+ }
-+ if (!containsPointerType(DereferencedType)) {
-+ continue;
-+ }
-+ // FIXME: Because a pointer inside of a struct/union may be aliased to
-+ // another pointer we need to take the conservative approach and place all
-+ // pointers into the arena until more advanced detection is implemented.
-+ dumpAll = true;
-+ }
-+ return dumpAll;
-+}
-+void
-+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
-+ if (isConstVec.empty()) {
-+ return;
-+ }
-+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
-+ CallInst *CI = isConstVec[x];
-+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
-+ Type *aType = Type::getInt32Ty(*mCTX);
-+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
-+ : ConstantInt::get(aType, 0);
-+ CI->replaceAllUsesWith(Val);
-+ CI->eraseFromParent();
-+ }
-+ isConstVec.clear();
-+}
-+void
-+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
-+ // Don't do anything if we don't have any atomic operations.
-+ if (atomicFuncs.empty()) {
-+ return;
-+ }
-+ // Change the function name for the atomic if it is required
-+ uint32_t size = atomicFuncs.size();
-+ for (uint32_t x = 0; x < size; ++x) {
-+ atomicFuncs[x].first->setOperand(
-+ atomicFuncs[x].first->getNumOperands()-1,
-+ atomicFuncs[x].second);
-+ }
-+ mChanged = true;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
-+ mChanged = false;
-+ mF = &MF;
-+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
-+ if (mDebug) {
-+ MF.dump();
-+ }
-+ mCTX = &MF.getType()->getContext();
-+ mConvertAtomics = true;
-+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
-+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
-+ this));
-+
-+ doAtomicConversionIfNeeded(MF);
-+ doIsConstCallConversionIfNeeded();
-+
-+ if (mDebug) {
-+ MF.dump();
-+ }
-+ return mChanged;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
-+ Instruction *inst = (*bbb);
-+ CallInst *CI = dyn_cast<CallInst>(inst);
-+ if (!CI) {
-+ return false;
-+ }
-+ if (isSigned24BitOps(CI)) {
-+ expandSigned24BitOps(CI);
-+ ++(*bbb);
-+ CI->eraseFromParent();
-+ return true;
-+ }
-+ if (propagateSamplerInst(CI)) {
-+ return false;
-+ }
-+ if (expandBFI(CI) || expandBFM(CI)) {
-+ ++(*bbb);
-+ CI->eraseFromParent();
-+ return true;
-+ }
-+ if (convertAccurateDivide(CI)) {
-+ expandAccurateDivide(CI);
-+ ++(*bbb);
-+ CI->eraseFromParent();
-+ return true;
-+ }
-+
-+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
-+ if (calleeName.startswith("__amdil_is_constant")) {
-+ // If we do not have optimizations, then this
-+ // cannot be properly evaluated, so we add the
-+ // call instruction to a vector and process
-+ // it at the end of processing, after the
-+ // samplers have been correctly handled.
-+ if (optLevel == CodeGenOpt::None) {
-+ isConstVec.push_back(CI);
-+ return false;
-+ } else {
-+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
-+ Type *aType = Type::getInt32Ty(*mCTX);
-+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
-+ : ConstantInt::get(aType, 0);
-+ CI->replaceAllUsesWith(Val);
-+ ++(*bbb);
-+ CI->eraseFromParent();
-+ return true;
-+ }
-+ }
-+
-+ if (calleeName.equals("__amdil_is_asic_id_i32")) {
-+ ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
-+ Type *aType = Type::getInt32Ty(*mCTX);
-+ Value *Val = CV;
-+ if (Val) {
-+ Val = ConstantInt::get(aType,
-+ mSTM->device()->getDeviceFlag() & CV->getZExtValue());
-+ } else {
-+ Val = ConstantInt::get(aType, 0);
-+ }
-+ CI->replaceAllUsesWith(Val);
-+ ++(*bbb);
-+ CI->eraseFromParent();
-+ return true;
-+ }
-+ Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
-+ if (!F) {
-+ return false;
-+ }
-+ if (F->getName().startswith("__atom") && !CI->getNumUses()
-+ && F->getName().find("_xchg") == StringRef::npos) {
-+ std::string buffer(F->getName().str() + "_noret");
-+ F = dyn_cast<Function>(
-+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
-+ atomicFuncs.push_back(std::make_pair(CI, F));
-+ }
-+
-+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
-+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
-+ return false;
-+ }
-+ if (!mConvertAtomics) {
-+ return false;
-+ }
-+ StringRef name = F->getName();
-+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
-+ mConvertAtomics = false;
-+ }
-+ return false;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
-+ Instruction *&src,
-+ Constant *&mask,
-+ Constant *&shift) {
-+ if (!base) {
-+ if (mDebug) {
-+ dbgs() << "Null pointer passed into function.\n";
-+ }
-+ return false;
-+ }
-+ bool andOp = false;
-+ if (base->getOpcode() == Instruction::Shl) {
-+ shift = dyn_cast<Constant>(base->getOperand(1));
-+ } else if (base->getOpcode() == Instruction::And) {
-+ mask = dyn_cast<Constant>(base->getOperand(1));
-+ andOp = true;
-+ } else {
-+ if (mDebug) {
-+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
-+ }
-+ // If the base is neither a Shl nor an And, we don't fit any of the patterns above.
-+ return false;
-+ }
-+ src = dyn_cast<Instruction>(base->getOperand(0));
-+ if (!src) {
-+ if (mDebug) {
-+ dbgs() << "Failed setup since the base operand is not an instruction!\n";
-+ }
-+ return false;
-+ }
-+ // If we find an 'and' operation, then we don't need to
-+ // find the next operation as we already know the
-+ // bits that are valid at this point.
-+ if (andOp) {
-+ return true;
-+ }
-+ if (src->getOpcode() == Instruction::Shl && !shift) {
-+ shift = dyn_cast<Constant>(src->getOperand(1));
-+ src = dyn_cast<Instruction>(src->getOperand(0));
-+ } else if (src->getOpcode() == Instruction::And && !mask) {
-+ mask = dyn_cast<Constant>(src->getOperand(1));
-+ }
-+ if (!mask && !shift) {
-+ if (mDebug) {
-+ dbgs() << "Failed setup since both mask and shift are NULL!\n";
-+ }
-+ // Did not find a constant mask or a shift.
-+ return false;
-+ }
-+ return true;
-+}
-+bool
-+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
-+ if (!inst) {
-+ return false;
-+ }
-+ if (!inst->isBinaryOp()) {
-+ return false;
-+ }
-+ if (inst->getOpcode() != Instruction::Or) {
-+ return false;
-+ }
-+ if (optLevel == CodeGenOpt::None) {
-+ return false;
-+ }
-+ // We want to do an optimization on a sequence of ops that in the end equals a
-+ // single ISA instruction.
-+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
-+ // Some simplified versions of this pattern are as follows:
-+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
-+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
-+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
-+ // (A & B) | (D << F) when (1 << F) >= B
-+ // (A << C) | (D & E) when (1 << C) >= E
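-+ // Worked example (illustrative): for (A & 0xFF) | (D << 8), the right-hand
-+ // side has offset 8 and width 24, which lies entirely above the 8 bits
-+ // covered by the left-hand mask, so the whole OR can become a single
-+ // __amdil_ubit_insert(24, 8, D, A & 0xFF) call.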
-+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
-+ // The HD4XXX hardware doesn't support the ubit_insert instruction.
-+ return false;
-+ }
-+ Type *aType = inst->getType();
-+ bool isVector = aType->isVectorTy();
-+ int numEle = 1;
-+ // This optimization only works on 32-bit integers.
-+ if (aType->getScalarType()
-+ != Type::getInt32Ty(inst->getContext())) {
-+ return false;
-+ }
-+ if (isVector) {
-+ const VectorType *VT = dyn_cast<VectorType>(aType);
-+ numEle = VT->getNumElements();
-+ // We currently cannot support more than 4 elements in an intrinsic and we
-+ // cannot support Vec3 types.
-+ if (numEle > 4 || numEle == 3) {
-+ return false;
-+ }
-+ }
-+ // TODO: Handle vectors.
-+ if (isVector) {
-+ if (mDebug) {
-+ dbgs() << "!!! Vectors are not supported yet!\n";
-+ }
-+ return false;
-+ }
-+ Instruction *LHSSrc = NULL, *RHSSrc = NULL;
-+ Constant *LHSMask = NULL, *RHSMask = NULL;
-+ Constant *LHSShift = NULL, *RHSShift = NULL;
-+ Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
-+ Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
-+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
-+ if (mDebug) {
-+ dbgs() << "Found an OR Operation that failed setup!\n";
-+ inst->dump();
-+ if (LHS) { LHS->dump(); }
-+ if (LHSSrc) { LHSSrc->dump(); }
-+ if (LHSMask) { LHSMask->dump(); }
-+ if (LHSShift) { LHSShift->dump(); }
-+ }
-+ // There was an issue with the setup for BitInsert.
-+ return false;
-+ }
-+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
-+ if (mDebug) {
-+ dbgs() << "Found an OR Operation that failed setup!\n";
-+ inst->dump();
-+ if (RHS) { RHS->dump(); }
-+ if (RHSSrc) { RHSSrc->dump(); }
-+ if (RHSMask) { RHSMask->dump(); }
-+ if (RHSShift) { RHSShift->dump(); }
-+ }
-+ // There was an issue with the setup for BitInsert.
-+ return false;
-+ }
-+ if (mDebug) {
-+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
-+ dbgs() << "Op: "; inst->dump();
-+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
-+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
-+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
-+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
-+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
-+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
-+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
-+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
-+ }
-+ Constant *offset = NULL;
-+ Constant *width = NULL;
-+ uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
-+ uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
-+ uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
-+ uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
-+ lhsMaskVal = (LHSMask
-+ ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
-+ rhsMaskVal = (RHSMask
-+ ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
-+ lhsShiftVal = (LHSShift
-+ ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
-+ rhsShiftVal = (RHSShift
-+ ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
-+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
-+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
-+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
-+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
-+ // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
-+ if (mDebug) {
-+ dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
-+ dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
-+ dbgs() << (RHSMask ? " & E)" : ")");
-+ dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
-+ dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
-+ dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
-+ dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
-+ dbgs() << "width(B) = " << lhsMaskWidth;
-+ dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
-+ dbgs() << "offset(B) = " << lhsMaskOffset;
-+ dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
-+ dbgs() << "Constraints: \n";
-+ dbgs() << "\t(1) B ^ E == 0\n";
-+ dbgs() << "\t(2-LHS) B is a mask\n";
-+ dbgs() << "\t(2-LHS) E is a mask\n";
-+ dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
-+ dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
-+ }
-+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
-+ if (mDebug) {
-+ dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
-+ dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
-+ dbgs() << "Failed constraint 1!\n";
-+ }
-+ return false;
-+ }
-+ if (mDebug) {
-+ dbgs() << "LHS = " << lhsMaskOffset << "";
-+ dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
-+ dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
-+ dbgs() << "\nRHS = " << rhsMaskOffset << "";
-+ dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
-+ dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
-+ dbgs() << "\n";
-+ }
-+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
-+ offset = ConstantInt::get(aType, lhsMaskOffset, false);
-+ width = ConstantInt::get(aType, lhsMaskWidth, false);
-+ RHSSrc = RHS;
-+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
-+ if (mDebug) {
-+ dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
-+ dbgs() << "Failed constraint 2!\n";
-+ }
-+ return false;
-+ }
-+ if (!LHSShift || lhsShiftVal != lhsMaskOffset) {
-+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
-+ "MaskShr", LHS);
-+ }
-+ if (mDebug) {
-+ dbgs() << "Optimizing LHS!\n";
-+ }
-+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
-+ offset = ConstantInt::get(aType, rhsMaskOffset, false);
-+ width = ConstantInt::get(aType, rhsMaskWidth, false);
-+ LHSSrc = RHSSrc;
-+ RHSSrc = LHS;
-+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
-+ if (mDebug) {
-+ dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
-+ dbgs() << "Failed constraint 2!\n";
-+ }
-+ return false;
-+ }
-+ if (!RHSShift || rhsShiftVal != rhsMaskOffset) {
-+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
-+ "MaskShr", RHS);
-+ }
-+ if (mDebug) {
-+ dbgs() << "Optimizing RHS!\n";
-+ }
-+ } else {
-+ if (mDebug) {
-+ dbgs() << "Failed constraint 3!\n";
-+ }
-+ return false;
-+ }
-+ if (mDebug) {
-+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
-+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
-+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
-+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
-+ }
-+ if (!offset || !width) {
-+ if (mDebug) {
-+ dbgs() << "Either width or offset are NULL, failed detection!\n";
-+ }
-+ return false;
-+ }
-+ // Let's create the function signature.
-+ std::vector<Type *> callTypes;
-+ callTypes.push_back(aType);
-+ callTypes.push_back(aType);
-+ callTypes.push_back(aType);
-+ callTypes.push_back(aType);
-+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
-+ std::string name = "__amdil_ubit_insert";
-+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
-+ Function *Func =
-+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
-+ getOrInsertFunction(llvm::StringRef(name), funcType));
-+ Value *Operands[4] = {
-+ width,
-+ offset,
-+ LHSSrc,
-+ RHSSrc
-+ };
-+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
-+ if (mDebug) {
-+ dbgs() << "Old Inst: ";
-+ inst->dump();
-+ dbgs() << "New Inst: ";
-+ CI->dump();
-+ dbgs() << "\n\n";
-+ }
-+ CI->insertBefore(inst);
-+ inst->replaceAllUsesWith(CI);
-+ return true;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
-+ if (!inst) {
-+ return false;
-+ }
-+ if (!inst->isBinaryOp()) {
-+ return false;
-+ }
-+ if (inst->getOpcode() != Instruction::And) {
-+ return false;
-+ }
-+ if (optLevel == CodeGenOpt::None) {
-+ return false;
-+ }
-+ // We want to do some simple optimizations on shift-right/and patterns. The
-+ // basic optimization is to turn (A >> B) & C, where A is a 32-bit type, B is
-+ // a value smaller than 32 and C is a mask, into a single instruction. If C
-+ // is a constant value, then the following transformation can occur. For
-+ // signed integers, it turns into the function call
-+ //   dst = __amdil_ibit_extract(log2(C), B, A).
-+ // For unsigned integers, it turns into the function call
-+ //   dst = __amdil_ubit_extract(log2(C), B, A).
-+ // The function __amdil_[u|i]bit_extract can be found in Section 7.9 of the
-+ // ATI IL spec of the stream SDK for Evergreen hardware.
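-+ //
-+ // Worked example (illustrative): (A >> 5) & 0x7 extracts a 3-bit field
-+ // starting at bit 5 of A; the mask 0x7 has three trailing ones, so the
-+ // field width is 3 and the whole shift/and pair maps onto one
-+ // [u|i]bit_extract operation with source A, offset 5 and width 3.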
-+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
-+ // This does not work on HD4XXX hardware.
-+ return false;
-+ }
-+ Type *aType = inst->getType();
-+ bool isVector = aType->isVectorTy();
-+
-+ // XXX Support vector types
-+ if (isVector) {
-+ return false;
-+ }
-+ int numEle = 1;
-+ // This only works on 32-bit integers
-+ if (aType->getScalarType()
-+ != Type::getInt32Ty(inst->getContext())) {
-+ return false;
-+ }
-+ if (isVector) {
-+ const VectorType *VT = dyn_cast<VectorType>(aType);
-+ numEle = VT->getNumElements();
-+ // We currently cannot support more than 4 elements in an intrinsic and we
-+ // cannot support Vec3 types.
-+ if (numEle > 4 || numEle == 3) {
-+ return false;
-+ }
-+ }
-+ BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
-+ // If the first operand is not a shift instruction, then we can return as it
-+ // doesn't match this pattern.
-+ if (!ShiftInst || !ShiftInst->isShift()) {
-+ return false;
-+ }
-+ // If it is a shift left, then it doesn't match this pattern.
-+ if (ShiftInst->getOpcode() == Instruction::Shl) {
-+ return false;
-+ }
-+ bool isSigned = ShiftInst->isArithmeticShift();
-+ Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
-+ Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
-+ // Let's make sure that the shift value and the and mask are constant integers.
-+ if (!AndMask || !ShrVal) {
-+ return false;
-+ }
-+ Constant *newMaskConst;
-+ Constant *shiftValConst;
-+ if (isVector) {
-+ // Handle the vector case
-+ std::vector<Constant *> maskVals;
-+ std::vector<Constant *> shiftVals;
-+ ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
-+ ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
-+ Type *scalarType = AndMaskVec->getType()->getScalarType();
-+ assert(AndMaskVec->getNumOperands() ==
-+ ShrValVec->getNumOperands() && "cannot have a "
-+ "combination where the number of elements to a "
-+ "shift and an and are different!");
-+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
-+ ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
-+ ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
-+ if (!AndCI || !ShiftIC) {
-+ return false;
-+ }
-+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
-+ if (!isMask_32(maskVal)) {
-+ return false;
-+ }
-+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
-+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
-+ // If the mask or shiftval is greater than the bitcount, then break out.
-+ if (maskVal >= 32 || shiftVal >= 32) {
-+ return false;
-+ }
-+ // If the mask val is greater than the number of original bits left
-+ // then this optimization is invalid.
-+ if (maskVal > (32 - shiftVal)) {
-+ return false;
-+ }
-+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
-+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
-+ }
-+ newMaskConst = ConstantVector::get(maskVals);
-+ shiftValConst = ConstantVector::get(shiftVals);
-+ } else {
-+ // Handle the scalar case
-+ uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
-+ // This must be a mask value where all lower bits are set to 1 and then any
-+ // bit higher is set to 0.
-+ if (!isMask_32(maskVal)) {
-+ return false;
-+ }
-+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
-+ // Count the number of bits set in the mask; this is the width of the
-+ // resulting bit field that is extracted from the source value.
-+ uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
-+ // If the mask or shift val is greater than the bit count, then break out.
-+ if (maskVal >= 32 || shiftVal >= 32) {
-+ return false;
-+ }
-+ // If the mask val is greater than the number of original bits left then
-+ // this optimization is invalid.
-+ if (maskVal > (32 - shiftVal)) {
-+ return false;
-+ }
-+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
-+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
-+ }
-+ // Let's create the function signature.
-+ std::vector<Type *> callTypes;
-+ callTypes.push_back(aType);
-+ callTypes.push_back(aType);
-+ callTypes.push_back(aType);
-+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
-+ std::string name = "llvm.AMDGPU.bit.extract.u32";
-+ if (isVector) {
-+ name += ".v" + itostr(numEle) + "i32";
-+ } else {
-+ name += ".";
-+ }
-+ // Let's create the function.
-+ Function *Func =
-+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
-+ getOrInsertFunction(llvm::StringRef(name), funcType));
-+ Value *Operands[3] = {
-+ ShiftInst->getOperand(0),
-+ shiftValConst,
-+ newMaskConst
-+ };
-+ // Let's create the call with the operands.
-+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
-+ CI->setDoesNotAccessMemory();
-+ CI->insertBefore(inst);
-+ inst->replaceAllUsesWith(CI);
-+ return true;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
-+ if (!CI) {
-+ return false;
-+ }
-+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
-+ if (!LHS->getName().startswith("__amdil_bfi")) {
-+ return false;
-+ }
-+ Type* type = CI->getOperand(0)->getType();
-+ Constant *negOneConst = NULL;
-+ if (type->isVectorTy()) {
-+ std::vector<Constant *> negOneVals;
-+ negOneConst = ConstantInt::get(CI->getContext(),
-+ APInt(32, StringRef("-1"), 10));
-+ for (size_t x = 0,
-+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
-+ negOneVals.push_back(negOneConst);
-+ }
-+ negOneConst = ConstantVector::get(negOneVals);
-+ } else {
-+ negOneConst = ConstantInt::get(CI->getContext(),
-+ APInt(32, StringRef("-1"), 10));
-+ }
-+ // __amdil_bfi => (A & B) | (~A & C)
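-+ // A sketch of the expansion emitted below (value names are illustrative):
-+ //   %r = call i32 @__amdil_bfi(i32 %A, i32 %B, i32 %C)
-+ // becomes
-+ //   %bfi_and  = and i32 %A, %B
-+ //   %bfi_not  = xor i32 %A, -1        ; ~A
-+ //   %bfi_and2 = and i32 %bfi_not, %C
-+ //   %r        = or i32 %bfi_and, %bfi_and2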
-+ BinaryOperator *lhs =
-+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
-+ CI->getOperand(1), "bfi_and", CI);
-+ BinaryOperator *rhs =
-+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
-+ "bfi_not", CI);
-+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
-+ "bfi_and", CI);
-+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
-+ CI->replaceAllUsesWith(lhs);
-+ return true;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
-+ if (!CI) {
-+ return false;
-+ }
-+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
-+ if (!LHS->getName().startswith("__amdil_bfm")) {
-+ return false;
-+ }
-+ // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1F)
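-+ // e.g. bfm(8, 4) = ((1 << 8) - 1) << 4 = 0x00000FF0, an 8-bit-wide mask
-+ // shifted left by 4.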
-+ Constant *newMaskConst = NULL;
-+ Constant *newShiftConst = NULL;
-+ Type* type = CI->getOperand(0)->getType();
-+ if (type->isVectorTy()) {
-+ std::vector<Constant*> newMaskVals, newShiftVals;
-+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
-+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
-+ for (size_t x = 0,
-+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
-+ newMaskVals.push_back(newMaskConst);
-+ newShiftVals.push_back(newShiftConst);
-+ }
-+ newMaskConst = ConstantVector::get(newMaskVals);
-+ newShiftConst = ConstantVector::get(newShiftVals);
-+ } else {
-+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
-+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
-+ }
-+ BinaryOperator *lhs =
-+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
-+ newMaskConst, "bfm_mask", CI);
-+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
-+ lhs, "bfm_shl", CI);
-+ lhs = BinaryOperator::Create(Instruction::Sub, lhs,
-+ newShiftConst, "bfm_sub", CI);
-+ BinaryOperator *rhs =
-+ BinaryOperator::Create(Instruction::And, CI->getOperand(1),
-+ newMaskConst, "bfm_mask", CI);
-+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
-+ CI->replaceAllUsesWith(lhs);
-+ return true;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
-+ Instruction *inst = (*bbb);
-+ if (optimizeCallInst(bbb)) {
-+ return true;
-+ }
-+ if (optimizeBitExtract(inst)) {
-+ return false;
-+ }
-+ if (optimizeBitInsert(inst)) {
-+ return false;
-+ }
-+ if (correctMisalignedMemOp(inst)) {
-+ return false;
-+ }
-+ return false;
-+}
-+bool
-+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
-+ LoadInst *linst = dyn_cast<LoadInst>(inst);
-+ StoreInst *sinst = dyn_cast<StoreInst>(inst);
-+ unsigned alignment;
-+ Type* Ty = inst->getType();
-+ if (linst) {
-+ alignment = linst->getAlignment();
-+ Ty = inst->getType();
-+ } else if (sinst) {
-+ alignment = sinst->getAlignment();
-+ Ty = sinst->getValueOperand()->getType();
-+ } else {
-+ return false;
-+ }
-+ unsigned size = getTypeSize(Ty);
-+ if (size <= alignment) {
-+ return false;
-+ }
-+ if (!Ty->isStructTy()) {
-+ return false;
-+ }
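-+ // An explicit alignment of 0 tells LLVM to fall back to the ABI alignment
-+ // for the type, which is what the underaligned struct access needs here.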
-+ if (alignment < 4) {
-+ if (linst) {
-+ linst->setAlignment(0);
-+ return true;
-+ } else if (sinst) {
-+ sinst->setAlignment(0);
-+ return true;
-+ }
-+ }
-+ return false;
-+}
-+bool
-+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
-+ if (!CI) {
-+ return false;
-+ }
-+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
-+ std::string namePrefix = LHS->getName().substr(0, 14);
-+ // Note: the 14-character prefix "__amdil_imul24" also matches
-+ // __amdil_imul24_high, so no separate check is needed for it.
-+ if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24") {
-+ return false;
-+ }
-+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
-+ return false;
-+ }
-+ return true;
-+}
-+
-+void
-+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
-+ assert(isSigned24BitOps(CI) && "Must be a "
-+ "signed 24 bit operation to call this function!");
-+ Value *LHS = CI->getOperand(CI->getNumOperands()-1);
-+ // On 7XX and 8XX we do not have signed 24-bit operations, so we need to
-+ // expand them as follows:
-+ // imul24 turns into a 32-bit imul
-+ // imad24 turns into a 32-bit imad
-+ // imul24_high turns into a 32-bit imul_high
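-+ //
-+ // For example (a minimal sketch, value names are illustrative):
-+ //   %r = call i32 @__amdil_imul24(i32 %a, i32 %b)
-+ // simply becomes
-+ //   %r = mul i32 %a, %b
-+ // while imad24 and imul24_high are re-emitted as calls to their 32-bit
-+ // library equivalents (__amdil_imad_i32, __amdil_imul_high_i32).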
-+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
-+ Type *aType = CI->getOperand(0)->getType();
-+ bool isVector = aType->isVectorTy();
-+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
-+ std::vector<Type*> callTypes;
-+ callTypes.push_back(CI->getOperand(0)->getType());
-+ callTypes.push_back(CI->getOperand(1)->getType());
-+ callTypes.push_back(CI->getOperand(2)->getType());
-+ FunctionType *funcType =
-+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
-+ std::string name = "__amdil_imad";
-+ if (isVector) {
-+ name += "_v" + itostr(numEle) + "i32";
-+ } else {
-+ name += "_i32";
-+ }
-+ Function *Func = dyn_cast<Function>(
-+ CI->getParent()->getParent()->getParent()->
-+ getOrInsertFunction(llvm::StringRef(name), funcType));
-+ Value *Operands[3] = {
-+ CI->getOperand(0),
-+ CI->getOperand(1),
-+ CI->getOperand(2)
-+ };
-+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
-+ nCI->insertBefore(CI);
-+ CI->replaceAllUsesWith(nCI);
-+ } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
-+ BinaryOperator *mulOp =
-+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
-+ CI->getOperand(1), "imul24", CI);
-+ CI->replaceAllUsesWith(mulOp);
-+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
-+ Type *aType = CI->getOperand(0)->getType();
-+
-+ bool isVector = aType->isVectorTy();
-+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
-+ std::vector<Type*> callTypes;
-+ callTypes.push_back(CI->getOperand(0)->getType());
-+ callTypes.push_back(CI->getOperand(1)->getType());
-+ FunctionType *funcType =
-+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
-+ std::string name = "__amdil_imul_high";
-+ if (isVector) {
-+ name += "_v" + itostr(numEle) + "i32";
-+ } else {
-+ name += "_i32";
-+ }
-+ Function *Func = dyn_cast<Function>(
-+ CI->getParent()->getParent()->getParent()->
-+ getOrInsertFunction(llvm::StringRef(name), funcType));
-+ Value *Operands[2] = {
-+ CI->getOperand(0),
-+ CI->getOperand(1)
-+ };
-+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
-+ nCI->insertBefore(CI);
-+ CI->replaceAllUsesWith(nCI);
-+ }
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
-+ return (CI != NULL
-+ && CI->getOperand(CI->getNumOperands() - 1)->getName()
-+ == "__amdil_get_local_size_int");
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
-+ if (!CI) {
-+ return false;
-+ }
-+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
-+ && (mSTM->getDeviceName() == "cayman")) {
-+ return false;
-+ }
-+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
-+ == "__amdil_improved_div";
-+}
-+
-+void
-+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
-+ assert(convertAccurateDivide(CI)
-+ && "expanding accurate divide can only happen if it is expandable!");
-+ BinaryOperator *divOp =
-+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
-+ CI->getOperand(1), "fdiv32", CI);
-+ CI->replaceAllUsesWith(divOp);
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
-+ if (optLevel != CodeGenOpt::None) {
-+ return false;
-+ }
-+
-+ if (!CI) {
-+ return false;
-+ }
-+
-+ unsigned funcNameIdx = CI->getNumOperands() - 1;
-+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
-+ if (calleeName != "__amdil_image2d_read_norm"
-+ && calleeName != "__amdil_image2d_read_unnorm"
-+ && calleeName != "__amdil_image3d_read_norm"
-+ && calleeName != "__amdil_image3d_read_unnorm") {
-+ return false;
-+ }
-+
-+ unsigned samplerIdx = 1;
-+ Value *sampler = CI->getOperand(samplerIdx);
-+ LoadInst *lInst = dyn_cast<LoadInst>(sampler);
-+ if (!lInst) {
-+ return false;
-+ }
-+
-+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
-+ return false;
-+ }
-+
-+ GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
-+ // If we are loading from what is not a global value, then we
-+ // fail and return.
-+ if (!gv) {
-+ return false;
-+ }
-+
-+ // If we don't have an initializer or we have an initializer and
-+ // the initializer is not a 32bit integer, we fail.
-+ if (!gv->hasInitializer()
-+ || !gv->getInitializer()->getType()->isIntegerTy(32)) {
-+ return false;
-+ }
-+
-+ // Now that we have the global variable initializer, let's replace
-+ // all uses of the load instruction with samplerVal and
-+ // reparse the __amdil_is_constant() function.
-+ Constant *samplerVal = gv->getInitializer();
-+ lInst->replaceAllUsesWith(samplerVal);
-+ return true;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::doInitialization(Module &M) {
-+ return false;
-+}
-+
-+bool
-+AMDGPUPeepholeOpt::doFinalization(Module &M) {
-+ return false;
-+}
-+
-+void
-+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
-+ AU.addRequired<MachineFunctionAnalysis>();
-+ FunctionPass::getAnalysisUsage(AU);
-+ AU.setPreservesAll();
-+}
-+
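-+// Returns the size of T in bytes. Pointer types either have their pointees
-+// summed (when dereferencePtr is set), count as the pointed-to struct for
-+// private-address-space structs, or default to 4 bytes.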
-+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
-+ size_t size = 0;
-+ if (!T) {
-+ return size;
-+ }
-+ switch (T->getTypeID()) {
-+ case Type::X86_FP80TyID:
-+ case Type::FP128TyID:
-+ case Type::PPC_FP128TyID:
-+ case Type::LabelTyID:
-+ assert(0 && "These types are not supported by this backend");
-+ default:
-+ case Type::FloatTyID:
-+ case Type::DoubleTyID:
-+ size = T->getPrimitiveSizeInBits() >> 3;
-+ break;
-+ case Type::PointerTyID:
-+ size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
-+ break;
-+ case Type::IntegerTyID:
-+ size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
-+ break;
-+ case Type::StructTyID:
-+ size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
-+ break;
-+ case Type::ArrayTyID:
-+ size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
-+ break;
-+ case Type::FunctionTyID:
-+ size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
-+ break;
-+ case Type::VectorTyID:
-+ size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
-+ break;
-+ }
-+ return size;
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
-+ bool dereferencePtr) {
-+ size_t size = 0;
-+ if (!ST) {
-+ return size;
-+ }
-+ Type *curType;
-+ StructType::element_iterator eib;
-+ StructType::element_iterator eie;
-+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
-+ curType = *eib;
-+ size += getTypeSize(curType, dereferencePtr);
-+ }
-+ return size;
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
-+ bool dereferencePtr) {
-+ return IT ? (IT->getBitWidth() >> 3) : 0;
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
-+ bool dereferencePtr) {
-+ assert(0 && "Should not be able to calculate the size of a function type");
-+ return 0;
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
-+ bool dereferencePtr) {
-+ return (size_t)(AT ? (getTypeSize(AT->getElementType(),
-+ dereferencePtr) * AT->getNumElements())
-+ : 0);
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
-+ bool dereferencePtr) {
-+ return VT ? (VT->getBitWidth() >> 3) : 0;
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
-+ bool dereferencePtr) {
-+ if (!PT) {
-+ return 0;
-+ }
-+ Type *CT = PT->getElementType();
-+ if (CT->getTypeID() == Type::StructTyID &&
-+ PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
-+ return getTypeSize(dyn_cast<StructType>(CT));
-+ } else if (dereferencePtr) {
-+ size_t size = 0;
-+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
-+ size += getTypeSize(PT->getContainedType(x), dereferencePtr);
-+ }
-+ return size;
-+ } else {
-+ return 4;
-+ }
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
-+ bool dereferencePtr) {
-+ //assert(0 && "Should not be able to calculate the size of an opaque type");
-+ return 4;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILRegisterInfo.td llvm-r600/lib/Target/R600/AMDILRegisterInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDILRegisterInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILRegisterInfo.td 2013-01-25 19:43:57.450049721 +0100
-@@ -0,0 +1,107 @@
-+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+// Declarations that describe the AMDIL register file
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class AMDILReg<bits<16> num, string n> : Register<n> {
-+ field bits<16> Value;
-+ let Value = num;
-+ let Namespace = "AMDGPU";
-+}
-+
-+ // We will start with 8 registers for each class before expanding to more.
-+ // Since the swizzle is added based on the register class, we can leave it
-+ // off here and just specify different registers for different register
-+ // classes.
-+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
-+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
-+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
-+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
-+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
-+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
-+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
-+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
-+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
-+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
-+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
-+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
-+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
-+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
-+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
-+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
-+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
-+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
-+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
-+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
-+
-+ // All registers between 1000 and 1024 are reserved and cannot be used
-+ // unless defined in this section.
-+ // r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local IDs.
-+// r1020 is used to hold the frame index for local arrays
-+// r1019 is used to hold the dynamic stack allocation pointer
-+// r1018 is used as a temporary register for handwritten code
-+// r1017 is used as a temporary register for handwritten code
-+// r1016 is used as a temporary register for load/store code
-+// r1015 is used as a temporary register for data segment offset
-+// r1014 is used as a temporary register for store code
-+// r1013 is used as the section data pointer register
-+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
-+// r1009 is used as the frame pointer register
-+// r999 is used as the mem register.
-+// r998 is used as the return address register.
-+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
-+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
-+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
-+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
-+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
-+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
-+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
-+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
-+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
-+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
-+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
-+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
-+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
-+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
-+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
-+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
-+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
-+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
-+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
-+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
-+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
-+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
-+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
-+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
-+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
-+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
-+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
-+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
-+def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
-+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
-+ let AltOrders = [(add (sequence "R%u", 1, 20))];
-+ let AltOrderSelect = [{
-+ return 1;
-+ }];
-+ }
-+def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
-+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
-+ let AltOrders = [(add (sequence "R%u", 1, 20))];
-+ let AltOrderSelect = [{
-+ return 1;
-+ }];
-+ }
-+def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
-+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
-+ let AltOrders = [(add (sequence "R%u", 1, 20))];
-+ let AltOrderSelect = [{
-+ return 1;
-+ }];
-+ }
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILSIDevice.cpp llvm-r600/lib/Target/R600/AMDILSIDevice.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILSIDevice.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILSIDevice.cpp 2013-01-25 19:43:57.450049721 +0100
-@@ -0,0 +1,45 @@
-+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+#include "AMDILSIDevice.h"
-+#include "AMDILEvergreenDevice.h"
-+#include "AMDILNIDevice.h"
-+#include "AMDGPUSubtarget.h"
-+
-+using namespace llvm;
-+
-+AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
-+ : AMDGPUEvergreenDevice(ST) {
-+}
-+AMDGPUSIDevice::~AMDGPUSIDevice() {
-+}
-+
-+size_t
-+AMDGPUSIDevice::getMaxLDSSize() const {
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return MAX_LDS_SIZE_900;
-+ } else {
-+ return 0;
-+ }
-+}
-+
-+uint32_t
-+AMDGPUSIDevice::getGeneration() const {
-+ return AMDGPUDeviceInfo::HD7XXX;
-+}
-+
-+std::string
-+AMDGPUSIDevice::getDataLayout() const {
-+ return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
-+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
-+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
-+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
-+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
-+ "-n8:16:32:64");
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILSIDevice.h llvm-r600/lib/Target/R600/AMDILSIDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDILSIDevice.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILSIDevice.h 2013-01-25 19:43:57.450049721 +0100
-@@ -0,0 +1,39 @@
-+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+///
-+/// This file will define the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===---------------------------------------------------------------------===//
-+#ifndef AMDILSIDEVICE_H
-+#define AMDILSIDEVICE_H
-+#include "AMDILEvergreenDevice.h"
-+
-+namespace llvm {
-+class AMDGPUSubtarget;
-+//===---------------------------------------------------------------------===//
-+// SI generation of devices and their respective sub classes
-+//===---------------------------------------------------------------------===//
-+
-+/// \brief The AMDGPUSIDevice is the base class for all Southern Island series
-+/// of cards.
-+class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
-+public:
-+ AMDGPUSIDevice(AMDGPUSubtarget*);
-+ virtual ~AMDGPUSIDevice();
-+ virtual size_t getMaxLDSSize() const;
-+ virtual uint32_t getGeneration() const;
-+ virtual std::string getDataLayout() const;
-+};
-+
-+} // namespace llvm
-+#endif // AMDILSIDEVICE_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/CMakeLists.txt llvm-r600/lib/Target/R600/CMakeLists.txt
---- llvm-3.2.src/lib/Target/R600/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/CMakeLists.txt 2013-01-25 19:43:57.453383054 +0100
-@@ -0,0 +1,55 @@
-+set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
-+
-+tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
-+tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
-+tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
-+tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
-+tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
-+tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
-+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-+tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
-+tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
-+add_public_tablegen_target(AMDGPUCommonTableGen)
-+
-+add_llvm_target(AMDGPUCodeGen
-+ AMDIL7XXDevice.cpp
-+ AMDILCFGStructurizer.cpp
-+ AMDILDevice.cpp
-+ AMDILDeviceInfo.cpp
-+ AMDILEvergreenDevice.cpp
-+ AMDILFrameLowering.cpp
-+ AMDILIntrinsicInfo.cpp
-+ AMDILISelDAGToDAG.cpp
-+ AMDILISelLowering.cpp
-+ AMDILNIDevice.cpp
-+ AMDILPeepholeOptimizer.cpp
-+ AMDILSIDevice.cpp
-+ AMDGPUAsmPrinter.cpp
-+ AMDGPUMCInstLower.cpp
-+ AMDGPUSubtarget.cpp
-+ AMDGPUTargetMachine.cpp
-+ AMDGPUISelLowering.cpp
-+ AMDGPUConvertToISA.cpp
-+ AMDGPUInstrInfo.cpp
-+ AMDGPURegisterInfo.cpp
-+ R600ExpandSpecialInstrs.cpp
-+ R600InstrInfo.cpp
-+ R600ISelLowering.cpp
-+ R600LowerConstCopy.cpp
-+ R600MachineFunctionInfo.cpp
-+ R600RegisterInfo.cpp
-+ SIAssignInterpRegs.cpp
-+ SIInstrInfo.cpp
-+ SIISelLowering.cpp
-+ SILowerLiteralConstants.cpp
-+ SILowerControlFlow.cpp
-+ SIMachineFunctionInfo.cpp
-+ SIRegisterInfo.cpp
-+ SIFixSGPRLiveness.cpp
-+ )
-+
-+add_dependencies(LLVMR600CodeGen intrinsics_gen)
-+
-+add_subdirectory(InstPrinter)
-+add_subdirectory(TargetInfo)
-+add_subdirectory(MCTargetDesc)
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
---- llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,156 @@
-+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+// \file
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUInstPrinter.h"
-+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-+#include "llvm/MC/MCInst.h"
-+
-+using namespace llvm;
-+
-+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-+ StringRef Annot) {
-+ printInstruction(MI, OS);
-+
-+ printAnnotation(OS, Annot);
-+}
-+
-+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+
-+ const MCOperand &Op = MI->getOperand(OpNo);
-+ if (Op.isReg()) {
-+ switch (Op.getReg()) {
-+ // This is the default predicate state, so we don't need to print it.
-+ case AMDGPU::PRED_SEL_OFF: break;
-+ default: O << getRegisterName(Op.getReg()); break;
-+ }
-+ } else if (Op.isImm()) {
-+ O << Op.getImm();
-+ } else if (Op.isFPImm()) {
-+ O << Op.getFPImm();
-+ } else {
-+ assert(!"unknown operand type in printOperand");
-+ }
-+}
-+
-+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ printOperand(MI, OpNo, O);
-+ O << ", ";
-+ printOperand(MI, OpNo + 1, O);
-+}
-+
-+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O, StringRef Asm) {
-+ const MCOperand &Op = MI->getOperand(OpNo);
-+ assert(Op.isImm());
-+ if (Op.getImm() == 1) {
-+ O << Asm;
-+ }
-+}
-+
-+void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ printIfSet(MI, OpNo, O, "|");
-+}
-+
-+void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ printIfSet(MI, OpNo, O, "_SAT");
-+}
-+
-+void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
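-+ // Print the 32-bit literal both as its raw integer value and, in
-+ // parentheses, reinterpreted as a float.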
-+ union Literal {
-+ float f;
-+ int32_t i;
-+ } L;
-+
-+ L.i = MI->getOperand(OpNo).getImm();
-+ O << L.i << "(" << L.f << ")";
-+}
-+
-+void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ printIfSet(MI, OpNo, O, " *");
-+}
-+
-+void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ printIfSet(MI, OpNo, O, "-");
-+}
-+
-+void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ switch (MI->getOperand(OpNo).getImm()) {
-+ default: break;
-+ case 1:
-+ O << " * 2.0";
-+ break;
-+ case 2:
-+ O << " * 4.0";
-+ break;
-+ case 3:
-+ O << " / 2.0";
-+ break;
-+ }
-+}
-+
-+void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ const MCOperand &Op = MI->getOperand(OpNo);
-+ if (Op.getImm() != 0) {
-+ O << " + " << Op.getImm();
-+ }
-+}
-+
-+void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ printIfSet(MI, OpNo, O, "ExecMask,");
-+}
-+
-+void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ printIfSet(MI, OpNo, O, "Pred,");
-+}
-+
-+void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ const MCOperand &Op = MI->getOperand(OpNo);
-+ if (Op.getImm() == 0) {
-+ O << " (MASKED)";
-+ }
-+}
-+
-+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
-+ raw_ostream &O) {
-+ const char *chans = "XYZW";
-+ int sel = MI->getOperand(OpNo).getImm();
-+
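-+ // The low two bits of the encoded value select the channel (X/Y/Z/W);
-+ // the remaining bits select the register or constant-buffer entry.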
-+ int chan = sel & 3;
-+ sel >>= 2;
-+
-+ if (sel >= 512) {
-+ sel -= 512;
-+ int cb = sel >> 12;
-+ sel &= 4095;
-+ O << cb << "[" << sel << "]";
-+ } else if (sel >= 448) {
-+ sel -= 448;
-+ O << sel;
-+ } else if (sel >= 0) {
-+ O << sel;
-+ }
-+
-+ if (sel >= 0)
-+ O << "." << chans[chan];
-+}
-+
-+#include "AMDGPUGenAsmWriter.inc"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
---- llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,53 @@
-+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUINSTPRINTER_H
-+#define AMDGPUINSTPRINTER_H
-+
-+#include "llvm/ADT/StringRef.h"
-+#include "llvm/MC/MCInstPrinter.h"
-+#include "llvm/Support/raw_ostream.h"
-+
-+namespace llvm {
-+
-+class AMDGPUInstPrinter : public MCInstPrinter {
-+public:
-+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-+ const MCRegisterInfo &MRI)
-+ : MCInstPrinter(MAI, MII, MRI) {}
-+
-+ // Autogenerated by tblgen.
-+ void printInstruction(const MCInst *MI, raw_ostream &O);
-+ static const char *getRegisterName(unsigned RegNo);
-+
-+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
-+
-+private:
-+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
-+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUINSTPRINTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/CMakeLists.txt llvm-r600/lib/Target/R600/InstPrinter/CMakeLists.txt
---- llvm-3.2.src/lib/Target/R600/InstPrinter/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/InstPrinter/CMakeLists.txt 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,7 @@
-+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-+
-+add_llvm_library(LLVMR600AsmPrinter
-+ AMDGPUInstPrinter.cpp
-+ )
-+
-+add_dependencies(LLVMR600AsmPrinter AMDGPUCommonTableGen)
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/LLVMBuild.txt llvm-r600/lib/Target/R600/InstPrinter/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/R600/InstPrinter/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/InstPrinter/LLVMBuild.txt 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,24 @@
-+;===- ./lib/Target/R600/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
-+;
-+; The LLVM Compiler Infrastructure
-+;
-+; This file is distributed under the University of Illinois Open Source
-+; License. See LICENSE.TXT for details.
-+;
-+;===------------------------------------------------------------------------===;
-+;
-+; This is an LLVMBuild description file for the components in this subdirectory.
-+;
-+; For more information on the LLVMBuild system, please see:
-+;
-+; http://llvm.org/docs/LLVMBuild.html
-+;
-+;===------------------------------------------------------------------------===;
-+
-+[component_0]
-+type = Library
-+name = R600AsmPrinter
-+parent = R600
-+required_libraries = MC Support
-+add_to_library_groups = R600
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/Makefile llvm-r600/lib/Target/R600/InstPrinter/Makefile
---- llvm-3.2.src/lib/Target/R600/InstPrinter/Makefile 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/InstPrinter/Makefile 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,15 @@
-+##===- lib/Target/R600/InstPrinter/Makefile ----------------*- Makefile -*-===##
-+#
-+# The LLVM Compiler Infrastructure
-+#
-+# This file is distributed under the University of Illinois Open Source
-+# License. See LICENSE.TXT for details.
-+#
-+##===----------------------------------------------------------------------===##
-+LEVEL = ../../../..
-+LIBRARYNAME = LLVMR600AsmPrinter
-+
-+# Hack: we need to include the 'main' R600 target directory to grab private headers
-+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-+
-+include $(LEVEL)/Makefile.common
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/LLVMBuild.txt llvm-r600/lib/Target/R600/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/R600/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/LLVMBuild.txt 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,32 @@
-+;===- ./lib/Target/R600/LLVMBuild.txt --------------------------*- Conf -*--===;
-+;
-+; The LLVM Compiler Infrastructure
-+;
-+; This file is distributed under the University of Illinois Open Source
-+; License. See LICENSE.TXT for details.
-+;
-+;===------------------------------------------------------------------------===;
-+;
-+; This is an LLVMBuild description file for the components in this subdirectory.
-+;
-+; For more information on the LLVMBuild system, please see:
-+;
-+; http://llvm.org/docs/LLVMBuild.html
-+;
-+;===------------------------------------------------------------------------===;
-+
-+[common]
-+subdirectories = InstPrinter MCTargetDesc TargetInfo
-+
-+[component_0]
-+type = TargetGroup
-+name = R600
-+parent = Target
-+has_asmprinter = 1
-+
-+[component_1]
-+type = Library
-+name = R600CodeGen
-+parent = R600
-+required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info
-+add_to_library_groups = R600
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/Makefile llvm-r600/lib/Target/R600/Makefile
---- llvm-3.2.src/lib/Target/R600/Makefile 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/Makefile 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,23 @@
-+##===- lib/Target/R600/Makefile ---------------------------*- Makefile -*-===##
-+#
-+# The LLVM Compiler Infrastructure
-+#
-+# This file is distributed under the University of Illinois Open Source
-+# License. See LICENSE.TXT for details.
-+#
-+##===----------------------------------------------------------------------===##
-+
-+LEVEL = ../../..
-+LIBRARYNAME = LLVMR600CodeGen
-+TARGET = AMDGPU
-+
-+# Make sure that tblgen is run, first thing.
-+BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
-+ AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
-+ AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
-+ AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
-+ AMDGPUGenAsmWriter.inc
-+
-+DIRS = InstPrinter TargetInfo MCTargetDesc
-+
-+include $(LEVEL)/Makefile.common
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,90 @@
-+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-+#include "llvm/ADT/StringRef.h"
-+#include "llvm/MC/MCAsmBackend.h"
-+#include "llvm/MC/MCAssembler.h"
-+#include "llvm/MC/MCObjectWriter.h"
-+#include "llvm/MC/MCValue.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class AMDGPUMCObjectWriter : public MCObjectWriter {
-+public:
-+ AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
-+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
-+ const MCAsmLayout &Layout) {
-+ //XXX: Implement if necessary.
-+ }
-+ virtual void RecordRelocation(const MCAssembler &Asm,
-+ const MCAsmLayout &Layout,
-+ const MCFragment *Fragment,
-+ const MCFixup &Fixup,
-+ MCValue Target, uint64_t &FixedValue) {
-+ assert(!"Not implemented");
-+ }
-+
-+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
-+
-+};
-+
-+class AMDGPUAsmBackend : public MCAsmBackend {
-+public:
-+ AMDGPUAsmBackend(const Target &T)
-+ : MCAsmBackend() {}
-+
-+ virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
-+ virtual unsigned getNumFixupKinds() const { return 0; }
-+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
-+ uint64_t Value) const;
-+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
-+ const MCInstFragment *DF,
-+ const MCAsmLayout &Layout) const {
-+ return false;
-+ }
-+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
-+ assert(!"Not implemented");
-+ }
-+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
-+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
-+ return true;
-+ }
-+};
-+
-+} //End anonymous namespace
-+
-+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
-+ const MCAsmLayout &Layout) {
-+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
-+ Asm.writeSectionData(I, Layout);
-+ }
-+}
-+
-+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
-+ StringRef CPU) {
-+ return new AMDGPUAsmBackend(T);
-+}
-+
-+AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
-+ raw_ostream &OS) const {
-+ return new AMDGPUMCObjectWriter(OS);
-+}
-+
-+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
-+ unsigned DataSize, uint64_t Value) const {
-+
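-+ // Only 4-byte PC-relative fixups are expected here; the branch offset is
-+ // stored as a count of 32-bit words relative to the next instruction,
-+ // hence the (Value - 4) / 4 below.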
-+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
-+ assert(Fixup.getKind() == FK_PCRel_4);
-+ *Dst = (Value - 4) / 4;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,85 @@
-+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUMCAsmInfo.h"
-+
-+using namespace llvm;
-+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
-+ HasSingleParameterDotFile = false;
-+ WeakDefDirective = 0;
-+ //===------------------------------------------------------------------===//
-+ HasSubsectionsViaSymbols = true;
-+ HasMachoZeroFillDirective = false;
-+ HasMachoTBSSDirective = false;
-+ HasStaticCtorDtorReferenceInStaticMode = false;
-+ LinkerRequiresNonEmptyDwarfLines = true;
-+ MaxInstLength = 16;
-+ PCSymbol = "$";
-+ SeparatorString = "\n";
-+ CommentColumn = 40;
-+ CommentString = ";";
-+ LabelSuffix = ":";
-+ GlobalPrefix = "@";
-+ PrivateGlobalPrefix = ";.";
-+ LinkerPrivateGlobalPrefix = "!";
-+ InlineAsmStart = ";#ASMSTART";
-+ InlineAsmEnd = ";#ASMEND";
-+ AssemblerDialect = 0;
-+ AllowQuotesInName = false;
-+ AllowNameToStartWithDigit = false;
-+ AllowPeriodsInName = false;
-+
-+ //===--- Data Emission Directives -------------------------------------===//
-+ ZeroDirective = ".zero";
-+ AsciiDirective = ".ascii\t";
-+ AscizDirective = ".asciz\t";
-+ Data8bitsDirective = ".byte\t";
-+ Data16bitsDirective = ".short\t";
-+ Data32bitsDirective = ".long\t";
-+ Data64bitsDirective = ".quad\t";
-+ GPRel32Directive = 0;
-+ SunStyleELFSectionSwitchSyntax = true;
-+ UsesELFSectionDirectiveForBSS = true;
-+ HasMicrosoftFastStdCallMangling = false;
-+
-+ //===--- Alignment Information ----------------------------------------===//
-+ AlignDirective = ".align\t";
-+ AlignmentIsInBytes = true;
-+ TextAlignFillValue = 0;
-+
-+ //===--- Global Variable Emission Directives --------------------------===//
-+ GlobalDirective = ".global";
-+ ExternDirective = ".extern";
-+ HasSetDirective = false;
-+ HasAggressiveSymbolFolding = true;
-+ COMMDirectiveAlignmentIsInBytes = false;
-+ HasDotTypeDotSizeDirective = false;
-+ HasNoDeadStrip = true;
-+ HasSymbolResolver = false;
-+ WeakRefDirective = ".weakref\t";
-+ LinkOnceDirective = 0;
-+ //===--- Dwarf Emission Directives -----------------------------------===//
-+ HasLEB128 = true;
-+ SupportsDebugInformation = true;
-+ ExceptionsType = ExceptionHandling::None;
-+ DwarfUsesInlineInfoSection = false;
-+ DwarfSectionOffsetDirective = ".offset";
-+
-+}
-+
-+const char*
-+AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const {
-+ return 0;
-+}
-+
-+const MCSection*
-+AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
-+ return 0;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,30 @@
-+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUMCASMINFO_H
-+#define AMDGPUMCASMINFO_H
-+
-+#include "llvm/MC/MCAsmInfo.h"
-+namespace llvm {
-+
-+class Target;
-+class StringRef;
-+
-+class AMDGPUMCAsmInfo : public MCAsmInfo {
-+public:
-+ explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
-+ const char* getDataASDirective(unsigned int Size, unsigned int AS) const;
-+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
-+};
-+} // namespace llvm
-+#endif // AMDGPUMCASMINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h 2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,60 @@
-+//===-- AMDGPUMCCodeEmitter.h - AMDGPU Code Emitter interface ---*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief CodeEmitter interface for R600 and SI codegen.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUCODEEMITTER_H
-+#define AMDGPUCODEEMITTER_H
-+
-+#include "llvm/MC/MCCodeEmitter.h"
-+#include "llvm/Support/raw_ostream.h"
-+
-+namespace llvm {
-+
-+class MCInst;
-+class MCOperand;
-+
-+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
-+public:
-+
-+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
-+ SmallVectorImpl<MCFixup> &Fixups) const;
-+
-+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ return 0;
-+ }
-+
-+ virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ return 0;
-+ }
-+ virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ return 0;
-+ }
-+ virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
-+ return Value;
-+ }
-+ virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ return 0;
-+ }
-+ virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ return 0;
-+ }
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUCODEEMITTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,113 @@
-+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This file provides AMDGPU specific target descriptions.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUMCTargetDesc.h"
-+#include "AMDGPUMCAsmInfo.h"
-+#include "InstPrinter/AMDGPUInstPrinter.h"
-+#include "llvm/MC/MachineLocation.h"
-+#include "llvm/MC/MCCodeGenInfo.h"
-+#include "llvm/MC/MCInstrInfo.h"
-+#include "llvm/MC/MCRegisterInfo.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/MC/MCSubtargetInfo.h"
-+#include "llvm/Support/ErrorHandling.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+#define GET_INSTRINFO_MC_DESC
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+#define GET_SUBTARGETINFO_MC_DESC
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+#define GET_REGINFO_MC_DESC
-+#include "AMDGPUGenRegisterInfo.inc"
-+
-+using namespace llvm;
-+
-+static MCInstrInfo *createAMDGPUMCInstrInfo() {
-+ MCInstrInfo *X = new MCInstrInfo();
-+ InitAMDGPUMCInstrInfo(X);
-+ return X;
-+}
-+
-+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
-+ MCRegisterInfo *X = new MCRegisterInfo();
-+ InitAMDGPUMCRegisterInfo(X, 0);
-+ return X;
-+}
-+
-+static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
-+ StringRef FS) {
-+ MCSubtargetInfo * X = new MCSubtargetInfo();
-+ InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
-+ return X;
-+}
-+
-+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
-+ CodeModel::Model CM,
-+ CodeGenOpt::Level OL) {
-+ MCCodeGenInfo *X = new MCCodeGenInfo();
-+ X->InitMCCodeGenInfo(RM, CM, OL);
-+ return X;
-+}
-+
-+static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
-+ unsigned SyntaxVariant,
-+ const MCAsmInfo &MAI,
-+ const MCInstrInfo &MII,
-+ const MCRegisterInfo &MRI,
-+ const MCSubtargetInfo &STI) {
-+ return new AMDGPUInstPrinter(MAI, MII, MRI);
-+}
-+
-+static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
-+ const MCRegisterInfo &MRI,
-+ const MCSubtargetInfo &STI,
-+ MCContext &Ctx) {
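-+ // Southern Islands parts are the ones with 64-bit pointers, so the
-+ // Feature64BitPtr bit is used to pick between the SI and R600 emitters.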
-+ if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
-+ return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
-+ } else {
-+ return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
-+ }
-+}
-+
-+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
-+ MCContext &Ctx, MCAsmBackend &MAB,
-+ raw_ostream &_OS,
-+ MCCodeEmitter *_Emitter,
-+ bool RelaxAll,
-+ bool NoExecStack) {
-+ return createPureStreamer(Ctx, MAB, _OS, _Emitter);
-+}
-+
-+extern "C" void LLVMInitializeR600TargetMC() {
-+
-+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
-+
-+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
-+
-+ TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
-+
-+ TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
-+
-+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
-+
-+ TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
-+
-+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
-+
-+ TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
-+
-+ TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,55 @@
-+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Provides AMDGPU specific target descriptions.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+#ifndef AMDGPUMCTARGETDESC_H
-+#define AMDGPUMCTARGETDESC_H
-+
-+#include "llvm/ADT/StringRef.h"
-+
-+namespace llvm {
-+class MCAsmBackend;
-+class MCCodeEmitter;
-+class MCContext;
-+class MCInstrInfo;
-+class MCRegisterInfo;
-+class MCSubtargetInfo;
-+class Target;
-+
-+extern Target TheAMDGPUTarget;
-+
-+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
-+ const MCRegisterInfo &MRI,
-+ const MCSubtargetInfo &STI,
-+ MCContext &Ctx);
-+
-+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
-+ const MCRegisterInfo &MRI,
-+ const MCSubtargetInfo &STI,
-+ MCContext &Ctx);
-+
-+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
-+ StringRef CPU);
-+} // End llvm namespace
-+
-+#define GET_REGINFO_ENUM
-+#include "AMDGPUGenRegisterInfo.inc"
-+
-+#define GET_INSTRINFO_ENUM
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+#define GET_SUBTARGETINFO_ENUM
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+#endif // AMDGPUMCTARGETDESC_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/CMakeLists.txt llvm-r600/lib/Target/R600/MCTargetDesc/CMakeLists.txt
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/CMakeLists.txt 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,10 @@
-+
-+add_llvm_library(LLVMR600Desc
-+ AMDGPUAsmBackend.cpp
-+ AMDGPUMCTargetDesc.cpp
-+ AMDGPUMCAsmInfo.cpp
-+ R600MCCodeEmitter.cpp
-+ SIMCCodeEmitter.cpp
-+ )
-+
-+add_dependencies(LLVMR600Desc AMDGPUCommonTableGen)
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/LLVMBuild.txt llvm-r600/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/LLVMBuild.txt 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,23 @@
-+;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
-+;
-+; The LLVM Compiler Infrastructure
-+;
-+; This file is distributed under the University of Illinois Open Source
-+; License. See LICENSE.TXT for details.
-+;
-+;===------------------------------------------------------------------------===;
-+;
-+; This is an LLVMBuild description file for the components in this subdirectory.
-+;
-+; For more information on the LLVMBuild system, please see:
-+;
-+; http://llvm.org/docs/LLVMBuild.html
-+;
-+;===------------------------------------------------------------------------===;
-+
-+[component_0]
-+type = Library
-+name = R600Desc
-+parent = R600
-+required_libraries = R600AsmPrinter R600Info MC
-+add_to_library_groups = R600
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/Makefile llvm-r600/lib/Target/R600/MCTargetDesc/Makefile
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/Makefile 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/Makefile 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,16 @@
-+##===- lib/Target/R600/MCTargetDesc/Makefile ---------------*- Makefile -*-===##
-+#
-+# The LLVM Compiler Infrastructure
-+#
-+# This file is distributed under the University of Illinois Open Source
-+# License. See LICENSE.TXT for details.
-+#
-+##===----------------------------------------------------------------------===##
-+
-+LEVEL = ../../../..
-+LIBRARYNAME = LLVMR600Desc
-+
-+# Hack: we need to include 'main' target directory to grab private headers
-+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-+
-+include $(LEVEL)/Makefile.common
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp llvm-r600/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,580 @@
-+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+///
-+/// This code emitter outputs bytecode that is understood by the r600g driver
-+/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
-+/// but it still needs to be run through a finalizer in order to be executed
-+/// by the GPU.
-+///
-+/// [1] http://www.mesa3d.org/
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "R600Defines.h"
-+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-+#include "llvm/MC/MCCodeEmitter.h"
-+#include "llvm/MC/MCContext.h"
-+#include "llvm/MC/MCInst.h"
-+#include "llvm/MC/MCInstrInfo.h"
-+#include "llvm/MC/MCRegisterInfo.h"
-+#include "llvm/MC/MCSubtargetInfo.h"
-+#include "llvm/Support/raw_ostream.h"
-+
-+#include <stdio.h>
-+
-+#define SRC_BYTE_COUNT 11
-+#define DST_BYTE_COUNT 5
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
-+ R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
-+ void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
-+ const MCInstrInfo &MCII;
-+ const MCRegisterInfo &MRI;
-+ const MCSubtargetInfo &STI;
-+ MCContext &Ctx;
-+
-+public:
-+
-+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
-+ const MCSubtargetInfo &sti, MCContext &ctx)
-+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
-+
-+ /// \brief Encode the instruction and write it to the OS.
-+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-+ SmallVectorImpl<MCFixup> &Fixups) const;
-+
-+ /// \returns the encoding for an MCOperand.
-+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-+ SmallVectorImpl<MCFixup> &Fixups) const;
-+private:
-+
-+ void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
-+ raw_ostream &OS) const;
-+ void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
-+ void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
-+ raw_ostream &OS) const;
-+ void EmitDst(const MCInst &MI, raw_ostream &OS) const;
-+ void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
-+ raw_ostream &OS) const;
-+ void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
-+
-+ void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
-+
-+ void EmitByte(unsigned int byte, raw_ostream &OS) const;
-+
-+ void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
-+
-+ void Emit(uint32_t value, raw_ostream &OS) const;
-+ void Emit(uint64_t value, raw_ostream &OS) const;
-+
-+ unsigned getHWRegChan(unsigned reg) const;
-+ unsigned getHWReg(unsigned regNo) const;
-+
-+ bool isFCOp(unsigned opcode) const;
-+ bool isTexOp(unsigned opcode) const;
-+ bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
-+
-+};
-+
-+} // End anonymous namespace
-+
-+enum RegElement {
-+ ELEMENT_X = 0,
-+ ELEMENT_Y,
-+ ELEMENT_Z,
-+ ELEMENT_W
-+};
-+
-+enum InstrTypes {
-+ INSTR_ALU = 0,
-+ INSTR_TEX,
-+ INSTR_FC,
-+ INSTR_NATIVE,
-+ INSTR_VTX,
-+ INSTR_EXPORT
-+};
-+
-+enum FCInstr {
-+ FC_IF_PREDICATE = 0,
-+ FC_ELSE,
-+ FC_ENDIF,
-+ FC_BGNLOOP,
-+ FC_ENDLOOP,
-+ FC_BREAK_PREDICATE,
-+ FC_CONTINUE
-+};
-+
-+enum TextureTypes {
-+ TEXTURE_1D = 1,
-+ TEXTURE_2D,
-+ TEXTURE_3D,
-+ TEXTURE_CUBE,
-+ TEXTURE_RECT,
-+ TEXTURE_SHADOW1D,
-+ TEXTURE_SHADOW2D,
-+ TEXTURE_SHADOWRECT,
-+ TEXTURE_1D_ARRAY,
-+ TEXTURE_2D_ARRAY,
-+ TEXTURE_SHADOW1D_ARRAY,
-+ TEXTURE_SHADOW2D_ARRAY
-+};
-+
-+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
-+ const MCRegisterInfo &MRI,
-+ const MCSubtargetInfo &STI,
-+ MCContext &Ctx) {
-+ return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
-+}
-+
-+void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ if (isTexOp(MI.getOpcode())) {
-+ EmitTexInstr(MI, Fixups, OS);
-+  } else if (isFCOp(MI.getOpcode())) {
-+ EmitFCInstr(MI, OS);
-+ } else if (MI.getOpcode() == AMDGPU::RETURN ||
-+ MI.getOpcode() == AMDGPU::BUNDLE ||
-+ MI.getOpcode() == AMDGPU::KILL) {
-+ return;
-+ } else {
-+ switch(MI.getOpcode()) {
-+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
-+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
-+ uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
-+ EmitByte(INSTR_NATIVE, OS);
-+ Emit(inst, OS);
-+ break;
-+ }
-+ case AMDGPU::CONSTANT_LOAD_eg:
-+ case AMDGPU::VTX_READ_PARAM_8_eg:
-+ case AMDGPU::VTX_READ_PARAM_16_eg:
-+ case AMDGPU::VTX_READ_PARAM_32_eg:
-+ case AMDGPU::VTX_READ_GLOBAL_8_eg:
-+ case AMDGPU::VTX_READ_GLOBAL_32_eg:
-+ case AMDGPU::VTX_READ_GLOBAL_128_eg:
-+ case AMDGPU::TEX_VTX_CONSTBUF: {
-+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
-+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
-+
-+ EmitByte(INSTR_VTX, OS);
-+ Emit(InstWord01, OS);
-+ Emit(InstWord2, OS);
-+ break;
-+ }
-+ case AMDGPU::EG_ExportSwz:
-+ case AMDGPU::R600_ExportSwz:
-+ case AMDGPU::EG_ExportBuf:
-+ case AMDGPU::R600_ExportBuf: {
-+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
-+ EmitByte(INSTR_EXPORT, OS);
-+ Emit(Inst, OS);
-+ break;
-+ }
-+
-+ default:
-+ EmitALUInstr(MI, Fixups, OS);
-+ break;
-+ }
-+ }
-+}
-+
-+void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
-+ SmallVectorImpl<MCFixup> &Fixups,
-+ raw_ostream &OS) const {
-+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-+
-+ // Emit instruction type
-+ EmitByte(INSTR_ALU, OS);
-+
-+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
-+
-+  // Older ALUs (FeatureR600ALUInst) use a different encoding for
-+  // instructions with one or two src parameters.
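-+  // The 10-bit ISA opcode field sits at bit 39 in the EG encoding; the code
-+  // below re-inserts it one bit higher, at bit 40, for the R600 encoding.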
-+ if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
-+ !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
-+ uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
-+ InstWord01 &= ~(0x3FFULL << 39);
-+ InstWord01 |= ISAOpCode << 1;
-+ }
-+
-+ unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
-+ MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
-+
-+ EmitByte(SrcNum, OS);
-+
-+ const unsigned SrcOps[3][2] = {
-+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
-+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
-+ {R600Operands::SRC2, R600Operands::SRC2_SEL}
-+ };
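-+  // Each SrcOps row pairs an abstract source operand with its select
-+  // operand; the actual machine-operand indices come from ALUOpTable,
-+  // whose row is selected by the source count (SrcNum - 1).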
-+
-+ for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
-+ unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
-+ unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
-+ EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
-+ }
-+
-+ Emit(InstWord01, OS);
-+}
-+
-+void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
-+ raw_ostream &OS) const {
-+ const MCOperand &MO = MI.getOperand(OpIdx);
-+ union {
-+ float f;
-+ uint32_t i;
-+ } Value;
-+ Value.i = 0;
-+ // Emit the source select (2 bytes). For GPRs, this is the register index.
-+  // For other potential instruction operands (e.g. constant registers), the
-+  // value of the source select is defined in the r600isa docs.
-+ if (MO.isReg()) {
-+ unsigned reg = MO.getReg();
-+ EmitTwoBytes(getHWReg(reg), OS);
-+ if (reg == AMDGPU::ALU_LITERAL_X) {
-+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
-+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
-+ if (ImmOp.isFPImm()) {
-+ Value.f = ImmOp.getFPImm();
-+ } else {
-+ assert(ImmOp.isImm());
-+ Value.i = ImmOp.getImm();
-+ }
-+ }
-+ } else {
-+ // XXX: Handle other operand types.
-+ EmitTwoBytes(0, OS);
-+ }
-+
-+ // Emit the source channel (1 byte)
-+ if (MO.isReg()) {
-+ EmitByte(getHWRegChan(MO.getReg()), OS);
-+ } else {
-+ EmitByte(0, OS);
-+ }
-+
-+ // XXX: Emit isNegated (1 byte)
-+ if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
-+ && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
-+ (MO.isReg() &&
-+ (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
-+ EmitByte(1, OS);
-+ } else {
-+ EmitByte(0, OS);
-+ }
-+
-+ // Emit isAbsolute (1 byte)
-+ if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
-+ EmitByte(1, OS);
-+ } else {
-+ EmitByte(0, OS);
-+ }
-+
-+ // XXX: Emit relative addressing mode (1 byte)
-+ EmitByte(0, OS);
-+
-+  // Emit kc_bank. This will be adjusted later by r600_asm.
-+ EmitByte(0, OS);
-+
-+ // Emit the literal value, if applicable (4 bytes).
-+ Emit(Value.i, OS);
-+
-+}
-+
-+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
-+ unsigned SelOpIdx, raw_ostream &OS) const {
-+ const MCOperand &RegMO = MI.getOperand(RegOpIdx);
-+ const MCOperand &SelMO = MI.getOperand(SelOpIdx);
-+
-+ union {
-+ float f;
-+ uint32_t i;
-+ } InlineConstant;
-+ InlineConstant.i = 0;
-+  // Emit source type (1 byte) and source select (4 bytes). For GPRs, type
-+  // is 0 and select is 0 (the GPR index is encoded in the instruction
-+  // encoding). For constants, type is 1 and select is the original const
-+  // select passed from the driver.
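-+  // Each source thus occupies 9 bytes in the stream: 1 (type) + 4 (select)
-+  // + 4 (literal value, zero when unused).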
-+ unsigned Reg = RegMO.getReg();
-+ if (Reg == AMDGPU::ALU_CONST) {
-+ EmitByte(1, OS);
-+ uint32_t Sel = SelMO.getImm();
-+ Emit(Sel, OS);
-+ } else {
-+ EmitByte(0, OS);
-+ Emit((uint32_t)0, OS);
-+ }
-+
-+ if (Reg == AMDGPU::ALU_LITERAL_X) {
-+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
-+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
-+ if (ImmOp.isFPImm()) {
-+ InlineConstant.f = ImmOp.getFPImm();
-+ } else {
-+ assert(ImmOp.isImm());
-+ InlineConstant.i = ImmOp.getImm();
-+ }
-+ }
-+
-+ // Emit the literal value, if applicable (4 bytes).
-+ Emit(InlineConstant.i, OS);
-+}
-+
-+void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
-+ SmallVectorImpl<MCFixup> &Fixups,
-+ raw_ostream &OS) const {
-+
-+ unsigned Opcode = MI.getOpcode();
-+ bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
-+ unsigned OpOffset = hasOffsets ? 3 : 0;
-+ int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
-+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
-+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
-+ unsigned srcSelect[4] = {0, 1, 2, 3};
-+
-+ // Emit instruction type
-+  EmitByte(INSTR_TEX, OS);
-+
-+ // Emit instruction
-+ EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
-+
-+ // Emit resource id
-+ EmitByte(Resource, OS);
-+
-+ // Emit source register
-+ EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
-+
-+ // XXX: Emit src isRelativeAddress
-+ EmitByte(0, OS);
-+
-+ // Emit destination register
-+ EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
-+
-+  // XXX: Emit dst isRelativeAddress
-+ EmitByte(0, OS);
-+
-+ // XXX: Emit dst select
-+ EmitByte(0, OS); // X
-+ EmitByte(1, OS); // Y
-+ EmitByte(2, OS); // Z
-+ EmitByte(3, OS); // W
-+
-+ // XXX: Emit lod bias
-+ EmitByte(0, OS);
-+
-+ // XXX: Emit coord types
-+ unsigned coordType[4] = {1, 1, 1, 1};
-+
-+ if (TextureType == TEXTURE_RECT
-+ || TextureType == TEXTURE_SHADOWRECT) {
-+ coordType[ELEMENT_X] = 0;
-+ coordType[ELEMENT_Y] = 0;
-+ }
-+
-+ if (TextureType == TEXTURE_1D_ARRAY
-+ || TextureType == TEXTURE_SHADOW1D_ARRAY) {
-+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
-+ coordType[ELEMENT_Y] = 0;
-+ } else {
-+ coordType[ELEMENT_Z] = 0;
-+ srcSelect[ELEMENT_Z] = ELEMENT_Y;
-+ }
-+ } else if (TextureType == TEXTURE_2D_ARRAY
-+ || TextureType == TEXTURE_SHADOW2D_ARRAY) {
-+ coordType[ELEMENT_Z] = 0;
-+ }
-+
-+ for (unsigned i = 0; i < 4; i++) {
-+ EmitByte(coordType[i], OS);
-+ }
-+
-+ // XXX: Emit offsets
-+ if (hasOffsets)
-+ for (unsigned i = 2; i < 5; i++)
-+ EmitByte(MI.getOperand(i).getImm()<<1, OS);
-+ else
-+ EmitNullBytes(3, OS);
-+
-+ // Emit sampler id
-+ EmitByte(Sampler, OS);
-+
-+  // XXX: Emit source select
-+ if ((TextureType == TEXTURE_SHADOW1D
-+ || TextureType == TEXTURE_SHADOW2D
-+ || TextureType == TEXTURE_SHADOWRECT
-+ || TextureType == TEXTURE_SHADOW1D_ARRAY)
-+ && Opcode != AMDGPU::TEX_SAMPLE_C_L
-+ && Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
-+ srcSelect[ELEMENT_W] = ELEMENT_Z;
-+ }
-+
-+ for (unsigned i = 0; i < 4; i++) {
-+ EmitByte(srcSelect[i], OS);
-+ }
-+}
-+
-+void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
-+
-+ // Emit instruction type
-+ EmitByte(INSTR_FC, OS);
-+
-+ // Emit SRC
-+ unsigned NumOperands = MI.getNumOperands();
-+ if (NumOperands > 0) {
-+ assert(NumOperands == 1);
-+ EmitSrc(MI, 0, OS);
-+ } else {
-+ EmitNullBytes(SRC_BYTE_COUNT, OS);
-+ }
-+
-+ // Emit FC Instruction
-+ enum FCInstr instr;
-+ switch (MI.getOpcode()) {
-+ case AMDGPU::PREDICATED_BREAK:
-+ instr = FC_BREAK_PREDICATE;
-+ break;
-+ case AMDGPU::CONTINUE:
-+ instr = FC_CONTINUE;
-+ break;
-+ case AMDGPU::IF_PREDICATE_SET:
-+ instr = FC_IF_PREDICATE;
-+ break;
-+ case AMDGPU::ELSE:
-+ instr = FC_ELSE;
-+ break;
-+ case AMDGPU::ENDIF:
-+ instr = FC_ENDIF;
-+ break;
-+ case AMDGPU::ENDLOOP:
-+ instr = FC_ENDLOOP;
-+ break;
-+ case AMDGPU::WHILELOOP:
-+ instr = FC_BGNLOOP;
-+ break;
-+ default:
-+ abort();
-+ break;
-+ }
-+ EmitByte(instr, OS);
-+}
-+
-+void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
-+ raw_ostream &OS) const {
-+
-+ for (unsigned int i = 0; i < ByteCount; i++) {
-+ EmitByte(0, OS);
-+ }
-+}
-+
-+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
-+  OS.write((uint8_t) (Byte & 0xff));
-+}
-+
-+void R600MCCodeEmitter::EmitTwoBytes(uint32_t Bytes,
-+ raw_ostream &OS) const {
-+ OS.write((uint8_t) (Bytes & 0xff));
-+ OS.write((uint8_t) ((Bytes >> 8) & 0xff));
-+}
-+
-+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
-+ for (unsigned i = 0; i < 4; i++) {
-+ OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
-+ }
-+}
-+
-+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
-+ for (unsigned i = 0; i < 8; i++) {
-+ EmitByte((Value >> (8 * i)) & 0xff, OS);
-+ }
-+}
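-+
-+// Note: the Emit* helpers above write multi-byte values least-significant
-+// byte first (little-endian), presumably the byte order the r600g
-+// finalizer expects.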
-+
-+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
-+ return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
-+}
-+
-+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
-+ return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
-+}
-+
-+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
-+ const MCOperand &MO,
-+ SmallVectorImpl<MCFixup> &Fixup) const {
-+ if (MO.isReg()) {
-+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
-+ return MRI.getEncodingValue(MO.getReg());
-+ } else {
-+ return getHWReg(MO.getReg());
-+ }
-+ } else if (MO.isImm()) {
-+ return MO.getImm();
-+ } else {
-+ assert(0);
-+ return 0;
-+ }
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Encoding helper functions
-+//===----------------------------------------------------------------------===//
-+
-+bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
-+ switch(opcode) {
-+ default: return false;
-+ case AMDGPU::PREDICATED_BREAK:
-+ case AMDGPU::CONTINUE:
-+ case AMDGPU::IF_PREDICATE_SET:
-+ case AMDGPU::ELSE:
-+ case AMDGPU::ENDIF:
-+ case AMDGPU::ENDLOOP:
-+ case AMDGPU::WHILELOOP:
-+ return true;
-+ }
-+}
-+
-+bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
-+ switch(opcode) {
-+ default: return false;
-+ case AMDGPU::TEX_LD:
-+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
-+ case AMDGPU::TEX_SAMPLE:
-+ case AMDGPU::TEX_SAMPLE_C:
-+ case AMDGPU::TEX_SAMPLE_L:
-+ case AMDGPU::TEX_SAMPLE_C_L:
-+ case AMDGPU::TEX_SAMPLE_LB:
-+ case AMDGPU::TEX_SAMPLE_C_LB:
-+ case AMDGPU::TEX_SAMPLE_G:
-+ case AMDGPU::TEX_SAMPLE_C_G:
-+ case AMDGPU::TEX_GET_GRADIENTS_H:
-+ case AMDGPU::TEX_GET_GRADIENTS_V:
-+ case AMDGPU::TEX_SET_GRADIENTS_H:
-+ case AMDGPU::TEX_SET_GRADIENTS_V:
-+ return true;
-+ }
-+}
-+
-+bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
-+ unsigned Flag) const {
-+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
-+ if (FlagIndex == 0) {
-+ return false;
-+ }
-+ assert(MI.getOperand(FlagIndex).isImm());
-+ return !!((MI.getOperand(FlagIndex).getImm() >>
-+ (NUM_MO_FLAGS * Operand)) & Flag);
-+}
-+
-+#include "AMDGPUGenMCCodeEmitter.inc"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp llvm-r600/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,298 @@
-+//===-- SIMCCodeEmitter.cpp - SI Code Emitter ----------------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief The SI code emitter produces machine code that can be executed
-+/// directly on the GPU device.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-+#include "llvm/MC/MCCodeEmitter.h"
-+#include "llvm/MC/MCContext.h"
-+#include "llvm/MC/MCInst.h"
-+#include "llvm/MC/MCInstrInfo.h"
-+#include "llvm/MC/MCRegisterInfo.h"
-+#include "llvm/MC/MCSubtargetInfo.h"
-+#include "llvm/MC/MCFixup.h"
-+#include "llvm/Support/raw_ostream.h"
-+
-+#define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
-+#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
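-+// VGPR_BIT(src_idx) selects bit 8, 17 or 26 for src_idx = 1, 2 or 3: the
-+// per-source flag that marks a VOP operand as a VGPR (the shift assumes
-+// src_idx >= 1; see VOPPostEncode below).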
-+
-+// These must be kept in sync with SIInstructions.td and also the
-+// InstrEncodingInfo array in SIInstrInfo.cpp.
-+//
-+// NOTE: This enum is only used to identify the encoding type within LLVM,
-+// the actual encoding type that is part of the instruction format is different
-+namespace SIInstrEncodingType {
-+ enum Encoding {
-+ EXP = 0,
-+ LDS = 1,
-+ MIMG = 2,
-+ MTBUF = 3,
-+ MUBUF = 4,
-+ SMRD = 5,
-+ SOP1 = 6,
-+ SOP2 = 7,
-+ SOPC = 8,
-+ SOPK = 9,
-+ SOPP = 10,
-+ VINTRP = 11,
-+ VOP1 = 12,
-+ VOP2 = 13,
-+ VOP3 = 14,
-+ VOPC = 15
-+ };
-+}
-+
-+using namespace llvm;
-+
-+namespace {
-+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
-+ SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
-+ void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
-+ const MCInstrInfo &MCII;
-+ const MCRegisterInfo &MRI;
-+ const MCSubtargetInfo &STI;
-+ MCContext &Ctx;
-+
-+public:
-+ SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
-+ const MCSubtargetInfo &sti, MCContext &ctx)
-+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
-+
-+ ~SIMCCodeEmitter() { }
-+
-+  /// \brief Encode the instruction and write it to the OS.
-+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-+ SmallVectorImpl<MCFixup> &Fixups) const;
-+
-+ /// \returns the encoding for an MCOperand.
-+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-+ SmallVectorImpl<MCFixup> &Fixups) const;
-+
-+public:
-+
-+ /// \brief Encode a sequence of registers with the correct alignment.
-+ unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
-+
-+ /// \brief Encoding for when 2 consecutive registers are used
-+ virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixup) const;
-+
-+  /// \brief Encoding for when 4 consecutive registers are used
-+ virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixup) const;
-+
-+ /// \brief Encoding for SMRD indexed loads
-+ virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixup) const;
-+
-+ /// \brief Post-Encoder method for VOP instructions
-+ virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
-+
-+private:
-+
-+  /// \returns the SIInstrEncodingType for this instruction.
-+ unsigned getEncodingType(const MCInst &MI) const;
-+
-+  /// \brief Get the size in bytes of this instruction's encoding.
-+ unsigned getEncodingBytes(const MCInst &MI) const;
-+
-+ /// \returns the hardware encoding for a register
-+ unsigned getRegBinaryCode(unsigned reg) const;
-+
-+ /// \brief Generated function that returns the hardware encoding for
-+ /// a register
-+ unsigned getHWRegNum(unsigned reg) const;
-+
-+};
-+
-+} // End anonymous namespace
-+
-+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
-+ const MCRegisterInfo &MRI,
-+ const MCSubtargetInfo &STI,
-+ MCContext &Ctx) {
-+ return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
-+}
-+
-+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
-+ unsigned bytes = getEncodingBytes(MI);
-+ for (unsigned i = 0; i < bytes; i++) {
-+ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
-+ }
-+}
-+
-+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
-+ const MCOperand &MO,
-+ SmallVectorImpl<MCFixup> &Fixups) const {
-+ if (MO.isReg()) {
-+ return getRegBinaryCode(MO.getReg());
-+ } else if (MO.isImm()) {
-+ return MO.getImm();
-+ } else if (MO.isFPImm()) {
-+ // XXX: Not all instructions can use inline literals
-+ // XXX: We should make sure this is a 32-bit constant
-+ union {
-+ float F;
-+ uint32_t I;
-+ } Imm;
-+ Imm.F = MO.getFPImm();
-+ return Imm.I;
-+ } else if (MO.isExpr()) {
-+ const MCExpr *Expr = MO.getExpr();
-+ MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
-+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
-+ return 0;
-+  } else {
-+ llvm_unreachable("Encoding of this operand type is not supported yet.");
-+ }
-+ return 0;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom Operand Encodings
-+//===----------------------------------------------------------------------===//
-+
-+unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
-+ unsigned shift) const {
-+ unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
-+ return regCode >> shift;
-+}
-+unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
-+                                    unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixup) const {
-+ return GPRAlign(MI, OpNo, 1);
-+}
-+
-+unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
-+ unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixup) const {
-+ return GPRAlign(MI, OpNo, 2);
-+}
-+
-+#define SMRD_OFFSET_MASK 0xff
-+#define SMRD_IMM_SHIFT 8
-+#define SMRD_SBASE_MASK 0x3f
-+#define SMRD_SBASE_SHIFT 9
-+/// This function is responsible for encoding the offset and the base
-+/// pointer for SMRD instructions. It returns a bit string in this
-+/// format:
-+///
-+/// OFFSET = bits{7-0}
-+/// IMM = bits{8}
-+/// SBASE = bits{14-9}
-+///
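-+/// For example, with the (hypothetical, illustration-only) values
-+/// OFFSET = 0x04, IMM = 1 and SBASE = 2, the returned encoding is
-+/// 0x04 | (1 << 8) | (2 << 9) = 0x504.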
-+uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
-+ SmallVectorImpl<MCFixup> &Fixup) const {
-+ uint32_t Encoding;
-+
-+ const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
-+
-+ //XXX: Use this function for SMRD loads with register offsets
-+ assert(OffsetOp.isImm());
-+
-+ Encoding =
-+ (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
-+      | (1 << SMRD_IMM_SHIFT) // XXX: If the Offset is a register we shouldn't set this bit
-+ | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
-+ ;
-+
-+ return Encoding;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Post Encoder Callbacks
-+//===----------------------------------------------------------------------===//
-+
-+uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const {
-+ unsigned encodingType = getEncodingType(MI);
-+ unsigned numSrcOps;
-+ unsigned vgprBitOffset;
-+
-+ if (encodingType == SIInstrEncodingType::VOP3) {
-+ numSrcOps = 3;
-+ vgprBitOffset = 32;
-+ } else {
-+ numSrcOps = 1;
-+ vgprBitOffset = 0;
-+ }
-+
-+ // Add one to skip over the destination reg operand.
-+ for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
-+ const MCOperand &MO = MI.getOperand(opIdx);
-+ if (MO.isReg()) {
-+ unsigned reg = MI.getOperand(opIdx).getReg();
-+ if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
-+ AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
-+ Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
-+ }
-+ } else if (MO.isFPImm()) {
-+ union {
-+ float f;
-+ uint32_t i;
-+ } Imm;
-+ // XXX: Not all instructions can use inline literals
-+ // XXX: We should make sure this is a 32-bit constant
-+ Imm.f = MO.getFPImm();
-+ Value |= ((uint64_t)Imm.i) << 32;
-+ }
-+ }
-+ return Value;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Encoding helper functions
-+//===----------------------------------------------------------------------===//
-+
-+unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
-+ return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
-+}
-+
-+unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
-+
-+ // These instructions aren't real instructions with an encoding type, so
-+ // we need to manually specify their size.
-+ switch (MI.getOpcode()) {
-+ default: break;
-+ case AMDGPU::SI_LOAD_LITERAL_I32:
-+ case AMDGPU::SI_LOAD_LITERAL_F32:
-+ return 4;
-+ }
-+
-+ unsigned encoding_type = getEncodingType(MI);
-+ switch (encoding_type) {
-+ case SIInstrEncodingType::EXP:
-+ case SIInstrEncodingType::LDS:
-+ case SIInstrEncodingType::MUBUF:
-+ case SIInstrEncodingType::MTBUF:
-+ case SIInstrEncodingType::MIMG:
-+ case SIInstrEncodingType::VOP3:
-+ return 8;
-+ default:
-+ return 4;
-+ }
-+}
-+
-+unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
-+ switch (reg) {
-+ case AMDGPU::M0: return 124;
-+ case AMDGPU::SREG_LIT_0: return 128;
-+ case AMDGPU::SI_LITERAL_CONSTANT: return 255;
-+ default: return MRI.getEncodingValue(reg);
-+ }
-+}
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/Processors.td llvm-r600/lib/Target/R600/Processors.td
---- llvm-3.2.src/lib/Target/R600/Processors.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/Processors.td 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,29 @@
-+//===-- Processors.td - R600 and SI processor definitions ----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// AMDGPU processors supported.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class Proc<string Name, ProcessorItineraries itin,
-+           list<SubtargetFeature> Features>
-+: Processor<Name, itin, Features>;
-+def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
-+def : Proc<"rv710", R600_EG_Itin, []>;
-+def : Proc<"rv730", R600_EG_Itin, []>;
-+def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
-+def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-+def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Defines.h llvm-r600/lib/Target/R600/R600Defines.h
---- llvm-3.2.src/lib/Target/R600/R600Defines.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600Defines.h 2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,94 @@
-+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600DEFINES_H_
-+#define R600DEFINES_H_
-+
-+#include "llvm/MC/MCRegisterInfo.h"
-+
-+// Operand Flags
-+#define MO_FLAG_CLAMP (1 << 0)
-+#define MO_FLAG_NEG (1 << 1)
-+#define MO_FLAG_ABS (1 << 2)
-+#define MO_FLAG_MASK (1 << 3)
-+#define MO_FLAG_PUSH (1 << 4)
-+#define MO_FLAG_NOT_LAST (1 << 5)
-+#define MO_FLAG_LAST (1 << 6)
-+#define NUM_MO_FLAGS 7
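-+// Each operand owns a NUM_MO_FLAGS-bit group inside the per-instruction
-+// flag immediate, so operand N's flags live at bits [7*N, 7*N + 6]; see
-+// R600InstrInfo::addFlag() and R600MCCodeEmitter::isFlagSet().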
-+
-+/// \brief Helper for getting the operand index for the instruction flags
-+/// operand.
-+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
-+
-+namespace R600_InstFlag {
-+ enum TIF {
-+ TRANS_ONLY = (1 << 0),
-+ TEX = (1 << 1),
-+ REDUCTION = (1 << 2),
-+ FC = (1 << 3),
-+ TRIG = (1 << 4),
-+ OP3 = (1 << 5),
-+ VECTOR = (1 << 6),
-+ //FlagOperand bits 7, 8
-+ NATIVE_OPERANDS = (1 << 9),
-+ OP1 = (1 << 10),
-+ OP2 = (1 << 11)
-+ };
-+}
-+
-+#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
-+
-+/// \brief Defines for extracting register information from register encoding
-+#define HW_REG_MASK 0x1ff
-+#define HW_CHAN_SHIFT 9
-+
-+namespace R600Operands {
-+ enum Ops {
-+ DST,
-+ UPDATE_EXEC_MASK,
-+ UPDATE_PREDICATE,
-+ WRITE,
-+ OMOD,
-+ DST_REL,
-+ CLAMP,
-+ SRC0,
-+ SRC0_NEG,
-+ SRC0_REL,
-+ SRC0_ABS,
-+ SRC0_SEL,
-+ SRC1,
-+ SRC1_NEG,
-+ SRC1_REL,
-+ SRC1_ABS,
-+ SRC1_SEL,
-+ SRC2,
-+ SRC2_NEG,
-+ SRC2_REL,
-+ SRC2_SEL,
-+ LAST,
-+ PRED_SEL,
-+ IMM,
-+ COUNT
-+ };
-+
-+ const static int ALUOpTable[3][R600Operands::COUNT] = {
-+// W C S S S S S S S S S S S
-+// R O D L S R R R R S R R R R S R R R L P
-+// D U I M R A R C C C C R C C C C R C C C A R I
-+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
-+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
-+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
-+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
-+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
-+ };
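-+  // Rows are indexed by (number of source operands - 1), i.e. the OP1, OP2
-+  // and OP3 forms; an entry of -1 means the operand is absent in that form.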
-+
-+}
-+
-+#endif // R600DEFINES_H_
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600ExpandSpecialInstrs.cpp llvm-r600/lib/Target/R600/R600ExpandSpecialInstrs.cpp
---- llvm-3.2.src/lib/Target/R600/R600ExpandSpecialInstrs.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600ExpandSpecialInstrs.cpp 2013-01-25 19:43:57.463383054 +0100
-@@ -0,0 +1,333 @@
-+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// Vector, Reduction, and Cube instructions need to fill the entire instruction
-+/// group to work correctly. This pass expands these individual instructions
-+/// into several instructions that will completely fill the instruction group.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "R600Defines.h"
-+#include "R600InstrInfo.h"
-+#include "R600RegisterInfo.h"
-+#include "R600MachineFunctionInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
-+
-+private:
-+ static char ID;
-+ const R600InstrInfo *TII;
-+
-+ bool ExpandInputPerspective(MachineInstr& MI);
-+ bool ExpandInputConstant(MachineInstr& MI);
-+
-+public:
-+ R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
-+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ const char *getPassName() const {
-+ return "R600 Expand special instructions pass";
-+ }
-+};
-+
-+} // End anonymous namespace
-+
-+char R600ExpandSpecialInstrsPass::ID = 0;
-+
-+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
-+ return new R600ExpandSpecialInstrsPass(TM);
-+}
-+
-+bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
-+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
-+ if (MI.getOpcode() != AMDGPU::input_perspective)
-+ return false;
-+
-+ MachineBasicBlock::iterator I = &MI;
-+ unsigned DstReg = MI.getOperand(0).getReg();
-+ R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
-+ ->getInfo<R600MachineFunctionInfo>();
-+ unsigned IJIndexBase;
-+
-+  // Per the Evergreen ISA doc, section 8.3.2:
-+ // We need to interpolate XY and ZW in two different instruction groups.
-+ // An INTERP_* must occupy all 4 slots of an instruction group.
-+ // Output of INTERP_XY is written in X,Y slots
-+ // Output of INTERP_ZW is written in Z,W slots
-+ //
-+ // Thus interpolation requires the following sequences :
-+ //
-+ // AnyGPR.x = INTERP_ZW; (Write Masked Out)
-+ // AnyGPR.y = INTERP_ZW; (Write Masked Out)
-+ // DstGPR.z = INTERP_ZW;
-+ // DstGPR.w = INTERP_ZW; (End of first IG)
-+ // DstGPR.x = INTERP_XY;
-+ // DstGPR.y = INTERP_XY;
-+ // AnyGPR.z = INTERP_XY; (Write Masked Out)
-+ // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
-+ //
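-+  // The loop below emits exactly that sequence: iterations 0-3 produce the
-+  // INTERP_ZW group (slots 0 and 1 write-masked) and iterations 4-7 the
-+  // INTERP_XY group (slots 6 and 7 write-masked).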
-+ switch (MI.getOperand(1).getImm()) {
-+ case 0:
-+ IJIndexBase = MFI->GetIJPerspectiveIndex();
-+ break;
-+ case 1:
-+ IJIndexBase = MFI->GetIJLinearIndex();
-+ break;
-+ default:
-+    assert(0 && "Unknown ij index");
-+ }
-+
-+ for (unsigned i = 0; i < 8; i++) {
-+ unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
-+ 2 * IJIndexBase + ((i + 1) % 2));
-+ unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
-+ MI.getOperand(2).getImm());
-+
-+
-+ unsigned Sel = AMDGPU::sel_x;
-+ switch (i % 4) {
-+    case 0: Sel = AMDGPU::sel_x; break;
-+    case 1: Sel = AMDGPU::sel_y; break;
-+    case 2: Sel = AMDGPU::sel_z; break;
-+    case 3: Sel = AMDGPU::sel_w; break;
-+    default: break;
-+ }
-+
-+ unsigned Res = TRI.getSubReg(DstReg, Sel);
-+
-+    unsigned Opcode = (i < 4) ? AMDGPU::INTERP_ZW : AMDGPU::INTERP_XY;
-+
-+ MachineBasicBlock &MBB = *(MI.getParent());
-+ MachineInstr *NewMI =
-+ TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);
-+
-+    if (!(i > 1 && i < 6)) {
-+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
-+ }
-+
-+ if (i % 4 != 3)
-+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
-+ }
-+
-+ MI.eraseFromParent();
-+
-+ return true;
-+}
-+
-+bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) {
-+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
-+ if (MI.getOpcode() != AMDGPU::input_constant)
-+ return false;
-+
-+ MachineBasicBlock::iterator I = &MI;
-+ unsigned DstReg = MI.getOperand(0).getReg();
-+
-+ for (unsigned i = 0; i < 4; i++) {
-+ unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
-+ MI.getOperand(1).getImm());
-+
-+ unsigned Sel = AMDGPU::sel_x;
-+ switch (i % 4) {
-+    case 0: Sel = AMDGPU::sel_x; break;
-+    case 1: Sel = AMDGPU::sel_y; break;
-+    case 2: Sel = AMDGPU::sel_z; break;
-+    case 3: Sel = AMDGPU::sel_w; break;
-+    default: break;
-+ }
-+
-+ unsigned Res = TRI.getSubReg(DstReg, Sel);
-+
-+ MachineBasicBlock &MBB = *(MI.getParent());
-+ MachineInstr *NewMI = TII->buildDefaultInstruction(
-+ MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg);
-+
-+ if (i % 4 != 3)
-+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
-+ }
-+
-+ MI.eraseFromParent();
-+
-+ return true;
-+}
-+
-+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
-+
-+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
-+
-+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+ BB != BB_E; ++BB) {
-+ MachineBasicBlock &MBB = *BB;
-+ MachineBasicBlock::iterator I = MBB.begin();
-+ while (I != MBB.end()) {
-+ MachineInstr &MI = *I;
-+ I = llvm::next(I);
-+
-+ switch (MI.getOpcode()) {
-+ default: break;
-+ // Expand PRED_X to one of the PRED_SET instructions.
-+ case AMDGPU::PRED_X: {
-+ uint64_t Flags = MI.getOperand(3).getImm();
-+ // The native opcode used by PRED_X is stored as an immediate in the
-+ // third operand.
-+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
-+ MI.getOperand(2).getImm(), // opcode
-+ MI.getOperand(0).getReg(), // dst
-+ MI.getOperand(1).getReg(), // src0
-+ AMDGPU::ZERO); // src1
-+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
-+ if (Flags & MO_FLAG_PUSH) {
-+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
-+ } else {
-+ TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1);
-+ }
-+ MI.eraseFromParent();
-+ continue;
-+ }
-+      case AMDGPU::BREAK: {
-+        MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
-+                                          AMDGPU::PRED_SETE_INT,
-+                                          AMDGPU::PREDICATE_BIT,
-+                                          AMDGPU::ZERO,
-+                                          AMDGPU::ZERO);
-+        TII->addFlag(PredSet, 0, MO_FLAG_MASK);
-+        TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
-+
-+        BuildMI(MBB, I, MBB.findDebugLoc(I),
-+                TII->get(AMDGPU::PREDICATED_BREAK))
-+                .addReg(AMDGPU::PREDICATE_BIT);
-+        MI.eraseFromParent();
-+        continue;
-+      }
-+ }
-+
-+ if (ExpandInputPerspective(MI))
-+ continue;
-+ if (ExpandInputConstant(MI))
-+ continue;
-+
-+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
-+ bool IsVector = TII->isVector(MI);
-+ bool IsCube = TII->isCubeOp(MI.getOpcode());
-+ if (!IsReduction && !IsVector && !IsCube) {
-+ continue;
-+ }
-+
-+ // Expand the instruction
-+ //
-+ // Reduction instructions:
-+ // T0_X = DP4 T1_XYZW, T2_XYZW
-+ // becomes:
-+ // TO_X = DP4 T1_X, T2_X
-+ // TO_Y (write masked) = DP4 T1_Y, T2_Y
-+ // TO_Z (write masked) = DP4 T1_Z, T2_Z
-+ // TO_W (write masked) = DP4 T1_W, T2_W
-+ //
-+ // Vector instructions:
-+ // T0_X = MULLO_INT T1_X, T2_X
-+ // becomes:
-+ // T0_X = MULLO_INT T1_X, T2_X
-+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
-+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
-+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
-+ //
-+ // Cube instructions:
-+ // T0_XYZW = CUBE T1_XYZW
-+ // becomes:
-+ // TO_X = CUBE T1_Z, T1_Y
-+ // T0_Y = CUBE T1_Z, T1_X
-+ // T0_Z = CUBE T1_X, T1_Z
-+ // T0_W = CUBE T1_Y, T1_Z
-+ for (unsigned Chan = 0; Chan < 4; Chan++) {
-+ unsigned DstReg = MI.getOperand(
-+ TII->getOperandIdx(MI, R600Operands::DST)).getReg();
-+ unsigned Src0 = MI.getOperand(
-+ TII->getOperandIdx(MI, R600Operands::SRC0)).getReg();
-+ unsigned Src1 = 0;
-+
-+ // Determine the correct source registers
-+ if (!IsCube) {
-+ int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1);
-+ if (Src1Idx != -1) {
-+ Src1 = MI.getOperand(Src1Idx).getReg();
-+ }
-+ }
-+ if (IsReduction) {
-+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
-+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
-+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
-+ } else if (IsCube) {
-+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
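-+          // Src0 reads channel CubeSrcSwz[Chan] and Src1 channel
-+          // CubeSrcSwz[3 - Chan], producing the (Z,Y), (Z,X), (X,Z), (Y,Z)
-+          // operand pairs shown in the expansion comment above.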
-+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
-+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
-+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
-+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
-+ }
-+
-+        // Determine the correct destination register.
-+ bool Mask = false;
-+ bool NotLast = true;
-+ if (IsCube) {
-+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
-+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
-+ } else {
-+ // Mask the write if the original instruction does not write to
-+ // the current Channel.
-+ Mask = (Chan != TRI.getHWRegChan(DstReg));
-+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
-+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
-+ }
-+
-+ // Set the IsLast bit
-+        NotLast = (Chan != 3);
-+
-+ // Add the new instruction
-+ unsigned Opcode = MI.getOpcode();
-+ switch (Opcode) {
-+ case AMDGPU::CUBE_r600_pseudo:
-+ Opcode = AMDGPU::CUBE_r600_real;
-+ break;
-+ case AMDGPU::CUBE_eg_pseudo:
-+ Opcode = AMDGPU::CUBE_eg_real;
-+ break;
-+ case AMDGPU::DOT4_r600_pseudo:
-+ Opcode = AMDGPU::DOT4_r600_real;
-+ break;
-+ case AMDGPU::DOT4_eg_pseudo:
-+ Opcode = AMDGPU::DOT4_eg_real;
-+ break;
-+ default:
-+ break;
-+ }
-+
-+ MachineInstr *NewMI =
-+ TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
-+
-+ NewMI->setIsInsideBundle(Chan != 0);
-+ if (Mask) {
-+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
-+ }
-+ if (NotLast) {
-+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
-+ }
-+ }
-+ MI.eraseFromParent();
-+ }
-+ }
-+ return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600InstrInfo.cpp llvm-r600/lib/Target/R600/R600InstrInfo.cpp
---- llvm-3.2.src/lib/Target/R600/R600InstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600InstrInfo.cpp 2013-01-25 19:43:57.466716387 +0100
-@@ -0,0 +1,655 @@
-+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief R600 Implementation of TargetInstrInfo.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "R600InstrInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDGPUSubtarget.h"
-+#include "R600Defines.h"
-+#include "R600RegisterInfo.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+
-+#define GET_INSTRINFO_CTOR
-+#include "AMDGPUGenDFAPacketizer.inc"
-+
-+using namespace llvm;
-+
-+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
-+ : AMDGPUInstrInfo(tm),
-+ RI(tm, *this)
-+ { }
-+
-+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
-+ return RI;
-+}
-+
-+bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
-+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
-+}
-+
-+bool R600InstrInfo::isVector(const MachineInstr &MI) const {
-+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
-+}
-+
-+void
-+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI, DebugLoc DL,
-+ unsigned DestReg, unsigned SrcReg,
-+ bool KillSrc) const {
-+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
-+ && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
-+ for (unsigned I = 0; I < 4; I++) {
-+ unsigned SubRegIndex = RI.getSubRegFromChannel(I);
-+ buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
-+ RI.getSubReg(DestReg, SubRegIndex),
-+ RI.getSubReg(SrcReg, SubRegIndex))
-+ .addReg(DestReg,
-+ RegState::Define | RegState::Implicit);
-+ }
-+ } else {
-+
-+ // We can't copy vec4 registers
-+ assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
-+ && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
-+
-+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
-+ DestReg, SrcReg);
-+ NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0))
-+ .setIsKill(KillSrc);
-+ }
-+}
-+
-+MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
-+ unsigned DstReg, int64_t Imm) const {
-+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
-+ MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
-+ MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
-+ MachineInstrBuilder(MI).addImm(Imm);
-+ MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
-+
-+ return MI;
-+}
-+
-+unsigned R600InstrInfo::getIEQOpcode() const {
-+ return AMDGPU::SETE_INT;
-+}
-+
-+bool R600InstrInfo::isMov(unsigned Opcode) const {
-+  switch (Opcode) {
-+ default: return false;
-+ case AMDGPU::MOV:
-+ case AMDGPU::MOV_IMM_F32:
-+ case AMDGPU::MOV_IMM_I32:
-+ return true;
-+ }
-+}
-+
-+// Some instructions act as place holders to emulate operations that the GPU
-+// hardware does automatically. This function can be used to check if
-+// an opcode falls into this category.
-+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
-+ switch (Opcode) {
-+ default: return false;
-+ case AMDGPU::RETURN:
-+ case AMDGPU::RESERVE_REG:
-+ return true;
-+ }
-+}
-+
-+bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
-+ switch(Opcode) {
-+ default: return false;
-+ case AMDGPU::DOT4_r600_pseudo:
-+ case AMDGPU::DOT4_eg_pseudo:
-+ return true;
-+ }
-+}
-+
-+bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
-+ switch(Opcode) {
-+ default: return false;
-+ case AMDGPU::CUBE_r600_pseudo:
-+ case AMDGPU::CUBE_r600_real:
-+ case AMDGPU::CUBE_eg_pseudo:
-+ case AMDGPU::CUBE_eg_real:
-+ return true;
-+ }
-+}
-+
-+bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
-+ unsigned TargetFlags = get(Opcode).TSFlags;
-+
-+ return ((TargetFlags & R600_InstFlag::OP1) |
-+ (TargetFlags & R600_InstFlag::OP2) |
-+ (TargetFlags & R600_InstFlag::OP3));
-+}
-+
-+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
-+ const ScheduleDAG *DAG) const {
-+ const InstrItineraryData *II = TM->getInstrItineraryData();
-+ return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
-+}
-+
-+static bool
-+isPredicateSetter(unsigned Opcode) {
-+ switch (Opcode) {
-+ case AMDGPU::PRED_X:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+static MachineInstr *
-+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I) {
-+ while (I != MBB.begin()) {
-+ --I;
-+ MachineInstr *MI = I;
-+ if (isPredicateSetter(MI->getOpcode()))
-+ return MI;
-+ }
-+
-+ return NULL;
-+}
-+
-+bool
-+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-+ MachineBasicBlock *&TBB,
-+ MachineBasicBlock *&FBB,
-+ SmallVectorImpl<MachineOperand> &Cond,
-+ bool AllowModify) const {
-+ // Most of the following comes from the ARM implementation of AnalyzeBranch
-+
-+ // If the block has no terminators, it just falls into the block after it.
-+ MachineBasicBlock::iterator I = MBB.end();
-+ if (I == MBB.begin())
-+ return false;
-+ --I;
-+ while (I->isDebugValue()) {
-+ if (I == MBB.begin())
-+ return false;
-+ --I;
-+ }
-+ if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
-+ return false;
-+ }
-+
-+ // Get the last instruction in the block.
-+ MachineInstr *LastInst = I;
-+
-+ // If there is only one terminator instruction, process it.
-+ unsigned LastOpc = LastInst->getOpcode();
-+ if (I == MBB.begin() ||
-+ static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
-+ if (LastOpc == AMDGPU::JUMP) {
-+      if (!isPredicated(LastInst)) {
-+ TBB = LastInst->getOperand(0).getMBB();
-+ return false;
-+ } else {
-+ MachineInstr *predSet = I;
-+ while (!isPredicateSetter(predSet->getOpcode())) {
-+ predSet = --I;
-+ }
-+ TBB = LastInst->getOperand(0).getMBB();
-+ Cond.push_back(predSet->getOperand(1));
-+ Cond.push_back(predSet->getOperand(2));
-+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
-+ return false;
-+ }
-+ }
-+ return true; // Can't handle indirect branch.
-+ }
-+
-+ // Get the instruction before it if it is a terminator.
-+ MachineInstr *SecondLastInst = I;
-+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
-+
-+ // If the block ends with a B and a Bcc, handle it.
-+ if (SecondLastOpc == AMDGPU::JUMP &&
-+ isPredicated(SecondLastInst) &&
-+ LastOpc == AMDGPU::JUMP &&
-+ !isPredicated(LastInst)) {
-+ MachineInstr *predSet = --I;
-+ while (!isPredicateSetter(predSet->getOpcode())) {
-+ predSet = --I;
-+ }
-+ TBB = SecondLastInst->getOperand(0).getMBB();
-+ FBB = LastInst->getOperand(0).getMBB();
-+ Cond.push_back(predSet->getOperand(1));
-+ Cond.push_back(predSet->getOperand(2));
-+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
-+ return false;
-+ }
-+
-+ // Otherwise, can't handle this.
-+ return true;
-+}
-+
-+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
-+ const MachineInstr *MI = op.getParent();
-+
-+ switch (MI->getDesc().OpInfo->RegClass) {
-+ default: // FIXME: fallthrough??
-+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
-+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
-+  }
-+}
-+
-+unsigned
-+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
-+ MachineBasicBlock *TBB,
-+ MachineBasicBlock *FBB,
-+ const SmallVectorImpl<MachineOperand> &Cond,
-+ DebugLoc DL) const {
-+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-+
-+ if (FBB == 0) {
-+ if (Cond.empty()) {
-+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
-+ return 1;
-+ } else {
-+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
-+      assert(PredSet && "No previous predicate!");
-+ addFlag(PredSet, 0, MO_FLAG_PUSH);
-+ PredSet->getOperand(2).setImm(Cond[1].getImm());
-+
-+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
-+ .addMBB(TBB)
-+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
-+ return 1;
-+ }
-+ } else {
-+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
-+    assert(PredSet && "No previous predicate!");
-+ addFlag(PredSet, 0, MO_FLAG_PUSH);
-+ PredSet->getOperand(2).setImm(Cond[1].getImm());
-+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
-+ .addMBB(TBB)
-+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
-+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
-+ return 2;
-+ }
-+}
-+
-+unsigned
-+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-+
-+  // Note: we leave PRED* instructions in place.
-+ // They may be needed when predicating instructions.
-+
-+ MachineBasicBlock::iterator I = MBB.end();
-+
-+ if (I == MBB.begin()) {
-+ return 0;
-+ }
-+ --I;
-+ switch (I->getOpcode()) {
-+ default:
-+ return 0;
-+ case AMDGPU::JUMP:
-+ if (isPredicated(I)) {
-+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
-+ clearFlag(predSet, 0, MO_FLAG_PUSH);
-+ }
-+ I->eraseFromParent();
-+ break;
-+ }
-+ I = MBB.end();
-+
-+ if (I == MBB.begin()) {
-+ return 1;
-+ }
-+ --I;
-+ switch (I->getOpcode()) {
-+ // FIXME: only one case??
-+ default:
-+ return 1;
-+ case AMDGPU::JUMP:
-+ if (isPredicated(I)) {
-+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
-+ clearFlag(predSet, 0, MO_FLAG_PUSH);
-+ }
-+ I->eraseFromParent();
-+ break;
-+ }
-+ return 2;
-+}
-+
-+bool
-+R600InstrInfo::isPredicated(const MachineInstr *MI) const {
-+ int idx = MI->findFirstPredOperandIdx();
-+ if (idx < 0)
-+ return false;
-+
-+ unsigned Reg = MI->getOperand(idx).getReg();
-+ switch (Reg) {
-+ default: return false;
-+ case AMDGPU::PRED_SEL_ONE:
-+ case AMDGPU::PRED_SEL_ZERO:
-+ case AMDGPU::PREDICATE_BIT:
-+ return true;
-+ }
-+}
-+
-+bool
-+R600InstrInfo::isPredicable(MachineInstr *MI) const {
-+ // XXX: KILL* instructions can be predicated, but they must be the last
-+ // instruction in a clause, so this means any instructions after them cannot
-+ // be predicated. Until we have proper support for instruction clauses in the
-+ // backend, we will mark KILL* instructions as unpredicable.
-+
-+ if (MI->getOpcode() == AMDGPU::KILLGT) {
-+ return false;
-+ } else {
-+ return AMDGPUInstrInfo::isPredicable(MI);
-+ }
-+}
-+
-+
-+bool
-+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
-+                               unsigned NumCycles,
-+ unsigned ExtraPredCycles,
-+ const BranchProbability &Probability) const{
-+ return true;
-+}
-+
-+bool
-+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
-+ unsigned NumTCycles,
-+ unsigned ExtraTCycles,
-+ MachineBasicBlock &FMBB,
-+ unsigned NumFCycles,
-+ unsigned ExtraFCycles,
-+ const BranchProbability &Probability) const {
-+ return true;
-+}
-+
-+bool
-+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
-+                                      unsigned NumCycles,
-+ const BranchProbability &Probability)
-+ const {
-+ return true;
-+}
-+
-+bool
-+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
-+ MachineBasicBlock &FMBB) const {
-+ return false;
-+}
-+
-+
-+bool
-+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
-+ MachineOperand &MO = Cond[1];
-+ switch (MO.getImm()) {
-+ case OPCODE_IS_ZERO_INT:
-+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
-+ break;
-+ case OPCODE_IS_NOT_ZERO_INT:
-+ MO.setImm(OPCODE_IS_ZERO_INT);
-+ break;
-+ case OPCODE_IS_ZERO:
-+ MO.setImm(OPCODE_IS_NOT_ZERO);
-+ break;
-+ case OPCODE_IS_NOT_ZERO:
-+ MO.setImm(OPCODE_IS_ZERO);
-+ break;
-+ default:
-+ return true;
-+ }
-+
-+ MachineOperand &MO2 = Cond[2];
-+ switch (MO2.getReg()) {
-+ case AMDGPU::PRED_SEL_ZERO:
-+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
-+ break;
-+ case AMDGPU::PRED_SEL_ONE:
-+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
-+ break;
-+ default:
-+ return true;
-+ }
-+ return false;
-+}
-+
-+bool
-+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
-+ std::vector<MachineOperand> &Pred) const {
-+ return isPredicateSetter(MI->getOpcode());
-+}
-+
-+
-+bool
-+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+ const SmallVectorImpl<MachineOperand> &Pred2) const {
-+ return false;
-+}
-+
-+
-+bool
-+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
-+ const SmallVectorImpl<MachineOperand> &Pred) const {
-+ int PIdx = MI->findFirstPredOperandIdx();
-+
-+ if (PIdx != -1) {
-+ MachineOperand &PMO = MI->getOperand(PIdx);
-+ PMO.setReg(Pred[2].getReg());
-+ MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
-+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
-+ const MachineInstr *MI,
-+ unsigned *PredCost) const {
-+ if (PredCost)
-+ *PredCost = 2;
-+ return 2;
-+}
-+
-+MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I,
-+ unsigned Opcode,
-+ unsigned DstReg,
-+ unsigned Src0Reg,
-+ unsigned Src1Reg) const {
-+ MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
-+ DstReg); // $dst
-+
-+ if (Src1Reg) {
-+ MIB.addImm(0) // $update_exec_mask
-+ .addImm(0); // $update_predicate
-+ }
-+ MIB.addImm(1) // $write
-+ .addImm(0) // $omod
-+ .addImm(0) // $dst_rel
-+ .addImm(0) // $dst_clamp
-+ .addReg(Src0Reg) // $src0
-+ .addImm(0) // $src0_neg
-+ .addImm(0) // $src0_rel
-+ .addImm(0) // $src0_abs
-+ .addImm(-1); // $src0_sel
-+
-+ if (Src1Reg) {
-+ MIB.addReg(Src1Reg) // $src1
-+ .addImm(0) // $src1_neg
-+ .addImm(0) // $src1_rel
-+ .addImm(0) // $src1_abs
-+ .addImm(-1); // $src1_sel
-+ }
-+
-+ //XXX: The r600g finalizer expects this to be 1, once we've moved the
-+ //scheduling to the backend, we can change the default to 0.
-+ MIB.addImm(1) // $last
-+ .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
-+ .addImm(0); // $literal
-+
-+ return MIB;
-+}
-+
-+MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I,
-+ unsigned DstReg,
-+ uint64_t Imm) const {
-+ MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
-+ AMDGPU::ALU_LITERAL_X);
-+ setImmOperand(MovImm, R600Operands::IMM, Imm);
-+ return MovImm;
-+}
-+
-+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
-+ R600Operands::Ops Op) const {
-+ return getOperandIdx(MI.getOpcode(), Op);
-+}
-+
-+int R600InstrInfo::getOperandIdx(unsigned Opcode,
-+ R600Operands::Ops Op) const {
-+ unsigned TargetFlags = get(Opcode).TSFlags;
-+ unsigned OpTableIdx;
-+
-+ if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
-+ switch (Op) {
-+ case R600Operands::DST: return 0;
-+ case R600Operands::SRC0: return 1;
-+ case R600Operands::SRC1: return 2;
-+ case R600Operands::SRC2: return 3;
-+ default:
-+ assert(!"Unknown operand type for instruction");
-+ return -1;
-+ }
-+ }
-+
-+ if (TargetFlags & R600_InstFlag::OP1) {
-+ OpTableIdx = 0;
-+ } else if (TargetFlags & R600_InstFlag::OP2) {
-+ OpTableIdx = 1;
-+ } else {
-+ assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
-+ "for this instruction");
-+ OpTableIdx = 2;
-+ }
-+
-+ return R600Operands::ALUOpTable[OpTableIdx][Op];
-+}
-+
-+void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
-+ int64_t Imm) const {
-+ int Idx = getOperandIdx(*MI, Op);
-+ assert(Idx != -1 && "Operand not supported for this instruction.");
-+ assert(MI->getOperand(Idx).isImm());
-+ MI->getOperand(Idx).setImm(Imm);
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Instruction flag getters/setters
-+//===----------------------------------------------------------------------===//
-+
-+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
-+ return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
-+}
-+
-+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
-+ unsigned Flag) const {
-+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
-+ int FlagIndex = 0;
-+ if (Flag != 0) {
-+ // If we pass something other than the default value of Flag to this
-+    // function, it means we want to set a flag on an instruction
-+ // that uses native encoding.
-+ assert(HAS_NATIVE_OPERANDS(TargetFlags));
-+ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
-+ switch (Flag) {
-+ case MO_FLAG_CLAMP:
-+ FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP);
-+ break;
-+ case MO_FLAG_MASK:
-+ FlagIndex = getOperandIdx(*MI, R600Operands::WRITE);
-+ break;
-+ case MO_FLAG_NOT_LAST:
-+ case MO_FLAG_LAST:
-+ FlagIndex = getOperandIdx(*MI, R600Operands::LAST);
-+ break;
-+ case MO_FLAG_NEG:
-+ switch (SrcIdx) {
-+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break;
-+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break;
-+ case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break;
-+ }
-+ break;
-+
-+ case MO_FLAG_ABS:
-+ assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
-+ "instructions.");
-+ switch (SrcIdx) {
-+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break;
-+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break;
-+ }
-+ break;
-+
-+ default:
-+ FlagIndex = -1;
-+ break;
-+ }
-+ assert(FlagIndex != -1 && "Flag not supported for this instruction");
-+ } else {
-+ FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
-+ assert(FlagIndex != 0 &&
-+ "Instruction flags not supported for this instruction");
-+ }
-+
-+ MachineOperand &FlagOp = MI->getOperand(FlagIndex);
-+ assert(FlagOp.isImm());
-+ return FlagOp;
-+}
-+
-+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
-+ unsigned Flag) const {
-+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
-+ if (Flag == 0) {
-+ return;
-+ }
-+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
-+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
-+ if (Flag == MO_FLAG_NOT_LAST) {
-+ clearFlag(MI, Operand, MO_FLAG_LAST);
-+ } else if (Flag == MO_FLAG_MASK) {
-+ clearFlag(MI, Operand, Flag);
-+ } else {
-+ FlagOp.setImm(1);
-+ }
-+ } else {
-+ MachineOperand &FlagOp = getFlagOp(MI, Operand);
-+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
-+ }
-+}
-+
-+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
-+ unsigned Flag) const {
-+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
-+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
-+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
-+ FlagOp.setImm(0);
-+ } else {
-+ MachineOperand &FlagOp = getFlagOp(MI);
-+ unsigned InstFlags = FlagOp.getImm();
-+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
-+ FlagOp.setImm(InstFlags);
-+ }
-+}
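-+
-+// Example (illustrative sketch, not part of the original patch): given a
-+// const R600InstrInfo *TII and a MachineInstr *DefMI with native operands,
-+// the flag helpers compose like this; operand index 0 selects src0 for the
-+// per-source flags:
-+//
-+//   TII->addFlag(DefMI, 0, MO_FLAG_CLAMP); // clamp the result
-+//   TII->addFlag(DefMI, 0, MO_FLAG_NEG);   // negate src0
-+//   TII->clearFlag(DefMI, 0, MO_FLAG_NEG); // undo the negation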
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600InstrInfo.h llvm-r600/lib/Target/R600/R600InstrInfo.h
---- llvm-3.2.src/lib/Target/R600/R600InstrInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600InstrInfo.h 2013-01-25 19:43:57.466716387 +0100
-@@ -0,0 +1,169 @@
-+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition for R600InstrInfo
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600INSTRUCTIONINFO_H_
-+#define R600INSTRUCTIONINFO_H_
-+
-+#include "AMDIL.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "R600Defines.h"
-+#include "R600RegisterInfo.h"
-+
-+#include <map>
-+
-+namespace llvm {
-+
-+ class AMDGPUTargetMachine;
-+ class DFAPacketizer;
-+ class ScheduleDAG;
-+ class MachineFunction;
-+ class MachineInstr;
-+ class MachineInstrBuilder;
-+
-+ class R600InstrInfo : public AMDGPUInstrInfo {
-+ private:
-+ const R600RegisterInfo RI;
-+
-+ int getBranchInstr(const MachineOperand &op) const;
-+
-+ public:
-+ explicit R600InstrInfo(AMDGPUTargetMachine &tm);
-+
-+ const R600RegisterInfo &getRegisterInfo() const;
-+ virtual void copyPhysReg(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI, DebugLoc DL,
-+ unsigned DestReg, unsigned SrcReg,
-+ bool KillSrc) const;
-+
-+ bool isTrig(const MachineInstr &MI) const;
-+ bool isPlaceHolderOpcode(unsigned opcode) const;
-+ bool isReductionOp(unsigned opcode) const;
-+ bool isCubeOp(unsigned opcode) const;
-+
-+ /// \returns true if this \p Opcode represents an ALU instruction.
-+ bool isALUInstr(unsigned Opcode) const;
-+
-+ /// \brief Vector instructions are instructions that must fill all
-+ /// instruction slots within an instruction group.
-+ bool isVector(const MachineInstr &MI) const;
-+
-+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+ int64_t Imm) const;
-+
-+ virtual unsigned getIEQOpcode() const;
-+ virtual bool isMov(unsigned Opcode) const;
-+
-+ DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
-+ const ScheduleDAG *DAG) const;
-+
-+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-+
-+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
-+ MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
-+ bool AllowModify) const;
-+
-+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-+ MachineBasicBlock *FBB,
-+ const SmallVectorImpl<MachineOperand> &Cond,
-+ DebugLoc DL) const;
-+
-+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-+
-+ bool isPredicated(const MachineInstr *MI) const;
-+
-+ bool isPredicable(MachineInstr *MI) const;
-+
-+ bool
-+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
-+ const BranchProbability &Probability) const;
-+
-+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
-+ unsigned ExtraPredCycles,
-+ const BranchProbability &Probability) const;
-+
-+ bool
-+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
-+ unsigned NumTCycles, unsigned ExtraTCycles,
-+ MachineBasicBlock &FMBB,
-+ unsigned NumFCycles, unsigned ExtraFCycles,
-+ const BranchProbability &Probability) const;
-+
-+ bool DefinesPredicate(MachineInstr *MI,
-+ std::vector<MachineOperand> &Pred) const;
-+
-+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+ const SmallVectorImpl<MachineOperand> &Pred2) const;
-+
-+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
-+ MachineBasicBlock &FMBB) const;
-+
-+ bool PredicateInstruction(MachineInstr *MI,
-+ const SmallVectorImpl<MachineOperand> &Pred) const;
-+
-+ unsigned int getInstrLatency(const InstrItineraryData *ItinData,
-+ const MachineInstr *MI,
-+ unsigned *PredCost = 0) const;
-+
-+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
-+ SDNode *Node) const { return 1; }
-+
-+ /// You can use this function to avoid manually specifying each instruction
-+ /// modifier operand when building a new instruction.
-+ ///
-+ /// \returns a MachineInstr with all the instruction modifiers initialized
-+ /// to their default values.
-+ MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I,
-+ unsigned Opcode,
-+ unsigned DstReg,
-+ unsigned Src0Reg,
-+ unsigned Src1Reg = 0) const;
-+
-+ MachineInstr *buildMovImm(MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I,
-+ unsigned DstReg,
-+ uint64_t Imm) const;
-+
-+ /// \brief Get the index of Op in the MachineInstr.
-+ ///
-+ /// \returns -1 if the Instruction does not contain the specified \p Op.
-+ int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
-+
-+ /// \brief Get the index of \p Op for the given Opcode.
-+ ///
-+ /// \returns -1 if the Instruction does not contain the specified \p Op.
-+ int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
-+
-+ /// \brief Helper function for setting instruction flag values.
-+ void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
-+
-+ /// \returns true if this instruction has an operand for storing target flags.
-+ bool hasFlagOperand(const MachineInstr &MI) const;
-+
-+ /// \brief Add one of the MO_FLAG* flags to the specified \p Operand.
-+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
-+
-+ /// \brief Determine if the specified \p Flag is set on this \p Operand.
-+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
-+
-+ /// \param SrcIdx The register source to set the flag on (e.g. src0, src1, src2)
-+ /// \param Flag The flag being set.
-+ ///
-+ /// \returns the operand containing the flags for this instruction.
-+ MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
-+ unsigned Flag = 0) const;
-+
-+ /// \brief Clear the specified flag on the instruction.
-+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
-+};
-+
-+} // End llvm namespace
-+
-+#endif // R600INSTRUCTIONINFO_H_
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Instructions.td llvm-r600/lib/Target/R600/R600Instructions.td
---- llvm-3.2.src/lib/Target/R600/R600Instructions.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600Instructions.td 2013-01-25 19:43:57.466716387 +0100
-@@ -0,0 +1,1843 @@
-+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// R600 Tablegen instruction definitions
-+//
-+//===----------------------------------------------------------------------===//
-+
-+include "R600Intrinsics.td"
-+
-+class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
-+ InstrItinClass itin>
-+ : AMDGPUInst <outs, ins, asm, pattern> {
-+
-+ field bits<64> Inst;
-+ bit Trig = 0;
-+ bit Op3 = 0;
-+ bit isVector = 0;
-+ bits<2> FlagOperandIdx = 0;
-+ bit Op1 = 0;
-+ bit Op2 = 0;
-+ bit HasNativeOperands = 0;
-+
-+ bits<11> op_code = inst;
-+ //let Inst = inst;
-+ let Namespace = "AMDGPU";
-+ let OutOperandList = outs;
-+ let InOperandList = ins;
-+ let AsmString = asm;
-+ let Pattern = pattern;
-+ let Itinerary = itin;
-+
-+ let TSFlags{4} = Trig;
-+ let TSFlags{5} = Op3;
-+
-+ // Vector instructions are instructions that must fill all slots in an
-+ // instruction group
-+ let TSFlags{6} = isVector;
-+ let TSFlags{8-7} = FlagOperandIdx;
-+ let TSFlags{9} = HasNativeOperands;
-+ let TSFlags{10} = Op1;
-+ let TSFlags{11} = Op2;
-+}
-+
-+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
-+ AMDGPUInst <outs, ins, asm, pattern> {
-+ field bits<64> Inst;
-+
-+ let Namespace = "AMDGPU";
-+}
-+
-+def MEMxi : Operand<iPTR> {
-+ let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
-+ let PrintMethod = "printMemOperand";
-+}
-+
-+def MEMrr : Operand<iPTR> {
-+ let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
-+}
-+
-+// Operands for non-registers
-+
-+class InstFlag<string PM = "printOperand", int Default = 0>
-+ : OperandWithDefaultOps <i32, (ops (i32 Default))> {
-+ let PrintMethod = PM;
-+}
-+
-+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
-+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
-+ let PrintMethod = "printSel";
-+}
-+
-+def LITERAL : InstFlag<"printLiteral">;
-+
-+def WRITE : InstFlag <"printWrite", 1>;
-+def OMOD : InstFlag <"printOMOD">;
-+def REL : InstFlag <"printRel">;
-+def CLAMP : InstFlag <"printClamp">;
-+def NEG : InstFlag <"printNeg">;
-+def ABS : InstFlag <"printAbs">;
-+def UEM : InstFlag <"printUpdateExecMask">;
-+def UP : InstFlag <"printUpdatePred">;
-+
-+// XXX: The r600g finalizer in Mesa expects the 'last' bit to be set in most
-+// cases. Once we start using the packetizer in this backend, this should
-+// default to 0.
-+def LAST : InstFlag<"printLast", 1>;
-+
-+def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
-+def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
-+def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
-+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
-+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
-+
-+class R600ALU_Word0 {
-+ field bits<32> Word0;
-+
-+ bits<11> src0;
-+ bits<1> src0_neg;
-+ bits<1> src0_rel;
-+ bits<11> src1;
-+ bits<1> src1_rel;
-+ bits<1> src1_neg;
-+ bits<3> index_mode = 0;
-+ bits<2> pred_sel;
-+ bits<1> last;
-+
-+ bits<9> src0_sel = src0{8-0};
-+ bits<2> src0_chan = src0{10-9};
-+ bits<9> src1_sel = src1{8-0};
-+ bits<2> src1_chan = src1{10-9};
-+
-+ let Word0{8-0} = src0_sel;
-+ let Word0{9} = src0_rel;
-+ let Word0{11-10} = src0_chan;
-+ let Word0{12} = src0_neg;
-+ let Word0{21-13} = src1_sel;
-+ let Word0{22} = src1_rel;
-+ let Word0{24-23} = src1_chan;
-+ let Word0{25} = src1_neg;
-+ let Word0{28-26} = index_mode;
-+ let Word0{30-29} = pred_sel;
-+ let Word0{31} = last;
-+}
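-+
-+// For reference, the let-statements above amount to the following C++
-+// packing of ALU word 0, with every field held in a uint32_t variable and
-+// assumed to be range checked already:
-+//
-+//   uint32_t Word0 = (Src0Sel & 0x1ff)
-+//                  | (Src0Rel & 0x1) << 9
-+//                  | (Src0Chan & 0x3) << 10
-+//                  | (Src0Neg & 0x1) << 12
-+//                  | (Src1Sel & 0x1ff) << 13
-+//                  | (Src1Rel & 0x1) << 22
-+//                  | (Src1Chan & 0x3) << 23
-+//                  | (Src1Neg & 0x1) << 25
-+//                  | (IndexMode & 0x7) << 26
-+//                  | (PredSel & 0x3) << 29
-+//                  | (Last & 0x1) << 31;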
-+
-+class R600ALU_Word1 {
-+ field bits<32> Word1;
-+
-+ bits<11> dst;
-+ bits<3> bank_swizzle = 0;
-+ bits<1> dst_rel;
-+ bits<1> clamp;
-+
-+ bits<7> dst_sel = dst{6-0};
-+ bits<2> dst_chan = dst{10-9};
-+
-+ let Word1{20-18} = bank_swizzle;
-+ let Word1{27-21} = dst_sel;
-+ let Word1{28} = dst_rel;
-+ let Word1{30-29} = dst_chan;
-+ let Word1{31} = clamp;
-+}
-+
-+class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
-+
-+ bits<1> src0_abs;
-+ bits<1> src1_abs;
-+ bits<1> update_exec_mask;
-+ bits<1> update_pred;
-+ bits<1> write;
-+ bits<2> omod;
-+
-+ let Word1{0} = src0_abs;
-+ let Word1{1} = src1_abs;
-+ let Word1{2} = update_exec_mask;
-+ let Word1{3} = update_pred;
-+ let Word1{4} = write;
-+ let Word1{6-5} = omod;
-+ let Word1{17-7} = alu_inst;
-+}
-+
-+class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
-+
-+ bits<11> src2;
-+ bits<1> src2_rel;
-+ bits<1> src2_neg;
-+
-+ bits<9> src2_sel = src2{8-0};
-+ bits<2> src2_chan = src2{10-9};
-+
-+ let Word1{8-0} = src2_sel;
-+ let Word1{9} = src2_rel;
-+ let Word1{11-10} = src2_chan;
-+ let Word1{12} = src2_neg;
-+ let Word1{17-13} = alu_inst;
-+}
-+
-+class VTX_WORD0 {
-+ field bits<32> Word0;
-+ bits<7> SRC_GPR;
-+ bits<5> VC_INST;
-+ bits<2> FETCH_TYPE;
-+ bits<1> FETCH_WHOLE_QUAD;
-+ bits<8> BUFFER_ID;
-+ bits<1> SRC_REL;
-+ bits<2> SRC_SEL_X;
-+ bits<6> MEGA_FETCH_COUNT;
-+
-+ let Word0{4-0} = VC_INST;
-+ let Word0{6-5} = FETCH_TYPE;
-+ let Word0{7} = FETCH_WHOLE_QUAD;
-+ let Word0{15-8} = BUFFER_ID;
-+ let Word0{22-16} = SRC_GPR;
-+ let Word0{23} = SRC_REL;
-+ let Word0{25-24} = SRC_SEL_X;
-+ let Word0{31-26} = MEGA_FETCH_COUNT;
-+}
-+
-+class VTX_WORD1_GPR {
-+ field bits<32> Word1;
-+ bits<7> DST_GPR;
-+ bits<1> DST_REL;
-+ bits<3> DST_SEL_X;
-+ bits<3> DST_SEL_Y;
-+ bits<3> DST_SEL_Z;
-+ bits<3> DST_SEL_W;
-+ bits<1> USE_CONST_FIELDS;
-+ bits<6> DATA_FORMAT;
-+ bits<2> NUM_FORMAT_ALL;
-+ bits<1> FORMAT_COMP_ALL;
-+ bits<1> SRF_MODE_ALL;
-+
-+ let Word1{6-0} = DST_GPR;
-+ let Word1{7} = DST_REL;
-+ let Word1{8} = 0; // Reserved
-+ let Word1{11-9} = DST_SEL_X;
-+ let Word1{14-12} = DST_SEL_Y;
-+ let Word1{17-15} = DST_SEL_Z;
-+ let Word1{20-18} = DST_SEL_W;
-+ let Word1{21} = USE_CONST_FIELDS;
-+ let Word1{27-22} = DATA_FORMAT;
-+ let Word1{29-28} = NUM_FORMAT_ALL;
-+ let Word1{30} = FORMAT_COMP_ALL;
-+ let Word1{31} = SRF_MODE_ALL;
-+}
-+
-+/*
-+XXX: R600 subtarget uses a slightly different encoding than the other
-+subtargets. We currently handle this in R600MCCodeEmitter, but we may
-+want to use these instruction classes in the future.
-+
-+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
-+
-+ bits<1> fog_merge;
-+ bits<10> alu_inst;
-+
-+ let Inst{37} = fog_merge;
-+ let Inst{39-38} = omod;
-+ let Inst{49-40} = alu_inst;
-+}
-+
-+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
-+
-+ bits<11> alu_inst;
-+
-+ let Inst{38-37} = omod;
-+ let Inst{49-39} = alu_inst;
-+}
-+*/
-+
-+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
-+ (ops PRED_SEL_OFF)>;
-+
-+
-+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-+
-+// Class for instructions with only one source register.
-+// If you add new ins to this instruction, make sure they are listed before
-+// $literal, because the backend currently assumes that the last operand is
-+// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
-+// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
-+// and R600InstrInfo::getOperandIdx().
-+class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
-+ InstrItinClass itin = AnyALU> :
-+ InstR600 <0,
-+ (outs R600_Reg32:$dst),
-+ (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
-+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
-+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
-+ !strconcat(opName,
-+ "$clamp $dst$write$dst_rel$omod, "
-+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
-+ "$literal $pred_sel$last"),
-+ pattern,
-+ itin>,
-+ R600ALU_Word0,
-+ R600ALU_Word1_OP2 <inst> {
-+
-+ let src1 = 0;
-+ let src1_rel = 0;
-+ let src1_neg = 0;
-+ let src1_abs = 0;
-+ let update_exec_mask = 0;
-+ let update_pred = 0;
-+ let HasNativeOperands = 1;
-+ let Op1 = 1;
-+ let DisableEncoding = "$literal";
-+
-+ let Inst{31-0} = Word0;
-+ let Inst{63-32} = Word1;
-+}
-+
-+class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
-+ InstrItinClass itin = AnyALU> :
-+ R600_1OP <inst, opName,
-+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
-+>;
-+
-+// If you add or change the operands for R600_2OP instructions, you must
-+// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
-+// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
-+class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
-+ InstrItinClass itin = AnyALU> :
-+ InstR600 <inst,
-+ (outs R600_Reg32:$dst),
-+ (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
-+ OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
-+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
-+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
-+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
-+ !strconcat(opName,
-+ "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
-+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
-+ "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
-+ "$literal $pred_sel$last"),
-+ pattern,
-+ itin>,
-+ R600ALU_Word0,
-+ R600ALU_Word1_OP2 <inst> {
-+
-+ let HasNativeOperands = 1;
-+ let Op2 = 1;
-+ let DisableEncoding = "$literal";
-+
-+ let Inst{31-0} = Word0;
-+ let Inst{63-32} = Word1;
-+}
-+
-+class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
-+ InstrItinClass itin = AnyALU> :
-+ R600_2OP <inst, opName,
-+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
-+ R600_Reg32:$src1))]
-+>;
-+
-+// If you add or change the operands for R600_3OP instructions, you must
-+// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
-+// R600InstrInfo::buildDefaultInstruction(), and
-+// R600InstrInfo::getOperandIdx().
-+class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
-+ InstrItinClass itin = AnyALU> :
-+ InstR600 <0,
-+ (outs R600_Reg32:$dst),
-+ (ins REL:$dst_rel, CLAMP:$clamp,
-+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
-+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
-+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
-+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
-+ !strconcat(opName, "$clamp $dst$dst_rel, "
-+ "$src0_neg$src0$src0_sel$src0_rel, "
-+ "$src1_neg$src1$src1_sel$src1_rel, "
-+ "$src2_neg$src2$src2_sel$src2_rel, "
-+ "$literal $pred_sel$last"),
-+ pattern,
-+ itin>,
-+ R600ALU_Word0,
-+ R600ALU_Word1_OP3<inst>{
-+
-+ let HasNativeOperands = 1;
-+ let DisableEncoding = "$literal";
-+ let Op3 = 1;
-+
-+ let Inst{31-0} = Word0;
-+ let Inst{63-32} = Word1;
-+}
-+
-+class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
-+ InstrItinClass itin = VecALU> :
-+ InstR600 <inst,
-+ (outs R600_Reg32:$dst),
-+ ins,
-+ asm,
-+ pattern,
-+ itin>;
-+
-+class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
-+ InstrItinClass itin = AnyALU> :
-+ InstR600 <inst,
-+ (outs R600_Reg128:$dst),
-+ (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
-+ !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
-+ pattern,
-+ itin>{
-+ let Inst {10-0} = inst;
-+ }
-+
-+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
-+
-+def TEX_SHADOW : PatLeaf<
-+ (imm),
-+ [{uint32_t TType = (uint32_t)N->getZExtValue();
-+ return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
-+ }]
-+>;
-+
-+def TEX_RECT : PatLeaf<
-+ (imm),
-+ [{uint32_t TType = (uint32_t)N->getZExtValue();
-+ return TType == 5;
-+ }]
-+>;
-+
-+class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
-+ dag ins, string asm, list<dag> pattern> :
-+ InstR600ISA <outs, ins, asm, pattern> {
-+ bits<7> RW_GPR;
-+ bits<7> INDEX_GPR;
-+
-+ bits<2> RIM;
-+ bits<2> TYPE;
-+ bits<1> RW_REL;
-+ bits<2> ELEM_SIZE;
-+
-+ bits<12> ARRAY_SIZE;
-+ bits<4> COMP_MASK;
-+ bits<4> BURST_COUNT;
-+ bits<1> VPM;
-+ bits<1> eop;
-+ bits<1> MARK;
-+ bits<1> BARRIER;
-+
-+ // CF_ALLOC_EXPORT_WORD0_RAT
-+ let Inst{3-0} = rat_id;
-+ let Inst{9-4} = rat_inst;
-+ let Inst{10} = 0; // Reserved
-+ let Inst{12-11} = RIM;
-+ let Inst{14-13} = TYPE;
-+ let Inst{21-15} = RW_GPR;
-+ let Inst{22} = RW_REL;
-+ let Inst{29-23} = INDEX_GPR;
-+ let Inst{31-30} = ELEM_SIZE;
-+
-+ // CF_ALLOC_EXPORT_WORD1_BUF
-+ let Inst{43-32} = ARRAY_SIZE;
-+ let Inst{47-44} = COMP_MASK;
-+ let Inst{51-48} = BURST_COUNT;
-+ let Inst{52} = VPM;
-+ let Inst{53} = eop;
-+ let Inst{61-54} = cf_inst;
-+ let Inst{62} = MARK;
-+ let Inst{63} = BARRIER;
-+}
-+
-+class LoadParamFrag <PatFrag load_type> : PatFrag <
-+ (ops node:$ptr), (load_type node:$ptr),
-+ [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
-+>;
-+
-+def load_param : LoadParamFrag<load>;
-+def load_param_zexti8 : LoadParamFrag<zextloadi8>;
-+def load_param_zexti16 : LoadParamFrag<zextloadi16>;
-+
-+def isR600 : Predicate<"Subtarget.device()"
-+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
-+def isR700 : Predicate<"Subtarget.device()"
-+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
-+ "Subtarget.device()->getDeviceFlag()"
-+ ">= OCL_DEVICE_RV710">;
-+def isEG : Predicate<
-+ "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
-+ "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
-+ "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
-+
-+def isCayman : Predicate<"Subtarget.device()"
-+ "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
-+def isEGorCayman : Predicate<"Subtarget.device()"
-+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
-+ "|| Subtarget.device()->getGeneration() =="
-+ "AMDGPUDeviceInfo::HD6XXX">;
-+
-+def isR600toCayman : Predicate<
-+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
-+
-+//===----------------------------------------------------------------------===//
-+// R600 SDNodes
-+//===----------------------------------------------------------------------===//
-+
-+def INTERP: SDNode<"AMDGPUISD::INTERP",
-+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
-+ >;
-+
-+def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
-+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
-+ >;
-+
-+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
-+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
-+ [SDNPMayLoad]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Interpolation Instructions
-+//===----------------------------------------------------------------------===//
-+
-+let usesCustomInserter = 1 in {
-+def input_perspective : AMDGPUShaderInst <
-+ (outs R600_Reg128:$dst),
-+ (ins i32imm:$src0, i32imm:$src1),
-+ "input_perspective $src0 $src1 : dst",
-+ [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
-+} // End usesCustomInserter = 1
-+
-+def input_constant : AMDGPUShaderInst <
-+ (outs R600_Reg128:$dst),
-+ (ins i32imm:$src),
-+ "input_perspective $src : dst",
-+ [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
-+
-+
-+
-+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
-+ let bank_swizzle = 5;
-+}
-+
-+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
-+ let bank_swizzle = 5;
-+}
-+
-+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
-+
-+//===----------------------------------------------------------------------===//
-+// Export Instructions
-+//===----------------------------------------------------------------------===//
-+
-+def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>;
-+
-+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
-+ [SDNPHasChain, SDNPSideEffect]>;
-+
-+class ExportWord0 {
-+ field bits<32> Word0;
-+
-+ bits<13> arraybase;
-+ bits<2> type;
-+ bits<7> gpr;
-+ bits<2> elem_size;
-+
-+ let Word0{12-0} = arraybase;
-+ let Word0{14-13} = type;
-+ let Word0{21-15} = gpr;
-+ let Word0{22} = 0; // RW_REL
-+ let Word0{29-23} = 0; // INDEX_GPR
-+ let Word0{31-30} = elem_size;
-+}
-+
-+class ExportSwzWord1 {
-+ field bits<32> Word1;
-+
-+ bits<3> sw_x;
-+ bits<3> sw_y;
-+ bits<3> sw_z;
-+ bits<3> sw_w;
-+ bits<1> eop;
-+ bits<8> inst;
-+
-+ let Word1{2-0} = sw_x;
-+ let Word1{5-3} = sw_y;
-+ let Word1{8-6} = sw_z;
-+ let Word1{11-9} = sw_w;
-+}
-+
-+class ExportBufWord1 {
-+ field bits<32> Word1;
-+
-+ bits<12> arraySize;
-+ bits<4> compMask;
-+ bits<1> eop;
-+ bits<8> inst;
-+
-+ let Word1{11-0} = arraySize;
-+ let Word1{15-12} = compMask;
-+}
-+
-+multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
-+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
-+ (ExportInst
-+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
-+ 0, 61, 0, 7, 7, 7, cf_inst, 0)
-+ >;
-+
-+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
-+ (ExportInst
-+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
-+ 0, 61, 7, 0, 7, 7, cf_inst, 0)
-+ >;
-+
-+ def : Pat<(int_R600_store_pixel_dummy),
-+ (ExportInst
-+ (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
-+ >;
-+
-+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0),
-+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
-+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
-+ 0, 1, 2, 3, cf_inst, 0)
-+ >;
-+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
-+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
-+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
-+ 0, 1, 2, 3, cf_inst, 0)
-+ >;
-+
-+ def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase,
-+ imm:$type),
-+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
-+ 0, 1, 2, 3, cf_inst, 0)
-+ >;
-+}
-+
-+multiclass SteamOutputExportPattern<Instruction ExportInst,
-+ bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
-+// Stream0
-+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
-+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
-+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
-+ 4095, imm:$mask, buf0inst, 0)>;
-+// Stream1
-+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
-+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
-+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
-+ 4095, imm:$mask, buf1inst, 0)>;
-+// Stream2
-+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
-+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
-+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
-+ 4095, imm:$mask, buf2inst, 0)>;
-+// Stream3
-+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
-+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
-+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
-+ 4095, imm:$mask, buf3inst, 0)>;
-+}
-+
-+let isTerminator = 1, usesCustomInserter = 1 in {
-+
-+class ExportSwzInst : InstR600ISA<(
-+ outs),
-+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
-+ i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
-+ i32imm:$eop),
-+ !strconcat("EXPORT", " $gpr"),
-+ []>, ExportWord0, ExportSwzWord1 {
-+ let elem_size = 3;
-+ let Inst{31-0} = Word0;
-+ let Inst{63-32} = Word1;
-+}
-+
-+} // End isTerminator = 1, usesCustomInserter = 1
-+
-+class ExportBufInst : InstR600ISA<(
-+ outs),
-+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
-+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
-+ !strconcat("EXPORT", " $gpr"),
-+ []>, ExportWord0, ExportBufWord1 {
-+ let elem_size = 0;
-+ let Inst{31-0} = Word0;
-+ let Inst{63-32} = Word1;
-+}
-+
-+let Predicates = [isR600toCayman] in {
-+
-+//===----------------------------------------------------------------------===//
-+// Common Instructions R600, R700, Evergreen, Cayman
-+//===----------------------------------------------------------------------===//
-+
-+def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
-+// Non-IEEE MUL: 0 * anything = 0
-+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
-+def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
-+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
-+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
-+
-+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
-+// so some of the instruction names don't match the asm string.
-+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
-+def SETE : R600_2OP <
-+ 0x08, "SETE",
-+ [(set R600_Reg32:$dst,
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-+ COND_EQ))]
-+>;
-+
-+def SGT : R600_2OP <
-+ 0x09, "SETGT",
-+ [(set R600_Reg32:$dst,
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-+ COND_GT))]
-+>;
-+
-+def SGE : R600_2OP <
-+ 0xA, "SETGE",
-+ [(set R600_Reg32:$dst,
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-+ COND_GE))]
-+>;
-+
-+def SNE : R600_2OP <
-+ 0xB, "SETNE",
-+ [(set R600_Reg32:$dst,
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-+ COND_NE))]
-+>;
-+
-+def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
-+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
-+def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
-+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
-+def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
-+
-+def MOV : R600_1OP <0x19, "MOV", []>;
-+
-+let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
-+
-+class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
-+ (outs R600_Reg32:$dst),
-+ (ins immType:$imm),
-+ "",
-+ []
-+>;
-+
-+} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
-+
-+def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
-+def : Pat <
-+ (imm:$val),
-+ (MOV_IMM_I32 imm:$val)
-+>;
-+
-+def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
-+def : Pat <
-+ (fpimm:$val),
-+ (MOV_IMM_F32 fpimm:$val)
-+>;
-+
-+def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
-+def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
-+def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
-+def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
-+
-+let hasSideEffects = 1 in {
-+
-+def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
-+
-+} // end hasSideEffects
-+
-+def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
-+def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
-+def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
-+def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
-+def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
-+def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
-+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
-+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
-+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
-+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
-+
-+def SETE_INT : R600_2OP <
-+ 0x3A, "SETE_INT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
-+>;
-+
-+def SETGT_INT : R600_2OP <
-+ 0x3B, "SGT_INT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
-+>;
-+
-+def SETGE_INT : R600_2OP <
-+ 0x3C, "SETGE_INT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
-+>;
-+
-+def SETNE_INT : R600_2OP <
-+ 0x3D, "SETNE_INT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
-+>;
-+
-+def SETGT_UINT : R600_2OP <
-+ 0x3E, "SETGT_UINT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
-+>;
-+
-+def SETGE_UINT : R600_2OP <
-+ 0x3F, "SETGE_UINT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
-+>;
-+
-+def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
-+def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
-+def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
-+def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
-+
-+def CNDE_INT : R600_3OP <
-+ 0x1C, "CNDE_INT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), 0,
-+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
-+ COND_EQ))]
-+>;
-+
-+def CNDGE_INT : R600_3OP <
-+ 0x1E, "CNDGE_INT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), 0,
-+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
-+ COND_GE))]
-+>;
-+
-+def CNDGT_INT : R600_3OP <
-+ 0x1D, "CNDGT_INT",
-+ [(set (i32 R600_Reg32:$dst),
-+ (selectcc (i32 R600_Reg32:$src0), 0,
-+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
-+ COND_GT))]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Texture instructions
-+//===----------------------------------------------------------------------===//
-+
-+def TEX_LD : R600_TEX <
-+ 0x03, "TEX_LD",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+> {
-+let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
-+let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget);
-+}
-+
-+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
-+ 0x04, "TEX_GET_TEXTURE_RESINFO",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+>;
-+
-+def TEX_GET_GRADIENTS_H : R600_TEX <
-+ 0x07, "TEX_GET_GRADIENTS_H",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+>;
-+
-+def TEX_GET_GRADIENTS_V : R600_TEX <
-+ 0x08, "TEX_GET_GRADIENTS_V",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+>;
-+
-+def TEX_SET_GRADIENTS_H : R600_TEX <
-+ 0x0B, "TEX_SET_GRADIENTS_H",
-+ []
-+>;
-+
-+def TEX_SET_GRADIENTS_V : R600_TEX <
-+ 0x0C, "TEX_SET_GRADIENTS_V",
-+ []
-+>;
-+
-+def TEX_SAMPLE : R600_TEX <
-+ 0x10, "TEX_SAMPLE",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+>;
-+
-+def TEX_SAMPLE_C : R600_TEX <
-+ 0x18, "TEX_SAMPLE_C",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
-+>;
-+
-+def TEX_SAMPLE_L : R600_TEX <
-+ 0x11, "TEX_SAMPLE_L",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+>;
-+
-+def TEX_SAMPLE_C_L : R600_TEX <
-+ 0x19, "TEX_SAMPLE_C_L",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
-+>;
-+
-+def TEX_SAMPLE_LB : R600_TEX <
-+ 0x12, "TEX_SAMPLE_LB",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+>;
-+
-+def TEX_SAMPLE_C_LB : R600_TEX <
-+ 0x1A, "TEX_SAMPLE_C_LB",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
-+>;
-+
-+def TEX_SAMPLE_G : R600_TEX <
-+ 0x14, "TEX_SAMPLE_G",
-+ []
-+>;
-+
-+def TEX_SAMPLE_C_G : R600_TEX <
-+ 0x1C, "TEX_SAMPLE_C_G",
-+ []
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Helper classes for common instructions
-+//===----------------------------------------------------------------------===//
-+
-+class MUL_LIT_Common <bits<5> inst> : R600_3OP <
-+ inst, "MUL_LIT",
-+ []
-+>;
-+
-+class MULADD_Common <bits<5> inst> : R600_3OP <
-+ inst, "MULADD",
-+ [(set (f32 R600_Reg32:$dst),
-+ (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
-+>;
-+
-+class CNDE_Common <bits<5> inst> : R600_3OP <
-+ inst, "CNDE",
-+ [(set R600_Reg32:$dst,
-+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
-+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
-+ COND_EQ))]
-+>;
-+
-+class CNDGT_Common <bits<5> inst> : R600_3OP <
-+ inst, "CNDGT",
-+ [(set R600_Reg32:$dst,
-+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
-+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
-+ COND_GT))]
-+>;
-+
-+class CNDGE_Common <bits<5> inst> : R600_3OP <
-+ inst, "CNDGE",
-+ [(set R600_Reg32:$dst,
-+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
-+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
-+ COND_GE))]
-+>;
-+
-+multiclass DOT4_Common <bits<11> inst> {
-+
-+ def _pseudo : R600_REDUCTION <inst,
-+ (ins R600_Reg128:$src0, R600_Reg128:$src1),
-+ "DOT4 $dst $src0, $src1",
-+ [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
-+ >;
-+
-+ def _real : R600_2OP <inst, "DOT4", []>;
-+}
-+
-+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-+multiclass CUBE_Common <bits<11> inst> {
-+
-+ def _pseudo : InstR600 <
-+ inst,
-+ (outs R600_Reg128:$dst),
-+ (ins R600_Reg128:$src),
-+ "CUBE $dst $src",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
-+ VecALU
-+ > {
-+ let isPseudo = 1;
-+ }
-+
-+ def _real : R600_2OP <inst, "CUBE", []>;
-+}
-+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
-+
-+class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "EXP_IEEE", fexp2
-+>;
-+
-+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "FLT_TO_INT", fp_to_sint
-+>;
-+
-+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "INT_TO_FLT", sint_to_fp
-+>;
-+
-+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "FLT_TO_UINT", fp_to_uint
-+>;
-+
-+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "UINT_TO_FLT", uint_to_fp
-+>;
-+
-+class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
-+ inst, "LOG_CLAMPED", []
-+>;
-+
-+class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "LOG_IEEE", flog2
-+>;
-+
-+class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
-+class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
-+class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
-+class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
-+ inst, "MULHI_INT", mulhs
-+>;
-+class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
-+ inst, "MULHI", mulhu
-+>;
-+class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
-+ inst, "MULLO_INT", mul
-+>;
-+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;
-+
-+class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
-+ inst, "RECIP_CLAMPED", []
-+>;
-+
-+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
-+ inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
-+>;
-+
-+class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "RECIP_UINT", AMDGPUurecip
-+>;
-+
-+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
-+ inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
-+>;
-+
-+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
-+ inst, "RECIPSQRT_IEEE", []
-+>;
-+
-+class SIN_Common <bits<11> inst> : R600_1OP <
-+ inst, "SIN", []>{
-+ let Trig = 1;
-+}
-+
-+class COS_Common <bits<11> inst> : R600_1OP <
-+ inst, "COS", []> {
-+ let Trig = 1;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Helper patterns for complex intrinsics
-+//===----------------------------------------------------------------------===//
-+
-+multiclass DIV_Common <InstR600 recip_ieee> {
-+def : Pat<
-+ (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
-+ (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
-+>;
-+
-+def : Pat<
-+ (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
-+ (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
-+>;
-+}
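-+
-+// In scalar terms the DIV_Common patterns lower a divide to a multiply by
-+// the IEEE reciprocal, i.e. (sketch):
-+//
-+//   float fdiv_r600(float a, float b) {
-+//     return a * (1.0f / b); // MUL(a, RECIP_IEEE(b))
-+//   }
-+//
-+// Note this reciprocal-based form is not a correctly rounded IEEE divide.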
-+
-+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
-+ (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
-+ (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// R600 / R700 Instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [isR600] in {
-+
-+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
-+ def MULADD_r600 : MULADD_Common<0x10>;
-+ def CNDE_r600 : CNDE_Common<0x18>;
-+ def CNDGT_r600 : CNDGT_Common<0x19>;
-+ def CNDGE_r600 : CNDGE_Common<0x1A>;
-+ defm DOT4_r600 : DOT4_Common<0x50>;
-+ defm CUBE_r600 : CUBE_Common<0x52>;
-+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
-+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
-+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
-+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
-+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
-+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
-+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
-+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
-+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
-+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
-+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
-+ def SIN_r600 : SIN_Common<0x6E>;
-+ def COS_r600 : COS_Common<0x6F>;
-+ def ASHR_r600 : ASHR_Common<0x70>;
-+ def LSHR_r600 : LSHR_Common<0x71>;
-+ def LSHL_r600 : LSHL_Common<0x72>;
-+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
-+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
-+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
-+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
-+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
-+
-+ defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
-+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
-+
-+ def : Pat<(fsqrt R600_Reg32:$src),
-+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
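-+
-+ // The fsqrt pattern relies on the identity sqrt(x) == x * (1 / sqrt(x)).
-+ // In C terms (sketch):
-+ //
-+ //   float fsqrt_r600(float x) {
-+ //     return x * (1.0f / sqrtf(x)); // MUL(x, RECIPSQRT_CLAMPED(x))
-+ //   }
-+ //
-+ // The CLAMPED variant presumably returns a large finite value instead of
-+ // +inf at x == 0, so the product stays 0 rather than becoming 0 * inf = NaN.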
-+
-+ def R600_ExportSwz : ExportSwzInst {
-+ let Word1{20-17} = 1; // BURST_COUNT
-+ let Word1{21} = eop;
-+ let Word1{22} = 1; // VALID_PIXEL_MODE
-+ let Word1{30-23} = inst;
-+ let Word1{31} = 1; // BARRIER
-+ }
-+ defm : ExportPattern<R600_ExportSwz, 39>;
-+
-+ def R600_ExportBuf : ExportBufInst {
-+ let Word1{20-17} = 1; // BURST_COUNT
-+ let Word1{21} = eop;
-+ let Word1{22} = 1; // VALID_PIXEL_MODE
-+ let Word1{30-23} = inst;
-+ let Word1{31} = 1; // BARRIER
-+ }
-+ defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
-+}
-+
-+// Helper pattern for normalizing inputs to trigonometric instructions for
-+// R700+ cards.
-+class COS_PAT <InstR600 trig> : Pat<
-+ (fcos R600_Reg32:$src),
-+ (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
-+>;
-+
-+class SIN_PAT <InstR600 trig> : Pat<
-+ (fsin R600_Reg32:$src),
-+ (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
-+>;
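-+
-+// Numerically the normalization is a pre-scale by 1/(2*pi): the hardware
-+// SIN/COS on these parts evidently consume a value in full rotations
-+// rather than radians. A C sketch of the COS_PAT lowering, where HW_COS
-+// stands in for the native COS opcode and is not a real function:
-+//
-+//   float cos_r700(float x) {
-+//     const float TwoPiInv = 0.15915494f; // 1 / (2 * pi), CONST.TWO_PI_INV
-+//     return HW_COS(x * TwoPiInv);        // MUL(MOV_IMM(TWO_PI_INV), x)
-+//   }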
-+
-+//===----------------------------------------------------------------------===//
-+// R700 Only instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [isR700] in {
-+ def SIN_r700 : SIN_Common<0x6E>;
-+ def COS_r700 : COS_Common<0x6F>;
-+
-+ // R700 normalizes inputs to SIN/COS the same as EG
-+ def : SIN_PAT <SIN_r700>;
-+ def : COS_PAT <COS_r700>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Evergreen Only instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [isEG] in {
-+
-+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
-+defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
-+
-+def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
-+def MULHI_INT_eg : MULHI_INT_Common<0x90>;
-+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
-+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
-+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
-+def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
-+def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
-+def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
-+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
-+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-+def SIN_eg : SIN_Common<0x8D>;
-+def COS_eg : COS_Common<0x8E>;
-+
-+def : SIN_PAT <SIN_eg>;
-+def : COS_PAT <COS_eg>;
-+def : Pat<(fsqrt R600_Reg32:$src),
-+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
-+} // End Predicates = [isEG]
-+
-+//===----------------------------------------------------------------------===//
-+// Evergreen / Cayman Instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [isEGorCayman] in {
-+
-+ // BFE_UINT - bit_extract, an optimization for mask and shift
-+ // Src0 = Input
-+ // Src1 = Offset
-+ // Src2 = Width
-+ //
-+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
-+ //
-+ // Example Usage:
-+ // (Offset, Width)
-+ //
-+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
-+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
-+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
-+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
-+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
-+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
-+ R600_Reg32:$src1,
-+ R600_Reg32:$src2))],
-+ VecALU
-+ >;
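-+
-+ // A C sketch of the bit_extract formula above; the shift counts assume
-+ // 0 < Width and Offset + Width <= 32, since a shift by 32 would be
-+ // undefined in C:
-+ //
-+ //   uint32_t bfe_uint(uint32_t Input, uint32_t Offset, uint32_t Width) {
-+ //     return (Input << (32 - Offset - Width)) >> (32 - Width);
-+ //   }
-+ //
-+ // e.g. bfe_uint(0xABCD1234, 8, 8) == 0x12, matching the (8, 8) row above.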
-+
-+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
-+ [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
-+ R600_Reg32:$src2))],
-+ VecALU
-+ >;
-+
-+ def MULADD_eg : MULADD_Common<0x14>;
-+ def ASHR_eg : ASHR_Common<0x15>;
-+ def LSHR_eg : LSHR_Common<0x16>;
-+ def LSHL_eg : LSHL_Common<0x17>;
-+ def CNDE_eg : CNDE_Common<0x19>;
-+ def CNDGT_eg : CNDGT_Common<0x1A>;
-+ def CNDGE_eg : CNDGE_Common<0x1B>;
-+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
-+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
-+ defm DOT4_eg : DOT4_Common<0xBE>;
-+ defm CUBE_eg : CUBE_Common<0xC0>;
-+
-+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
-+
-+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
-+ let Pattern = [];
-+ }
-+
-+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
-+
-+ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
-+ let Pattern = [];
-+ }
-+
-+ def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
-+
-+ // TRUNC is used for the FLT_TO_INT instructions to work around a
-+ // perceived problem where the rounding modes are applied differently
-+ // depending on the instruction and the slot they are in.
-+ // See:
-+ // https://bugs.freedesktop.org/show_bug.cgi?id=50232
-+ // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
-+ //
-+ // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
-+ // which do not need to be truncated since the fp values are 0.0f or 1.0f.
-+ // We should look into handling these cases separately.
-+ def : Pat<(fp_to_sint R600_Reg32:$src0),
-+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
-+
-+ def : Pat<(fp_to_uint R600_Reg32:$src0),
-+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
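-+
-+ // In effect the two patterns above lower the conversions as (sketch):
-+ //
-+ //   int32_t  f2i(float x) { return (int32_t)truncf(x);  }
-+ //   uint32_t f2u(float x) { return (uint32_t)truncf(x); }
-+ //
-+ // The explicit TRUNC makes the round-toward-zero step independent of
-+ // whatever rounding mode the FLT_TO_INT / FLT_TO_UINT slot applies.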
-+
-+ def EG_ExportSwz : ExportSwzInst {
-+ let Word1{19-16} = 1; // BURST_COUNT
-+ let Word1{20} = 1; // VALID_PIXEL_MODE
-+ let Word1{21} = eop;
-+ let Word1{29-22} = inst;
-+ let Word1{30} = 0; // MARK
-+ let Word1{31} = 1; // BARRIER
-+ }
-+ defm : ExportPattern<EG_ExportSwz, 83>;
-+
-+ def EG_ExportBuf : ExportBufInst {
-+ let Word1{19-16} = 1; // BURST_COUNT
-+ let Word1{20} = 1; // VALID_PIXEL_MODE
-+ let Word1{21} = eop;
-+ let Word1{29-22} = inst;
-+ let Word1{30} = 0; // MARK
-+ let Word1{31} = 1; // BARRIER
-+ }
-+ defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
-+
-+//===----------------------------------------------------------------------===//
-+// Memory read/write instructions
-+//===----------------------------------------------------------------------===//
-+let usesCustomInserter = 1 in {
-+
-+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
-+ list<dag> pattern>
-+ : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
-+ !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
-+ let RIM = 0;
-+ // XXX: Have a separate instruction for non-indexed writes.
-+ let TYPE = 1;
-+ let RW_REL = 0;
-+ let ELEM_SIZE = 0;
-+
-+ let ARRAY_SIZE = 0;
-+ let COMP_MASK = comp_mask;
-+ let BURST_COUNT = 0;
-+ let VPM = 0;
-+ let MARK = 0;
-+ let BARRIER = 1;
-+}
-+
-+} // End usesCustomInserter = 1
-+
-+// 32-bit store
-+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
-+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
-+ 0x1, "RAT_WRITE_CACHELESS_32_eg",
-+ [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
-+>;
-+
-+//128-bit store
-+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
-+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
-+ 0xf, "RAT_WRITE_CACHELESS_128",
-+ [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
-+>;
-+
-+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
-+ : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
-+ VTX_WORD1_GPR, VTX_WORD0 {
-+
-+ // Static fields
-+ let VC_INST = 0;
-+ let FETCH_TYPE = 2;
-+ let FETCH_WHOLE_QUAD = 0;
-+ let BUFFER_ID = buffer_id;
-+ let SRC_REL = 0;
-+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
-+ // to store vertex addresses in any channel, not just X.
-+ let SRC_SEL_X = 0;
-+ let DST_REL = 0;
-+ // The docs say that if this bit is set, then the DATA_FORMAT,
-+ // NUM_FORMAT_ALL, FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields
-+ // will be ignored. However, based on my testing, if USE_CONST_FIELDS is
-+ // set, then all of these fields need to be set to 0.
-+ let USE_CONST_FIELDS = 0;
-+ let NUM_FORMAT_ALL = 1;
-+ let FORMAT_COMP_ALL = 0;
-+ let SRF_MODE_ALL = 0;
-+
-+ let Inst{31-0} = Word0;
-+ let Inst{63-32} = Word1;
-+ // LLVM can only encode 64-bit instructions, so these fields are manually
-+ // encoded in R600CodeEmitter
-+ //
-+ // bits<16> OFFSET;
-+ // bits<2> ENDIAN_SWAP = 0;
-+ // bits<1> CONST_BUF_NO_STRIDE = 0;
-+ // bits<1> MEGA_FETCH = 0;
-+ // bits<1> ALT_CONST = 0;
-+ // bits<2> BUFFER_INDEX_MODE = 0;
-+
-+
-+
-+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
-+ // is done in R600CodeEmitter)
-+ //
-+ // Inst{79-64} = OFFSET;
-+ // Inst{81-80} = ENDIAN_SWAP;
-+ // Inst{82} = CONST_BUF_NO_STRIDE;
-+ // Inst{83} = MEGA_FETCH;
-+ // Inst{84} = ALT_CONST;
-+ // Inst{86-85} = BUFFER_INDEX_MODE;
-+ // Inst{95-86} = 0; Reserved
-+
-+ // VTX_WORD3 (Padding)
-+ //
-+ // Inst{127-96} = 0;
-+}
-+
-+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
-+ : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
-+ pattern> {
-+
-+ let MEGA_FETCH_COUNT = 1;
-+ let DST_SEL_X = 0;
-+ let DST_SEL_Y = 7; // Masked
-+ let DST_SEL_Z = 7; // Masked
-+ let DST_SEL_W = 7; // Masked
-+ let DATA_FORMAT = 1; // FMT_8
-+}
-+
-+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
-+ : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
-+ pattern> {
-+ let MEGA_FETCH_COUNT = 2;
-+ let DST_SEL_X = 0;
-+ let DST_SEL_Y = 7; // Masked
-+ let DST_SEL_Z = 7; // Masked
-+ let DST_SEL_W = 7; // Masked
-+ let DATA_FORMAT = 5; // FMT_16
-+
-+}
-+
-+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
-+ : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
-+ pattern> {
-+
-+ let MEGA_FETCH_COUNT = 4;
-+ let DST_SEL_X = 0;
-+ let DST_SEL_Y = 7; // Masked
-+ let DST_SEL_Z = 7; // Masked
-+ let DST_SEL_W = 7; // Masked
-+ let DATA_FORMAT = 0xD; // COLOR_32
-+
-+ // This is not really necessary, but there were some GPU hangs that appeared
-+ // to be caused by ALU instructions in the next instruction group that wrote
-+ // to the $ptr registers of the VTX_READ.
-+ // e.g.
-+ // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
-+ // %T2_X<def> = MOV %ZERO
-+ // Adding this constraint prevents this from happening.
-+ let Constraints = "$ptr.ptr = $dst";
-+}
-+
-+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
-+ : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
-+ pattern> {
-+
-+ let MEGA_FETCH_COUNT = 16;
-+ let DST_SEL_X = 0;
-+ let DST_SEL_Y = 1;
-+ let DST_SEL_Z = 2;
-+ let DST_SEL_W = 3;
-+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
-+
-+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
-+ // that holds its buffer address to avoid potential hangs. We can't use
-+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
-+ // registers are different sizes.
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// VTX Read from parameter memory space
-+//===----------------------------------------------------------------------===//
-+
-+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
-+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
-+>;
-+
-+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
-+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
-+>;
-+
-+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
-+ [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// VTX Read from global memory space
-+//===----------------------------------------------------------------------===//
-+
-+// 8-bit reads
-+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
-+ [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
-+>;
-+
-+// 32-bit reads
-+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
-+ [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
-+>;
-+
-+// 128-bit reads
-+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
-+ [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Constant Loads
-+// XXX: We are currently storing all constants in the global address space.
-+//===----------------------------------------------------------------------===//
-+
-+def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
-+ [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
-+>;
-+
-+}
-+
-+let Predicates = [isCayman] in {
-+
-+let isVector = 1 in {
-+
-+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
-+
-+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
-+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
-+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
-+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
-+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
-+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
-+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
-+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
-+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
-+def SIN_cm : SIN_Common<0x8D>;
-+def COS_cm : COS_Common<0x8E>;
-+} // End isVector = 1
-+
-+def : SIN_PAT <SIN_cm>;
-+def : COS_PAT <COS_cm>;
-+
-+defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
-+
-+// RECIP_UINT emulation for Cayman
-+def : Pat <
-+ (AMDGPUurecip R600_Reg32:$src0),
-+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
-+ (MOV_IMM_I32 0x4f800000)))
-+>;
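-+
-+// 0x4f800000 is the IEEE-754 single-precision encoding of 2^32, so the
-+// pattern above computes roughly 2^32 / src0 in floating point before
-+// converting back to an integer. A C sketch of the emulation:
-+//
-+//   uint32_t urecip_cm(uint32_t x) {
-+//     float r = 1.0f / (float)x;             // RECIP_IEEE(UINT_TO_FLT(x))
-+//     return (uint32_t)(r * 4294967296.0f);  // MUL_IEEE by 2^32, FLT_TO_UINT
-+//   }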
-+
-+
-+def : Pat<(fsqrt R600_Reg32:$src),
-+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
-+
-+} // End isCayman
-+
-+//===----------------------------------------------------------------------===//
-+// Branch Instructions
-+//===----------------------------------------------------------------------===//
-+
-+
-+def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
-+ "IF_PREDICATE_SET $src", []>;
-+
-+def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
-+ "PREDICATED_BREAK $src", []>;
-+
-+//===----------------------------------------------------------------------===//
-+// Pseudo instructions
-+//===----------------------------------------------------------------------===//
-+
-+let isPseudo = 1 in {
-+
-+def PRED_X : InstR600 <
-+ 0, (outs R600_Predicate_Bit:$dst),
-+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
-+ "", [], NullALU> {
-+ let FlagOperandIdx = 3;
-+}
-+
-+let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
-+
-+def JUMP : InstR600 <0x10,
-+ (outs),
-+ (ins brtarget:$target, R600_Pred:$p),
-+ "JUMP $target ($p)",
-+ [], AnyALU
-+ >;
-+
-+} // End isTerminator = 1, isBranch = 1, isBarrier = 1
-+
-+let usesCustomInserter = 1 in {
-+
-+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
-+
-+def MASK_WRITE : AMDGPUShaderInst <
-+ (outs),
-+ (ins R600_Reg32:$src),
-+ "MASK_WRITE $src",
-+ []
-+>;
-+
-+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
-+
-+
-+def RESERVE_REG : AMDGPUShaderInst <
-+ (outs),
-+ (ins i32imm:$src),
-+ "RESERVE_REG $src",
-+ [(int_AMDGPU_reserve_reg imm:$src)]
-+>;
-+def TXD: AMDGPUShaderInst <
-+ (outs R600_Reg128:$dst),
-+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
-+ "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
-+>;
-+
-+def TXD_SHADOW: AMDGPUShaderInst <
-+ (outs R600_Reg128:$dst),
-+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
-+ "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
-+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
-+>;
-+
-+} // End isPseudo = 1
-+} // End usesCustomInserter = 1
-+
-+def CLAMP_R600 : CLAMP <R600_Reg32>;
-+def FABS_R600 : FABS<R600_Reg32>;
-+def FNEG_R600 : FNEG<R600_Reg32>;
-+
-+//===---------------------------------------------------------------------===//
-+// Return instruction
-+//===---------------------------------------------------------------------===//
-+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
-+ def RETURN : ILFormat<(outs), (ins variable_ops),
-+ "RETURN", [(IL_retflag)]>;
-+}
-+
-+
-+//===----------------------------------------------------------------------===//
-+// Constant Buffer Addressing Support
-+//===----------------------------------------------------------------------===//
-+
-+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
-+def CONST_COPY : Instruction {
-+ let OutOperandList = (outs R600_Reg32:$dst);
-+ let InOperandList = (ins i32imm:$src);
-+ let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
-+ let AsmString = "CONST_COPY";
-+ let neverHasSideEffects = 1;
-+ let isAsCheapAsAMove = 1;
-+ let Itinerary = NullALU;
-+}
-+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
-+
-+def TEX_VTX_CONSTBUF :
-+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
-+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
-+ VTX_WORD1_GPR, VTX_WORD0 {
-+
-+ let VC_INST = 0;
-+ let FETCH_TYPE = 2;
-+ let FETCH_WHOLE_QUAD = 0;
-+ let BUFFER_ID = 0;
-+ let SRC_REL = 0;
-+ let SRC_SEL_X = 0;
-+ let DST_REL = 0;
-+ let USE_CONST_FIELDS = 0;
-+ let NUM_FORMAT_ALL = 2;
-+ let FORMAT_COMP_ALL = 1;
-+ let SRF_MODE_ALL = 1;
-+ let MEGA_FETCH_COUNT = 16;
-+ let DST_SEL_X = 0;
-+ let DST_SEL_Y = 1;
-+ let DST_SEL_Z = 2;
-+ let DST_SEL_W = 3;
-+ let DATA_FORMAT = 35;
-+
-+ let Inst{31-0} = Word0;
-+ let Inst{63-32} = Word1;
-+
-+// LLVM can only encode 64-bit instructions, so these fields are manually
-+// encoded in R600CodeEmitter
-+//
-+// bits<16> OFFSET;
-+// bits<2> ENDIAN_SWAP = 0;
-+// bits<1> CONST_BUF_NO_STRIDE = 0;
-+// bits<1> MEGA_FETCH = 0;
-+// bits<1> ALT_CONST = 0;
-+// bits<2> BUFFER_INDEX_MODE = 0;
-+
-+
-+
-+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
-+// is done in R600CodeEmitter)
-+//
-+// Inst{79-64} = OFFSET;
-+// Inst{81-80} = ENDIAN_SWAP;
-+// Inst{82} = CONST_BUF_NO_STRIDE;
-+// Inst{83} = MEGA_FETCH;
-+// Inst{84} = ALT_CONST;
-+// Inst{86-85} = BUFFER_INDEX_MODE;
-+// Inst{95-87} = 0; Reserved
-+
-+// VTX_WORD3 (Padding)
-+//
-+// Inst{127-96} = 0;
-+}
-+
-+
-+//===--------------------------------------------------------------------===//
-+// Instructions support
-+//===--------------------------------------------------------------------===//
-+//===---------------------------------------------------------------------===//
-+// Custom inserter for branches and returns; this will eventually become a
-+// separate pass
-+//===---------------------------------------------------------------------===//
-+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
-+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
-+ "; Pseudo unconditional branch instruction",
-+ [(br bb:$target)]>;
-+ defm BRANCH_COND : BranchConditional<IL_brcond>;
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// Flow and Program control Instructions
-+//===---------------------------------------------------------------------===//
-+let isTerminator=1 in {
-+ def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
-+ !strconcat("SWITCH", " $src"), []>;
-+ def CASE : ILFormat< (outs), (ins GPRI32:$src),
-+ !strconcat("CASE", " $src"), []>;
-+ def BREAK : ILFormat< (outs), (ins),
-+ "BREAK", []>;
-+ def CONTINUE : ILFormat< (outs), (ins),
-+ "CONTINUE", []>;
-+ def DEFAULT : ILFormat< (outs), (ins),
-+ "DEFAULT", []>;
-+ def ELSE : ILFormat< (outs), (ins),
-+ "ELSE", []>;
-+ def ENDSWITCH : ILFormat< (outs), (ins),
-+ "ENDSWITCH", []>;
-+ def ENDMAIN : ILFormat< (outs), (ins),
-+ "ENDMAIN", []>;
-+ def END : ILFormat< (outs), (ins),
-+ "END", []>;
-+ def ENDFUNC : ILFormat< (outs), (ins),
-+ "ENDFUNC", []>;
-+ def ENDIF : ILFormat< (outs), (ins),
-+ "ENDIF", []>;
-+ def WHILELOOP : ILFormat< (outs), (ins),
-+ "WHILE", []>;
-+ def ENDLOOP : ILFormat< (outs), (ins),
-+ "ENDLOOP", []>;
-+ def FUNC : ILFormat< (outs), (ins),
-+ "FUNC", []>;
-+ def RETDYN : ILFormat< (outs), (ins),
-+ "RET_DYN", []>;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
-+ defm IFC : BranchInstr2<"IFC">;
-+ defm BREAKC : BranchInstr2<"BREAKC">;
-+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// ISel Patterns
-+//===----------------------------------------------------------------------===//
-+
-+//CNDGE_INT extra pattern
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
-+ (i32 R600_Reg32:$src2), COND_GT),
-+ (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
-+>;
-+
-+// KIL Patterns
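-+// KILLGT discards the pixel when its first operand is greater than its
-+// second, and MASK_WRITE masks out the register write so only the kill
-+// side effect remains. KILP (1.0 > 0.0) therefore kills unconditionally,
-+// while KIL (0.0 > src0) kills when src0 is negative.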
-+def KILP : Pat <
-+ (int_AMDGPU_kilp),
-+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
-+>;
-+
-+def KIL : Pat <
-+ (int_AMDGPU_kill R600_Reg32:$src0),
-+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
-+>;
-+
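-+// The comparison instructions only come in "greater than" / "greater or
-+// equal" flavors, so the LT/LE selects below are matched by swapping the
-+// operands of the corresponding GT/GE instruction (a < b == b > a).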
-+// SGT Reverse args
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
-+ (SGT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SGE Reverse args
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
-+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGT_INT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
-+ (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGE_INT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
-+ (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGT_UINT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
-+ (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGE_UINT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
-+ (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// The next two patterns are special cases for handling 'true if ordered' and
-+// 'true if unordered' conditionals. The assumption here is that the behavior of
-+// SETE and SNE conforms to the Direct3D 10 rules for floating point values
-+// described here:
-+// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
-+// We assume that SETE returns false when one of the operands is NaN and
-+// SNE returns true when one of the operands is NaN.
-+
-+//SETE - 'true if ordered'
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
-+ (SETE R600_Reg32:$src0, R600_Reg32:$src1)
-+>;
-+
-+//SNE - 'true if unordered'
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
-+ (SNE R600_Reg32:$src0, R600_Reg32:$src1)
-+>;
-+
-+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
-+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
-+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
-+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
-+
-+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
-+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
-+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
-+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
-+
-+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
-+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
-+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
-+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
-+
-+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
-+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
-+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
-+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
-+
-+def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-+def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
-+
-+// bitconvert patterns
-+
-+def : BitConvert <i32, f32, R600_Reg32>;
-+def : BitConvert <f32, i32, R600_Reg32>;
-+def : BitConvert <v4f32, v4i32, R600_Reg128>;
-+def : BitConvert <v4i32, v4f32, R600_Reg128>;
-+
-+// DWORDADDR pattern
-+def : DwordAddrPat <i32, R600_Reg32>;
-+
-+} // End isR600toCayman Predicate
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Intrinsics.td llvm-r600/lib/Target/R600/R600Intrinsics.td
---- llvm-3.2.src/lib/Target/R600/R600Intrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600Intrinsics.td 2013-01-25 19:43:57.466716387 +0100
-@@ -0,0 +1,34 @@
-+//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// R600 Intrinsic Definitions
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "R600", isTarget = 1 in {
-+ def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-+ def int_R600_load_input_perspective :
-+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
-+ def int_R600_load_input_constant :
-+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
-+ def int_R600_load_input_linear :
-+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
-+ def int_R600_store_swizzle :
-+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-+ def int_R600_store_stream_output :
-+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-+ def int_R600_store_pixel_color :
-+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
-+ def int_R600_store_pixel_depth :
-+ Intrinsic<[], [llvm_float_ty], []>;
-+ def int_R600_store_pixel_stencil :
-+ Intrinsic<[], [llvm_float_ty], []>;
-+ def int_R600_store_pixel_dummy :
-+ Intrinsic<[], [], []>;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600ISelLowering.cpp llvm-r600/lib/Target/R600/R600ISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/R600ISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600ISelLowering.cpp 2013-01-25 19:43:57.463383054 +0100
-@@ -0,0 +1,997 @@
-+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Custom DAG lowering for R600
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "R600ISelLowering.h"
-+#include "R600Defines.h"
-+#include "R600InstrInfo.h"
-+#include "R600MachineFunctionInfo.h"
-+#include "llvm/Argument.h"
-+#include "llvm/Function.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+
-+using namespace llvm;
-+
-+R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
-+ AMDGPUTargetLowering(TM),
-+ TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
-+ setOperationAction(ISD::MUL, MVT::i64, Expand);
-+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
-+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
-+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
-+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
-+ computeRegisterProperties();
-+
-+ setOperationAction(ISD::FADD, MVT::v4f32, Expand);
-+ setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
-+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
-+ setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
-+
-+ setOperationAction(ISD::ADD, MVT::v4i32, Expand);
-+ setOperationAction(ISD::AND, MVT::v4i32, Expand);
-+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
-+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
-+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
-+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
-+ setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
-+ setOperationAction(ISD::UREM, MVT::v4i32, Expand);
-+ setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
-+
-+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
-+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
-+
-+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
-+
-+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
-+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
-+ setOperationAction(ISD::FPOW, MVT::f32, Custom);
-+
-+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
-+
-+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
-+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-+
-+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
-+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
-+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
-+
-+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
-+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
-+
-+ setOperationAction(ISD::STORE, MVT::i32, Custom);
-+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
-+
-+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
-+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
-+ setTargetDAGCombine(ISD::FP_ROUND);
-+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
-+
-+ setSchedulingPreference(Sched::VLIW);
-+}
-+
-+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
-+ MachineInstr * MI, MachineBasicBlock * BB) const {
-+ MachineFunction * MF = BB->getParent();
-+ MachineRegisterInfo &MRI = MF->getRegInfo();
-+ MachineBasicBlock::iterator I = *MI;
-+
-+ switch (MI->getOpcode()) {
-+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
-+ case AMDGPU::SHADER_TYPE: break;
-+ case AMDGPU::CLAMP_R600: {
-+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
-+ AMDGPU::MOV,
-+ MI->getOperand(0).getReg(),
-+ MI->getOperand(1).getReg());
-+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
-+ break;
-+ }
-+
-+ case AMDGPU::FABS_R600: {
-+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
-+ AMDGPU::MOV,
-+ MI->getOperand(0).getReg(),
-+ MI->getOperand(1).getReg());
-+ TII->addFlag(NewMI, 0, MO_FLAG_ABS);
-+ break;
-+ }
-+
-+ case AMDGPU::FNEG_R600: {
-+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
-+ AMDGPU::MOV,
-+ MI->getOperand(0).getReg(),
-+ MI->getOperand(1).getReg());
-+ TII->addFlag(NewMI, 0, MO_FLAG_NEG);
-+ break;
-+ }
-+
-+ case AMDGPU::MASK_WRITE: {
-+ unsigned maskedRegister = MI->getOperand(0).getReg();
-+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
-+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
-+ TII->addFlag(defInstr, 0, MO_FLAG_MASK);
-+ break;
-+ }
-+
-+ case AMDGPU::MOV_IMM_F32:
-+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
-+ MI->getOperand(1).getFPImm()->getValueAPF()
-+ .bitcastToAPInt().getZExtValue());
-+ break;
-+ case AMDGPU::MOV_IMM_I32:
-+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
-+ MI->getOperand(1).getImm());
-+ break;
-+
-+
-+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
-+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
-+ unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
-+
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ .addImm(EOP); // Set End of program bit
-+ break;
-+ }
-+
-+ case AMDGPU::RESERVE_REG: {
-+ R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
-+ int64_t ReservedIndex = MI->getOperand(0).getImm();
-+ unsigned ReservedReg =
-+ AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
-+ MFI->ReservedRegs.push_back(ReservedReg);
-+ unsigned SuperReg =
-+ AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
-+ MFI->ReservedRegs.push_back(SuperReg);
-+ break;
-+ }
-+
-+ case AMDGPU::TXD: {
-+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
-+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
-+
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
-+ .addOperand(MI->getOperand(3))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6));
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
-+ .addOperand(MI->getOperand(2))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6));
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6))
-+ .addReg(T0, RegState::Implicit)
-+ .addReg(T1, RegState::Implicit);
-+ break;
-+ }
-+
-+ case AMDGPU::TXD_SHADOW: {
-+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
-+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
-+
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
-+ .addOperand(MI->getOperand(3))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6));
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
-+ .addOperand(MI->getOperand(2))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6));
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6))
-+ .addReg(T0, RegState::Implicit)
-+ .addReg(T1, RegState::Implicit);
-+ break;
-+ }
-+
-+ case AMDGPU::BRANCH:
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
-+ .addOperand(MI->getOperand(0))
-+ .addReg(0);
-+ break;
-+
-+ case AMDGPU::BRANCH_COND_f32: {
-+ MachineInstr *NewMI =
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
-+ AMDGPU::PREDICATE_BIT)
-+ .addOperand(MI->getOperand(1))
-+ .addImm(OPCODE_IS_NOT_ZERO)
-+ .addImm(0); // Flags
-+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
-+ .addOperand(MI->getOperand(0))
-+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
-+ break;
-+ }
-+
-+ case AMDGPU::BRANCH_COND_i32: {
-+ MachineInstr *NewMI =
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
-+ AMDGPU::PREDICATE_BIT)
-+ .addOperand(MI->getOperand(1))
-+ .addImm(OPCODE_IS_NOT_ZERO_INT)
-+ .addImm(0); // Flags
-+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
-+ .addOperand(MI->getOperand(0))
-+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
-+ break;
-+ }
-+
-+ case AMDGPU::input_perspective: {
-+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
-+
-+ // XXX Be more fine-grained about register reservation
-+ for (unsigned i = 0; i < 4; i ++) {
-+ unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
-+ MFI->ReservedRegs.push_back(ReservedReg);
-+ }
-+
-+ switch (MI->getOperand(1).getImm()) {
-+ case 0:// Perspective
-+ MFI->HasPerspectiveInterpolation = true;
-+ break;
-+ case 1:// Linear
-+ MFI->HasLinearInterpolation = true;
-+ break;
-+ default:
-+ assert(0 && "Unknow ij index");
-+ }
-+
-+ return BB;
-+ }
-+
-+ case AMDGPU::EG_ExportSwz:
-+ case AMDGPU::R600_ExportSwz: {
-+ // Instruction is left unmodified if it's not the last one of its type
-+ bool isLastInstructionOfItsType = true;
-+ unsigned InstExportType = MI->getOperand(1).getImm();
-+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
-+ EndBlock = BB->end(); NextExportInst != EndBlock;
-+ NextExportInst = llvm::next(NextExportInst)) {
-+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
-+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
-+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
-+ .getImm();
-+ if (CurrentInstExportType == InstExportType) {
-+ isLastInstructionOfItsType = false;
-+ break;
-+ }
-+ }
-+ }
-+ bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
-+ if (!EOP && !isLastInstructionOfItsType)
-+ return BB;
-+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ .addOperand(MI->getOperand(2))
-+ .addOperand(MI->getOperand(3))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6))
-+ .addImm(CfInst)
-+ .addImm(EOP);
-+ break;
-+ }
-+ }
-+
-+ MI->eraseFromParent();
-+ return BB;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom DAG Lowering Operations
-+//===----------------------------------------------------------------------===//
-+
-+using namespace llvm::Intrinsic;
-+using namespace llvm::AMDGPUIntrinsic;
-+
-+static SDValue
-+InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
-+ unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
-+ SDValue Scalar, SDValue Chain) {
-+ if (!ExportMap[Slot]) {
-+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
-+ DL, MVT::v4f32,
-+ DAG.getUNDEF(MVT::v4f32),
-+ Scalar,
-+ DAG.getConstant(Channel, MVT::i32));
-+
-+ unsigned Mask = 1 << Channel;
-+
-+ const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
-+ DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
-+ DAG.getConstant(Mask, MVT::i32)};
-+
-+ SDValue Res = DAG.getNode(
-+ AMDGPUISD::EXPORT,
-+ DL,
-+ MVT::Other,
-+ Ops, 6);
-+ ExportMap[Slot] = Res.getNode();
-+ return Res;
-+ }
-+
-+ SDNode *ExportInstruction = ExportMap[Slot];
-+ SDValue PreviousVector = ExportInstruction->getOperand(1);
-+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
-+ DL, MVT::v4f32,
-+ PreviousVector,
-+ Scalar,
-+ DAG.getConstant(Channel, MVT::i32));
-+
-+ unsigned Mask = cast<ConstantSDNode>(ExportInstruction->getOperand(5))
-+ ->getZExtValue();
-+ Mask |= (1 << Channel);
-+
-+ const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
-+ DAG.getConstant(Inst, MVT::i32),
-+ DAG.getConstant(Type, MVT::i32),
-+ DAG.getConstant(Slot, MVT::i32),
-+ DAG.getConstant(Mask, MVT::i32)};
-+
-+ DAG.UpdateNodeOperands(ExportInstruction,
-+ Ops, 6);
-+
-+ return Chain;
-+
-+}
-+
-+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
-+ switch (Op.getOpcode()) {
-+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
-+ case ISD::ROTL: return LowerROTL(Op, DAG);
-+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
-+ case ISD::SELECT: return LowerSELECT(Op, DAG);
-+ case ISD::SETCC: return LowerSETCC(Op, DAG);
-+ case ISD::STORE: return LowerSTORE(Op, DAG);
-+ case ISD::LOAD: return LowerLOAD(Op, DAG);
-+ case ISD::FPOW: return LowerFPOW(Op, DAG);
-+ case ISD::INTRINSIC_VOID: {
-+ SDValue Chain = Op.getOperand(0);
-+ unsigned IntrinsicID =
-+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-+ switch (IntrinsicID) {
-+ case AMDGPUIntrinsic::AMDGPU_store_output: {
-+ MachineFunction &MF = DAG.getMachineFunction();
-+ MachineRegisterInfo &MRI = MF.getRegInfo();
-+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
-+ if (!MRI.isLiveOut(Reg)) {
-+ MRI.addLiveOut(Reg);
-+ }
-+ return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
-+ }
-+ case AMDGPUIntrinsic::R600_store_pixel_color: {
-+ MachineFunction &MF = DAG.getMachineFunction();
-+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-+
-+ SDNode **OutputsMap = MFI->Outputs;
-+ return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
-+ RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
-+ Chain);
-+
-+ }
-+
-+ // default for switch(IntrinsicID)
-+ default: break;
-+ }
-+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
-+ break;
-+ }
-+ case ISD::INTRINSIC_WO_CHAIN: {
-+ unsigned IntrinsicID =
-+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+ EVT VT = Op.getValueType();
-+ DebugLoc DL = Op.getDebugLoc();
-+ switch(IntrinsicID) {
-+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-+ case AMDGPUIntrinsic::R600_load_input: {
-+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
-+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
-+ }
-+ case AMDGPUIntrinsic::R600_load_input_perspective: {
-+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-+ if (slot < 0)
-+ return DAG.getUNDEF(MVT::f32);
-+ SDValue FullVector = DAG.getNode(
-+ AMDGPUISD::INTERP,
-+ DL, MVT::v4f32,
-+ DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
-+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
-+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
-+ }
-+ case AMDGPUIntrinsic::R600_load_input_linear: {
-+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-+ if (slot < 0)
-+ return DAG.getUNDEF(MVT::f32);
-+ SDValue FullVector = DAG.getNode(
-+ AMDGPUISD::INTERP,
-+ DL, MVT::v4f32,
-+ DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
-+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
-+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
-+ }
-+ case AMDGPUIntrinsic::R600_load_input_constant: {
-+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-+ if (slot < 0)
-+ return DAG.getUNDEF(MVT::f32);
-+ SDValue FullVector = DAG.getNode(
-+ AMDGPUISD::INTERP_P0,
-+ DL, MVT::v4f32,
-+ DAG.getConstant(slot / 4 , MVT::i32));
-+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
-+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
-+ }
-+
-+ case r600_read_ngroups_x:
-+ return LowerImplicitParameter(DAG, VT, DL, 0);
-+ case r600_read_ngroups_y:
-+ return LowerImplicitParameter(DAG, VT, DL, 1);
-+ case r600_read_ngroups_z:
-+ return LowerImplicitParameter(DAG, VT, DL, 2);
-+ case r600_read_global_size_x:
-+ return LowerImplicitParameter(DAG, VT, DL, 3);
-+ case r600_read_global_size_y:
-+ return LowerImplicitParameter(DAG, VT, DL, 4);
-+ case r600_read_global_size_z:
-+ return LowerImplicitParameter(DAG, VT, DL, 5);
-+ case r600_read_local_size_x:
-+ return LowerImplicitParameter(DAG, VT, DL, 6);
-+ case r600_read_local_size_y:
-+ return LowerImplicitParameter(DAG, VT, DL, 7);
-+ case r600_read_local_size_z:
-+ return LowerImplicitParameter(DAG, VT, DL, 8);
-+
-+ case r600_read_tgid_x:
-+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-+ AMDGPU::T1_X, VT);
-+ case r600_read_tgid_y:
-+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-+ AMDGPU::T1_Y, VT);
-+ case r600_read_tgid_z:
-+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-+ AMDGPU::T1_Z, VT);
-+ case r600_read_tidig_x:
-+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-+ AMDGPU::T0_X, VT);
-+ case r600_read_tidig_y:
-+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-+ AMDGPU::T0_Y, VT);
-+ case r600_read_tidig_z:
-+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
-+ AMDGPU::T0_Z, VT);
-+ }
-+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
-+ break;
-+ }
-+ } // end switch(Op.getOpcode())
-+ return SDValue();
-+}
-+
-+void R600TargetLowering::ReplaceNodeResults(SDNode *N,
-+ SmallVectorImpl<SDValue> &Results,
-+ SelectionDAG &DAG) const {
-+ switch (N->getOpcode()) {
-+ default: return;
-+ case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
-+ return;
-+ case ISD::LOAD: {
-+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
-+ Results.push_back(SDValue(Node, 0));
-+ Results.push_back(SDValue(Node, 1));
-+ // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
-+ // function
-+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
-+ return;
-+ }
-+ }
-+}
-+
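-+// FP_TO_UINT with an i1 result is marked Custom in the constructor; for a
-+// single bit the conversion reduces to a "!= 0.0" comparison.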
-+SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
-+ return DAG.getNode(
-+ ISD::SETCC,
-+ Op.getDebugLoc(),
-+ MVT::i1,
-+ Op, DAG.getConstantFP(0.0f, MVT::f32),
-+ DAG.getCondCode(ISD::SETNE)
-+ );
-+}
-+
-+SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-+ SDValue Chain = Op.getOperand(0);
-+ SDValue CC = Op.getOperand(1);
-+ SDValue LHS = Op.getOperand(2);
-+ SDValue RHS = Op.getOperand(3);
-+ SDValue JumpT = Op.getOperand(4);
-+ SDValue CmpValue;
-+ SDValue Result;
-+
-+ if (LHS.getValueType() == MVT::i32) {
-+ CmpValue = DAG.getNode(
-+ ISD::SELECT_CC,
-+ Op.getDebugLoc(),
-+ MVT::i32,
-+ LHS, RHS,
-+ DAG.getConstant(-1, MVT::i32),
-+ DAG.getConstant(0, MVT::i32),
-+ CC);
-+ } else if (LHS.getValueType() == MVT::f32) {
-+ CmpValue = DAG.getNode(
-+ ISD::SELECT_CC,
-+ Op.getDebugLoc(),
-+ MVT::f32,
-+ LHS, RHS,
-+ DAG.getConstantFP(1.0f, MVT::f32),
-+ DAG.getConstantFP(0.0f, MVT::f32),
-+ CC);
-+ } else {
-+ assert(0 && "Not valid type for br_cc");
-+ }
-+ Result = DAG.getNode(
-+ AMDGPUISD::BRANCH_COND,
-+ CmpValue.getDebugLoc(),
-+ MVT::Other, Chain,
-+ JumpT, CmpValue);
-+ return Result;
-+}
-+
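-+// Dword offsets into the implicit parameter buffer, as used by the
-+// intrinsic lowering above: 0-2 hold ngroups.{x,y,z}, 3-5 hold
-+// global_size.{x,y,z} and 6-8 hold local_size.{x,y,z}.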
-+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
-+ DebugLoc DL,
-+ unsigned DwordOffset) const {
-+ unsigned ByteOffset = DwordOffset * 4;
-+ PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-+ AMDGPUAS::PARAM_I_ADDRESS);
-+
-+ // We shouldn't be using an offset wider than 16 bits for implicit parameters.
-+ assert(isInt<16>(ByteOffset));
-+
-+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
-+ DAG.getConstant(ByteOffset, MVT::i32), // PTR
-+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
-+ false, false, false, 0);
-+}
-+
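-+// BITALIGN selects 32 bits out of the 64-bit concatenation (src0:src1)
-+// shifted right by src2, so BITALIGN(x, x, s) is a rotate right by s and
-+// rotl(x, n) can be emitted as BITALIGN(x, x, 32 - n).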
-+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
-+ Op.getOperand(0),
-+ Op.getOperand(0),
-+ DAG.getNode(ISD::SUB, DL, VT,
-+ DAG.getConstant(32, MVT::i32),
-+ Op.getOperand(1)));
-+}
-+
-+bool R600TargetLowering::isZero(SDValue Op) const {
-+ if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
-+ return Cst->isNullValue();
-+ } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
-+ return CstFP->isZero();
-+ } else {
-+ return false;
-+ }
-+}
-+
-+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ SDValue True = Op.getOperand(2);
-+ SDValue False = Op.getOperand(3);
-+ SDValue CC = Op.getOperand(4);
-+ SDValue Temp;
-+
-+ // LHS and RHS are guaranteed to be the same value type
-+ EVT CompareVT = LHS.getValueType();
-+
-+ // Check if we can lower this to a native operation.
-+
-+ // Try to lower to a CND* instruction:
-+ // CND* instructions require RHS to be zero. Some SELECT_CC nodes that
-+ // can be lowered to CND* instructions can also be lowered to SET*
-+ // instructions. CND* instructions are cheaper, because they don't
-+ // require additional instructions to convert their result to the correct
-+ // value type, so this check should be first.
-+ if (isZero(LHS) || isZero(RHS)) {
-+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
-+ SDValue Zero = (isZero(LHS) ? LHS : RHS);
-+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+ if (CompareVT != VT) {
-+ // Bitcast True / False to the correct types. This will end up being
-+ // a nop, but it allows us to define only a single pattern in the
-+ // .TD files for each CND* instruction rather than having to have
-+ // one pattern for integer True/False and one for fp True/False
-+ True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
-+ False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
-+ }
-+ if (isZero(LHS)) {
-+ CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
-+ }
-+
-+ switch (CCOpcode) {
-+ case ISD::SETONE:
-+ case ISD::SETUNE:
-+ case ISD::SETNE:
-+ case ISD::SETULE:
-+ case ISD::SETULT:
-+ case ISD::SETOLE:
-+ case ISD::SETOLT:
-+ case ISD::SETLE:
-+ case ISD::SETLT:
-+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
-+ Temp = True;
-+ True = False;
-+ False = Temp;
-+ break;
-+ default:
-+ break;
-+ }
-+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-+ Cond, Zero,
-+ True, False,
-+ DAG.getCondCode(CCOpcode));
-+ return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
-+ }
-+
-+ // Try to lower to a SET* instruction:
-+ // We need all the operands of SELECT_CC to have the same value type, so if
-+ // necessary we need to change True and False to be the same type as LHS and
-+ // RHS, and then convert the result of the select_cc back to the correct type.
-+
-+ // Move hardware True/False values to the correct operand.
-+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
-+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+ std::swap(False, True);
-+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
-+ }
-+
-+ if (isHWTrueValue(True) && isHWFalseValue(False)) {
-+ if (CompareVT != VT) {
-+ if (VT == MVT::f32 && CompareVT == MVT::i32) {
-+ SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-+ LHS, RHS,
-+ DAG.getConstant(-1, MVT::i32),
-+ DAG.getConstant(0, MVT::i32),
-+ CC);
-+ // Convert integer values of true (-1) and false (0) to fp values of
-+ // true (1.0f) and false (0.0f).
-+ SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
-+ DAG.getConstant(1, MVT::i32));
-+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
-+ } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
-+ SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-+ LHS, RHS,
-+ DAG.getConstantFP(1.0f, MVT::f32),
-+ DAG.getConstantFP(0.0f, MVT::f32),
-+ CC);
-+ // Convert fp values of true (1.0f) and false (0.0f) to integer values
-+ // of true (-1) and false (0).
-+ SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
-+ return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
-+ } else {
-+ // I don't think there will be any other type pairings.
-+ assert(!"Unhandled operand type parings in SELECT_CC");
-+ }
-+ } else {
-+ // This SELECT_CC is already legal.
-+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
-+ }
-+ }
-+
-+ // Possible Min/Max pattern
-+ SDValue MinMax = LowerMinMax(Op, DAG);
-+ if (MinMax.getNode()) {
-+ return MinMax;
-+ }
-+
-+ // If we make it this far, it means we have no native instructions to handle
-+ // this SELECT_CC, so we must lower it.
-+ SDValue HWTrue, HWFalse;
-+
-+ if (CompareVT == MVT::f32) {
-+ HWTrue = DAG.getConstantFP(1.0f, CompareVT);
-+ HWFalse = DAG.getConstantFP(0.0f, CompareVT);
-+ } else if (CompareVT == MVT::i32) {
-+ HWTrue = DAG.getConstant(-1, CompareVT);
-+ HWFalse = DAG.getConstant(0, CompareVT);
-+ } else {
-+ assert(!"Unhandled value type in LowerSELECT_CC");
-+ }
-+
-+ // Lower this unsupported SELECT_CC into a combination of two supported
-+ // SELECT_CC operations.
-+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
-+
-+ return DAG.getNode(ISD::SELECT_CC, DL, VT,
-+ Cond, HWFalse,
-+ True, False,
-+ DAG.getCondCode(ISD::SETNE));
-+}
-+
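-+// select(cond, t, f) is equivalent to select_cc(cond, 0, t, f, setne).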
-+SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
-+ return DAG.getNode(ISD::SELECT_CC,
-+ Op.getDebugLoc(),
-+ Op.getValueType(),
-+ Op.getOperand(0),
-+ DAG.getConstant(0, MVT::i32),
-+ Op.getOperand(1),
-+ Op.getOperand(2),
-+ DAG.getCondCode(ISD::SETNE));
-+}
-+
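-+// The SELECT_CC nodes built below use -1 (or 1.0f, later converted to an
-+// integer) for true, so the final AND with 1 normalizes the result to the
-+// 0/1 value an i32 SETCC is expected to produce.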
-+SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-+ SDValue Cond;
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ SDValue CC = Op.getOperand(2);
-+ DebugLoc DL = Op.getDebugLoc();
-+ assert(Op.getValueType() == MVT::i32);
-+ if (LHS.getValueType() == MVT::i32) {
-+ Cond = DAG.getNode(
-+ ISD::SELECT_CC,
-+ Op.getDebugLoc(),
-+ MVT::i32,
-+ LHS, RHS,
-+ DAG.getConstant(-1, MVT::i32),
-+ DAG.getConstant(0, MVT::i32),
-+ CC);
-+ } else if (LHS.getValueType() == MVT::f32) {
-+ Cond = DAG.getNode(
-+ ISD::SELECT_CC,
-+ Op.getDebugLoc(),
-+ MVT::f32,
-+ LHS, RHS,
-+ DAG.getConstantFP(1.0f, MVT::f32),
-+ DAG.getConstantFP(0.0f, MVT::f32),
-+ CC);
-+ Cond = DAG.getNode(
-+ ISD::FP_TO_SINT,
-+ DL,
-+ MVT::i32,
-+ Cond);
-+ } else {
-+ assert(0 && "Not valid type for set_cc");
-+ }
-+ Cond = DAG.getNode(
-+ ISD::AND,
-+ DL,
-+ MVT::i32,
-+ DAG.getConstant(1, MVT::i32),
-+ Cond);
-+ return Cond;
-+}
-+
-+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
-+ SDValue Chain = Op.getOperand(0);
-+ SDValue Value = Op.getOperand(1);
-+ SDValue Ptr = Op.getOperand(2);
-+
-+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
-+ Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
-+ // Convert pointer from byte address to dword address.
-+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
-+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
-+ Ptr, DAG.getConstant(2, MVT::i32)));
-+
-+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
-+ assert(!"Truncated and indexed stores not supported yet");
-+ } else {
-+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
-+ }
-+ return Chain;
-+ }
-+ return SDValue();
-+}
-+
-+// Returns 512 + (kc_bank << 12).
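-+// e.g. CONSTANT_BUFFER_1 maps to 512 + 4096 = 4608.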
-+static int
-+ConstantAddressBlock(unsigned AddressSpace) {
-+ switch (AddressSpace) {
-+ case AMDGPUAS::CONSTANT_BUFFER_0:
-+ return 512;
-+ case AMDGPUAS::CONSTANT_BUFFER_1:
-+ return 512 + 4096;
-+ case AMDGPUAS::CONSTANT_BUFFER_2:
-+ return 512 + 4096 * 2;
-+ case AMDGPUAS::CONSTANT_BUFFER_3:
-+ return 512 + 4096 * 3;
-+ case AMDGPUAS::CONSTANT_BUFFER_4:
-+ return 512 + 4096 * 4;
-+ case AMDGPUAS::CONSTANT_BUFFER_5:
-+ return 512 + 4096 * 5;
-+ case AMDGPUAS::CONSTANT_BUFFER_6:
-+ return 512 + 4096 * 6;
-+ case AMDGPUAS::CONSTANT_BUFFER_7:
-+ return 512 + 4096 * 7;
-+ case AMDGPUAS::CONSTANT_BUFFER_8:
-+ return 512 + 4096 * 8;
-+ case AMDGPUAS::CONSTANT_BUFFER_9:
-+ return 512 + 4096 * 9;
-+ case AMDGPUAS::CONSTANT_BUFFER_10:
-+ return 512 + 4096 * 10;
-+ case AMDGPUAS::CONSTANT_BUFFER_11:
-+ return 512 + 4096 * 11;
-+ case AMDGPUAS::CONSTANT_BUFFER_12:
-+ return 512 + 4096 * 12;
-+ case AMDGPUAS::CONSTANT_BUFFER_13:
-+ return 512 + 4096 * 13;
-+ case AMDGPUAS::CONSTANT_BUFFER_14:
-+ return 512 + 4096 * 14;
-+ case AMDGPUAS::CONSTANT_BUFFER_15:
-+ return 512 + 4096 * 15;
-+ default:
-+ return -1;
-+ }
-+}
-+
-+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
-+{
-+ EVT VT = Op.getValueType();
-+ DebugLoc DL = Op.getDebugLoc();
-+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
-+ SDValue Chain = Op.getOperand(0);
-+ SDValue Ptr = Op.getOperand(1);
-+ SDValue LoweredLoad;
-+
-+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
-+ if (ConstantBlock > -1) {
-+ SDValue Result;
-+ if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
-+ dyn_cast<Constant>(LoadNode->getSrcValue())) {
-+ SDValue Slots[4];
-+ for (unsigned i = 0; i < 4; i++) {
-+ // We want the Const position encoded with the following formula:
-+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
-+ // where const_index is the Ptr computed by LLVM using an alignment of 16.
-+ // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and
-+ // then divide by 4 at the ISel step.
-+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
-+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
-+ }
-+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
-+ } else {
-+ // A non-constant Ptr can't be folded; keep it as a v4i32 load.
-+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
-+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
-+ );
-+ }
-+
-+ if (!VT.isVector()) {
-+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
-+ DAG.getConstant(0, MVT::i32));
-+ }
-+
-+ SDValue MergedValues[2] = {
-+ Result,
-+ Chain
-+ };
-+ return DAG.getMergeValues(MergedValues, 2, DL);
-+ }
-+
-+ return SDValue();
-+}
-+
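-+// Expand pow using the identity pow(x, y) = exp2(y * log2(x)), which holds
-+// for x > 0.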
-+SDValue R600TargetLowering::LowerFPOW(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+ SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
-+ SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
-+ return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
-+}
-+
-+/// XXX Only kernel functions are supported, so we can assume for now that
-+/// every function is a kernel function, but in the future we should use
-+/// separate calling conventions for kernel and non-kernel functions.
-+SDValue R600TargetLowering::LowerFormalArguments(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const {
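-+ // Kernel arguments start at byte offset 36: the first nine dwords
-+ // (9 * 4 bytes) of the vertex buffer hold the implicit parameters (see
-+ // LowerImplicitParameter).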
-+ unsigned ParamOffsetBytes = 36;
-+ Function::const_arg_iterator FuncArg =
-+ DAG.getMachineFunction().getFunction()->arg_begin();
-+ for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
-+ EVT VT = Ins[i].VT;
-+ Type *ArgType = FuncArg->getType();
-+ unsigned ArgSizeInBits = ArgType->isPointerTy() ?
-+ 32 : ArgType->getPrimitiveSizeInBits();
-+ unsigned ArgBytes = ArgSizeInBits >> 3;
-+ EVT ArgVT;
-+ if (ArgSizeInBits < VT.getSizeInBits()) {
-+ assert(!ArgType->isFloatTy() &&
-+ "Extending floating point arguments not supported yet");
-+ ArgVT = MVT::getIntegerVT(ArgSizeInBits);
-+ } else {
-+ ArgVT = VT;
-+ }
-+ PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-+ AMDGPUAS::PARAM_I_ADDRESS);
-+ SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
-+ DAG.getConstant(ParamOffsetBytes, MVT::i32),
-+ MachinePointerInfo(new Argument(PtrTy)),
-+ ArgVT, false, false, ArgBytes);
-+ InVals.push_back(Arg);
-+ ParamOffsetBytes += ArgBytes;
-+ }
-+ return Chain;
-+}
-+
-+EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
-+ if (!VT.isVector()) return MVT::i32;
-+ return VT.changeVectorElementTypeToInteger();
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom DAG Optimizations
-+//===----------------------------------------------------------------------===//
-+
-+SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
-+ DAGCombinerInfo &DCI) const {
-+ SelectionDAG &DAG = DCI.DAG;
-+
-+ switch (N->getOpcode()) {
-+ // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
-+ case ISD::FP_ROUND: {
-+ SDValue Arg = N->getOperand(0);
-+ if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
-+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
-+ Arg.getOperand(0));
-+ }
-+ break;
-+ }
-+ // An extract_vector_elt of a build_vector generated by custom lowering
-+ // also needs to be combined here
-+ case ISD::EXTRACT_VECTOR_ELT: {
-+ SDValue Arg = N->getOperand(0);
-+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
-+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
-+ unsigned Element = Const->getZExtValue();
-+ return Arg->getOperand(Element);
-+ }
-+ }
-+ }
-+ }
-+ return SDValue();
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600ISelLowering.h llvm-r600/lib/Target/R600/R600ISelLowering.h
---- llvm-3.2.src/lib/Target/R600/R600ISelLowering.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600ISelLowering.h 2013-01-25 19:43:57.463383054 +0100
-@@ -0,0 +1,73 @@
-+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief R600 DAG Lowering interface definition
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600ISELLOWERING_H
-+#define R600ISELLOWERING_H
-+
-+#include "AMDGPUISelLowering.h"
-+
-+namespace llvm {
-+
-+class R600InstrInfo;
-+
-+class R600TargetLowering : public AMDGPUTargetLowering {
-+public:
-+ R600TargetLowering(TargetMachine &TM);
-+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
-+ MachineBasicBlock * BB) const;
-+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-+ void ReplaceNodeResults(SDNode * N,
-+ SmallVectorImpl<SDValue> &Results,
-+ SelectionDAG &DAG) const;
-+ virtual SDValue LowerFormalArguments(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const;
-+ virtual EVT getSetCCResultType(EVT VT) const;
-+private:
-+ const R600InstrInfo * TII;
-+
-+ /// Each OpenCL kernel has nine implicit parameters that are stored in the
-+ /// first nine dwords of a Vertex Buffer. These implicit parameters are
-+ /// lowered to load instructions which retrieve the values from the Vertex
-+ /// Buffer.
-+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
-+ DebugLoc DL, unsigned DwordOffset) const;
-+
-+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
-+
-+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
-+
-+ /// \brief Lower ROTL opcode to BITALIGN
-+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
-+
-+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-+
-+ bool isZero(SDValue Op) const;
-+};
-+
-+} // End namespace llvm;
-+
-+#endif // R600ISELLOWERING_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600LowerConstCopy.cpp llvm-r600/lib/Target/R600/R600LowerConstCopy.cpp
---- llvm-3.2.src/lib/Target/R600/R600LowerConstCopy.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600LowerConstCopy.cpp 2013-01-25 19:43:57.466716387 +0100
-@@ -0,0 +1,74 @@
-+//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// This pass is intended to handle the remaining ConstCopy pseudo
-+/// MachineInstrs. ISel will fold each constant buffer read into a scalar ALU
-+/// instruction; however, it cannot fold them into vector instructions like
-+/// DOT4 or Cube, so ISel emits ConstCopy instead. This pass (executed after
-+/// ExpandingSpecialInstr) tries to fold them where possible and replaces them
-+/// with a MOV otherwise.
-+/// TODO: Implement the folding part, using a copy propagation algorithm.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "R600InstrInfo.h"
-+#include "llvm/GlobalValue.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+
-+namespace llvm {
-+
-+class R600LowerConstCopy : public MachineFunctionPass {
-+private:
-+ static char ID;
-+ const R600InstrInfo *TII;
-+public:
-+ R600LowerConstCopy(TargetMachine &tm);
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
-+};
-+
-+char R600LowerConstCopy::ID = 0;
-+
-+
-+R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
-+ MachineFunctionPass(ID),
-+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
-+{
-+}
-+
-+bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
-+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+ BB != BB_E; ++BB) {
-+ MachineBasicBlock &MBB = *BB;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+ I != E;) {
-+ MachineInstr &MI = *I;
-+ I = llvm::next(I);
-+ if (MI.getOpcode() != AMDGPU::CONST_COPY)
-+ continue;
-+ MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
-+ MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
-+ NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
-+ MI.eraseFromParent();
-+ }
-+ }
-+ return false;
-+}
-+
-+FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
-+ return new R600LowerConstCopy(tm);
-+}
-+
-+}
-+
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.cpp llvm-r600/lib/Target/R600/R600MachineFunctionInfo.cpp
---- llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600MachineFunctionInfo.cpp 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,33 @@
-+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#include "R600MachineFunctionInfo.h"
-+
-+using namespace llvm;
-+
-+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
-+ : MachineFunctionInfo(),
-+ HasLinearInterpolation(false),
-+ HasPerspectiveInterpolation(false) {
-+ memset(Outputs, 0, sizeof(Outputs));
-+ }
-+
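-+// The interpolation IJ pairs are allocated in a fixed order: the
-+// perspective pair (when used) always takes index 0 and the linear pair
-+// takes the next free index.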
-+unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
-+ assert(HasPerspectiveInterpolation);
-+ return 0;
-+}
-+
-+unsigned R600MachineFunctionInfo::GetIJLinearIndex() const {
-+ assert(HasLinearInterpolation);
-+ if (HasPerspectiveInterpolation)
-+ return 1;
-+ else
-+ return 0;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.h llvm-r600/lib/Target/R600/R600MachineFunctionInfo.h
---- llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600MachineFunctionInfo.h 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,38 @@
-+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600MACHINEFUNCTIONINFO_H
-+#define R600MACHINEFUNCTIONINFO_H
-+
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include <vector>
-+
-+namespace llvm {
-+
-+class R600MachineFunctionInfo : public MachineFunctionInfo {
-+
-+public:
-+ R600MachineFunctionInfo(const MachineFunction &MF);
-+ std::vector<unsigned> ReservedRegs;
-+ SDNode *Outputs[16];
-+ bool HasLinearInterpolation;
-+ bool HasPerspectiveInterpolation;
-+
-+ unsigned GetIJLinearIndex() const;
-+ unsigned GetIJPerspectiveIndex() const;
-+
-+};
-+
-+} // End llvm namespace
-+
-+#endif //R600MACHINEFUNCTIONINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600RegisterInfo.cpp llvm-r600/lib/Target/R600/R600RegisterInfo.cpp
---- llvm-3.2.src/lib/Target/R600/R600RegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600RegisterInfo.cpp 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,85 @@
-+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief R600 implementation of the TargetRegisterInfo class.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "R600RegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+#include "R600Defines.h"
-+#include "R600MachineFunctionInfo.h"
-+
-+using namespace llvm;
-+
-+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
-+ const TargetInstrInfo &tii)
-+: AMDGPURegisterInfo(tm, tii),
-+ TM(tm),
-+ TII(tii)
-+ { }
-+
-+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
-+ BitVector Reserved(getNumRegs());
-+ const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
-+
-+ Reserved.set(AMDGPU::ZERO);
-+ Reserved.set(AMDGPU::HALF);
-+ Reserved.set(AMDGPU::ONE);
-+ Reserved.set(AMDGPU::ONE_INT);
-+ Reserved.set(AMDGPU::NEG_HALF);
-+ Reserved.set(AMDGPU::NEG_ONE);
-+ Reserved.set(AMDGPU::PV_X);
-+ Reserved.set(AMDGPU::ALU_LITERAL_X);
-+ Reserved.set(AMDGPU::ALU_CONST);
-+ Reserved.set(AMDGPU::PREDICATE_BIT);
-+ Reserved.set(AMDGPU::PRED_SEL_OFF);
-+ Reserved.set(AMDGPU::PRED_SEL_ZERO);
-+ Reserved.set(AMDGPU::PRED_SEL_ONE);
-+
-+ for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
-+ E = MFI->ReservedRegs.end(); I != E; ++I) {
-+ Reserved.set(*I);
-+ }
-+
-+ return Reserved;
-+}
-+
-+const TargetRegisterClass *
-+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
-+ switch (rc->getID()) {
-+ case AMDGPU::GPRF32RegClassID:
-+ case AMDGPU::GPRI32RegClassID:
-+ return &AMDGPU::R600_Reg32RegClass;
-+ default: return rc;
-+ }
-+}
-+
-+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
-+ return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
-+}
-+
-+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
-+ MVT VT) const {
-+ switch(VT.SimpleTy) {
-+ default:
-+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
-+ }
-+}
-+
-+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
-+ switch (Channel) {
-+ default: assert(!"Invalid channel index"); return 0;
-+ case 0: return AMDGPU::sel_x;
-+ case 1: return AMDGPU::sel_y;
-+ case 2: return AMDGPU::sel_z;
-+ case 3: return AMDGPU::sel_w;
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600RegisterInfo.h llvm-r600/lib/Target/R600/R600RegisterInfo.h
---- llvm-3.2.src/lib/Target/R600/R600RegisterInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600RegisterInfo.h 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,55 @@
-+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition for R600RegisterInfo
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600REGISTERINFO_H_
-+#define R600REGISTERINFO_H_
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDGPURegisterInfo.h"
-+
-+namespace llvm {
-+
-+class R600TargetMachine;
-+class TargetInstrInfo;
-+
-+struct R600RegisterInfo : public AMDGPURegisterInfo {
-+ AMDGPUTargetMachine &TM;
-+ const TargetInstrInfo &TII;
-+
-+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
-+
-+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
-+
-+ /// \param RC is an AMDIL reg class.
-+ ///
-+ /// \returns the R600 reg class that is equivalent to \p RC.
-+ virtual const TargetRegisterClass *getISARegClass(
-+ const TargetRegisterClass *RC) const;
-+
-+ /// \brief Get the HW encoding for a register's channel.
-+ unsigned getHWRegChan(unsigned reg) const;
-+
-+ /// \brief Get the register class of the specified type to use in the
-+ /// CFGStructurizer.
-+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
-+
-+ /// \returns the sub reg enum value for the given \p Channel
-+ /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
-+ unsigned getSubRegFromChannel(unsigned Channel) const;
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif // R600REGISTERINFO_H_
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600RegisterInfo.td llvm-r600/lib/Target/R600/R600RegisterInfo.td
---- llvm-3.2.src/lib/Target/R600/R600RegisterInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600RegisterInfo.td 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,101 @@
-+
-+class R600Reg <string name, bits<16> encoding> : Register<name> {
-+ let Namespace = "AMDGPU";
-+ let HWEncoding = encoding;
-+}
-+
-+class R600RegWithChan <string name, bits<9> sel, string chan> :
-+ Register <name> {
-+
-+ field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
-+ !if(!eq(chan, "Y"), 1,
-+ !if(!eq(chan, "Z"), 2,
-+ !if(!eq(chan, "W"), 3, 0))));
-+ let HWEncoding{8-0} = sel;
-+ let HWEncoding{10-9} = chan_encoding;
-+ let Namespace = "AMDGPU";
-+}
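-+
-+// Encoding sketch (illustrative example): T5.Z has sel = 5 and chan = Z (2),
-+// so HWEncoding = (2 << 9) | 5 = 0x405.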
-+
-+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
-+ RegisterWithSubRegs<n, subregs> {
-+ let Namespace = "AMDGPU";
-+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
-+ let HWEncoding = encoding;
-+}
-+
-+foreach Index = 0-127 in {
-+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
-+ // 32-bit Temporary Registers
-+ def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
-+ }
-+ // 128-bit Temporary Registers
-+ def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
-+ [!cast<Register>("T"#Index#"_X"),
-+ !cast<Register>("T"#Index#"_Y"),
-+ !cast<Register>("T"#Index#"_Z"),
-+ !cast<Register>("T"#Index#"_W")],
-+ Index>;
-+}
-+
-+// Array Base Registers holding inputs in the fragment shader (FS)
-+foreach Index = 448-464 in {
-+ def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
-+}
-+
-+
-+// Special Registers
-+
-+def ZERO : R600Reg<"0.0", 248>;
-+def ONE : R600Reg<"1.0", 249>;
-+def NEG_ONE : R600Reg<"-1.0", 249>;
-+def ONE_INT : R600Reg<"1", 250>;
-+def HALF : R600Reg<"0.5", 252>;
-+def NEG_HALF : R600Reg<"-0.5", 252>;
-+def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
-+def PV_X : R600Reg<"pv.x", 254>;
-+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
-+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
-+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
-+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
-+
-+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
-+ (add (sequence "ArrayBase%u", 448, 464))>;
-+// special registers for ALU src operands
-+// const buffer reference, SRCx_SEL contains index
-+def ALU_CONST : R600Reg<"CBuf", 0>;
-+// interpolation param reference, SRCx_SEL contains index
-+def ALU_PARAM : R600Reg<"Param", 0>;
-+
-+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
-+ (add (sequence "T%u_X", 0, 127))>;
-+
-+def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
-+ (add (sequence "T%u_Y", 0, 127))>;
-+
-+def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
-+ (add (sequence "T%u_Z", 0, 127))>;
-+
-+def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
-+ (add (sequence "T%u_W", 0, 127))>;
-+
-+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
-+ (interleave R600_TReg32_X, R600_TReg32_Y,
-+ R600_TReg32_Z, R600_TReg32_W)>;
-+
-+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
-+ R600_TReg32,
-+ R600_ArrayBase,
-+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
-+ ALU_CONST, ALU_PARAM
-+ )>;
-+
-+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
-+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
-+
-+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
-+ PREDICATE_BIT)>;
-+
-+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
-+ (add (sequence "T%u_XYZW", 0, 127))> {
-+ let CopyCost = -1;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Schedule.td llvm-r600/lib/Target/R600/R600Schedule.td
---- llvm-3.2.src/lib/Target/R600/R600Schedule.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600Schedule.td 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,36 @@
-+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// R600 has a VLIW architecture. On pre-Cayman cards there are five
-+// instruction slots: ALU.X, ALU.Y, ALU.Z, ALU.W and TRANS. On Cayman
-+// cards the TRANS slot has been removed.
-+//
-+//===----------------------------------------------------------------------===//
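-+// In the itinerary below, AnyALU instructions may issue in any slot
-+// (including TRANS), VecALU only in the four vector slots, and TransALU
-+// only in TRANS.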
-+
-+
-+def ALU_X : FuncUnit;
-+def ALU_Y : FuncUnit;
-+def ALU_Z : FuncUnit;
-+def ALU_W : FuncUnit;
-+def TRANS : FuncUnit;
-+
-+def AnyALU : InstrItinClass;
-+def VecALU : InstrItinClass;
-+def TransALU : InstrItinClass;
-+
-+def R600_EG_Itin : ProcessorItineraries <
-+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
-+ [],
-+ [
-+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
-+ InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
-+ InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
-+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
-+ ]
-+>;
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIAnnotateControlFlow.cpp llvm-r600/lib/Target/R600/SIAnnotateControlFlow.cpp
---- llvm-3.2.src/lib/Target/R600/SIAnnotateControlFlow.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIAnnotateControlFlow.cpp 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,330 @@
-+//===-- SIAnnotateControlFlow.cpp - Annotate SI control flow --------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// Annotates the control flow with hardware-specific intrinsics.
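-+///
-+/// A rough sketch of what openIf() does (illustrative IR, not verbatim):
-+///
-+///   br i1 %cond, label %then, label %endif
-+///
-+/// becomes
-+///
-+///   %ret = call { i1, i64 } @llvm.SI.if(i1 %cond)
-+///   %cnd = extractvalue { i1, i64 } %ret, 0
-+///   %sav = extractvalue { i1, i64 } %ret, 1  ; pushed, later fed to end.cf
-+///   br i1 %cnd, label %then, label %endif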
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+
-+#include "llvm/Pass.h"
-+#include "llvm/Module.h"
-+#include "llvm/Analysis/Dominators.h"
-+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-+#include "llvm/ADT/DepthFirstIterator.h"
-+#include "llvm/Transforms/Utils/SSAUpdater.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+// Complex types used in this pass
-+typedef std::pair<BasicBlock *, Value *> StackEntry;
-+typedef SmallVector<StackEntry, 16> StackVector;
-+
-+// Intrinsic names the control flow is annotated with
-+static const char *IfIntrinsic = "llvm.SI.if";
-+static const char *ElseIntrinsic = "llvm.SI.else";
-+static const char *BreakIntrinsic = "llvm.SI.break";
-+static const char *IfBreakIntrinsic = "llvm.SI.if.break";
-+static const char *ElseBreakIntrinsic = "llvm.SI.else.break";
-+static const char *LoopIntrinsic = "llvm.SI.loop";
-+static const char *EndCfIntrinsic = "llvm.SI.end.cf";
-+
-+class SIAnnotateControlFlow : public FunctionPass {
-+
-+ static char ID;
-+
-+ Type *Boolean;
-+ Type *Void;
-+ Type *Int64;
-+ Type *ReturnStruct;
-+
-+ ConstantInt *BoolTrue;
-+ ConstantInt *BoolFalse;
-+ UndefValue *BoolUndef;
-+ Constant *Int64Zero;
-+
-+ Constant *If;
-+ Constant *Else;
-+ Constant *Break;
-+ Constant *IfBreak;
-+ Constant *ElseBreak;
-+ Constant *Loop;
-+ Constant *EndCf;
-+
-+ DominatorTree *DT;
-+ StackVector Stack;
-+ SSAUpdater PhiInserter;
-+
-+ bool isTopOfStack(BasicBlock *BB);
-+
-+ Value *popSaved();
-+
-+ void push(BasicBlock *BB, Value *Saved);
-+
-+ bool isElse(PHINode *Phi);
-+
-+ void eraseIfUnused(PHINode *Phi);
-+
-+ void openIf(BranchInst *Term);
-+
-+ void insertElse(BranchInst *Term);
-+
-+ void handleLoopCondition(Value *Cond);
-+
-+ void handleLoop(BranchInst *Term);
-+
-+ void closeControlFlow(BasicBlock *BB);
-+
-+public:
-+ SIAnnotateControlFlow():
-+ FunctionPass(ID) { }
-+
-+ virtual bool doInitialization(Module &M);
-+
-+ virtual bool runOnFunction(Function &F);
-+
-+ virtual const char *getPassName() const {
-+ return "SI annotate control flow";
-+ }
-+
-+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-+ AU.addRequired<DominatorTree>();
-+ AU.addPreserved<DominatorTree>();
-+ FunctionPass::getAnalysisUsage(AU);
-+ }
-+
-+};
-+
-+} // end anonymous namespace
-+
-+char SIAnnotateControlFlow::ID = 0;
-+
-+/// \brief Initialize all the types and constants used in the pass
-+bool SIAnnotateControlFlow::doInitialization(Module &M) {
-+ LLVMContext &Context = M.getContext();
-+
-+ Void = Type::getVoidTy(Context);
-+ Boolean = Type::getInt1Ty(Context);
-+ Int64 = Type::getInt64Ty(Context);
-+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)0);
-+
-+ BoolTrue = ConstantInt::getTrue(Context);
-+ BoolFalse = ConstantInt::getFalse(Context);
-+ BoolUndef = UndefValue::get(Boolean);
-+ Int64Zero = ConstantInt::get(Int64, 0);
-+
-+ If = M.getOrInsertFunction(
-+ IfIntrinsic, ReturnStruct, Boolean, (Type *)0);
-+
-+ Else = M.getOrInsertFunction(
-+ ElseIntrinsic, ReturnStruct, Int64, (Type *)0);
-+
-+ Break = M.getOrInsertFunction(
-+ BreakIntrinsic, Int64, Int64, (Type *)0);
-+
-+ IfBreak = M.getOrInsertFunction(
-+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0);
-+
-+ ElseBreak = M.getOrInsertFunction(
-+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0);
-+
-+ Loop = M.getOrInsertFunction(
-+ LoopIntrinsic, Boolean, Int64, (Type *)0);
-+
-+ EndCf = M.getOrInsertFunction(
-+ EndCfIntrinsic, Void, Int64, (Type *)0);
-+
-+ return false;
-+}
-+
-+/// \brief Is BB the last block saved on the stack?
-+bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
-+ return Stack.back().first == BB;
-+}
-+
-+/// \brief Pop the last saved value from the control flow stack
-+Value *SIAnnotateControlFlow::popSaved() {
-+ return Stack.pop_back_val().second;
-+}
-+
-+/// \brief Push a BB and saved value to the control flow stack
-+void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
-+ Stack.push_back(std::make_pair(BB, Saved));
-+}
-+
-+/// \brief Can the condition represented by this PHI node be treated like
-+/// an "Else" block?
-+bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
-+ BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
-+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
-+ if (Phi->getIncomingBlock(i) == IDom) {
-+
-+ if (Phi->getIncomingValue(i) != BoolTrue)
-+ return false;
-+
-+ } else {
-+ if (Phi->getIncomingValue(i) != BoolFalse)
-+ return false;
-+
-+ }
-+ }
-+ return true;
-+}
-+
-+/// \brief Erase \p Phi if it is not used anymore
-+void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
-+ if (!Phi->hasNUsesOrMore(1))
-+ Phi->eraseFromParent();
-+}
-+
-+/// \brief Open a new "If" block
-+void SIAnnotateControlFlow::openIf(BranchInst *Term) {
-+ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
-+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
-+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
-+}
-+
-+/// \brief Close the last "If" block and open a new "Else" block
-+void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
-+ Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
-+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
-+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
-+}
-+
-+/// \brief Recursively handle the condition leading to a loop
-+void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
-+ if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
-+
-+ // Handle all non-constant incoming values first
-+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
-+ Value *Incoming = Phi->getIncomingValue(i);
-+ if (isa<ConstantInt>(Incoming))
-+ continue;
-+
-+ Phi->setIncomingValue(i, BoolFalse);
-+ handleLoopCondition(Incoming);
-+ }
-+
-+ BasicBlock *Parent = Phi->getParent();
-+ BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
-+
-+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
-+
-+ Value *Incoming = Phi->getIncomingValue(i);
-+ if (Incoming != BoolTrue)
-+ continue;
-+
-+ BasicBlock *From = Phi->getIncomingBlock(i);
-+ if (From == IDom) {
-+ CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
-+ if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
-+ Value *Args[] = {
-+ OldEnd->getArgOperand(0),
-+ PhiInserter.GetValueAtEndOfBlock(Parent)
-+ };
-+ Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
-+ PhiInserter.AddAvailableValue(Parent, Ret);
-+ continue;
-+ }
-+ }
-+
-+ TerminatorInst *Insert = From->getTerminator();
-+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
-+ Value *Ret = CallInst::Create(Break, Arg, "", Insert);
-+ PhiInserter.AddAvailableValue(From, Ret);
-+ }
-+ eraseIfUnused(Phi);
-+
-+ } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
-+ BasicBlock *Parent = Inst->getParent();
-+ TerminatorInst *Insert = Parent->getTerminator();
-+ Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
-+ Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
-+ PhiInserter.AddAvailableValue(Parent, Ret);
-+
-+ } else {
-+ assert(0 && "Unhandled loop condition!");
-+ }
-+}
-+
-+/// \brief Handle a back edge (loop)
-+void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
-+ BasicBlock *Target = Term->getSuccessor(1);
-+ PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
-+
-+ PhiInserter.Initialize(Int64, "");
-+ PhiInserter.AddAvailableValue(Target, Broken);
-+
-+ Value *Cond = Term->getCondition();
-+ Term->setCondition(BoolTrue);
-+ handleLoopCondition(Cond);
-+
-+ BasicBlock *BB = Term->getParent();
-+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
-+ for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
-+ PI != PE; ++PI) {
-+
-+ Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
-+ }
-+
-+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
-+ push(Term->getSuccessor(0), Arg);
-+}
-+
-+/// \brief Close the last opened control flow
-+void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
-+ CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
-+}
-+
-+/// \brief Annotate the control flow with intrinsics so the backend can
-+/// recognize if/then/else and loops.
-+bool SIAnnotateControlFlow::runOnFunction(Function &F) {
-+ DT = &getAnalysis<DominatorTree>();
-+
-+ for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
-+ E = df_end(&F.getEntryBlock()); I != E; ++I) {
-+
-+ BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
-+
-+ if (!Term || Term->isUnconditional()) {
-+ if (isTopOfStack(*I))
-+ closeControlFlow(*I);
-+ continue;
-+ }
-+
-+ if (I.nodeVisited(Term->getSuccessor(1))) {
-+ if (isTopOfStack(*I))
-+ closeControlFlow(*I);
-+ handleLoop(Term);
-+ continue;
-+ }
-+
-+ if (isTopOfStack(*I)) {
-+ PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
-+ if (Phi && Phi->getParent() == *I && isElse(Phi)) {
-+ insertElse(Term);
-+ eraseIfUnused(Phi);
-+ continue;
-+ }
-+ closeControlFlow(*I);
-+ }
-+ openIf(Term);
-+ }
-+
-+ assert(Stack.empty());
-+ return true;
-+}
-+
-+/// \brief Create the annotation pass
-+FunctionPass *llvm::createSIAnnotateControlFlowPass() {
-+ return new SIAnnotateControlFlow();
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIAssignInterpRegs.cpp llvm-r600/lib/Target/R600/SIAssignInterpRegs.cpp
---- llvm-3.2.src/lib/Target/R600/SIAssignInterpRegs.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIAssignInterpRegs.cpp 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,152 @@
-+//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass maps the pseudo interpolation registers to the correct
-+/// physical registers.
-+///
-+/// Prior to executing a fragment shader, the GPU loads interpolation
-+/// parameters into physical registers. The specific physical register that each
-+/// interpolation parameter ends up in depends on the type of the interpolation
-+/// parameter as well as how many interpolation parameters are used by the
-+/// shader.
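-+///
-+/// For example (illustrative): parameters are packed in slot order, so if
-+/// only PERSP_CENTER is used its I/J values land in VGPR0/VGPR1, while
-+/// enabling PERSP_SAMPLE as well moves PERSP_CENTER's values to
-+/// VGPR2/VGPR3.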
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+
-+#include "AMDGPU.h"
-+#include "AMDIL.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class SIAssignInterpRegsPass : public MachineFunctionPass {
-+
-+private:
-+ static char ID;
-+ TargetMachine &TM;
-+
-+ void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
-+ unsigned physReg, unsigned virtReg);
-+
-+public:
-+ SIAssignInterpRegsPass(TargetMachine &tm) :
-+ MachineFunctionPass(ID), TM(tm) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ const char *getPassName() const { return "SI assign interpolation registers"; }
-+};
-+
-+} // End anonymous namespace
-+
-+char SIAssignInterpRegsPass::ID = 0;
-+
-+#define INTERP_VALUES 16
-+#define REQUIRED_VALUE_MAX_INDEX 7
-+
-+struct InterpInfo {
-+ bool Enabled;
-+ unsigned Regs[3];
-+ unsigned RegCount;
-+};
-+
-+
-+FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
-+ return new SIAssignInterpRegsPass(tm);
-+}
-+
-+bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) {
-+
-+ struct InterpInfo InterpUse[INTERP_VALUES] = {
-+ {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
-+ {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
-+ {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
-+ {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
-+ {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
-+ {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
-+ {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
-+ {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
-+ {false, {AMDGPU::POS_X_FLOAT}, 1},
-+ {false, {AMDGPU::POS_Y_FLOAT}, 1},
-+ {false, {AMDGPU::POS_Z_FLOAT}, 1},
-+ {false, {AMDGPU::POS_W_FLOAT}, 1},
-+ {false, {AMDGPU::FRONT_FACE}, 1},
-+ {false, {AMDGPU::ANCILLARY}, 1},
-+ {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
-+ {false, {AMDGPU::POS_FIXED_PT}, 1}
-+ };
-+
-+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-+ // This pass is only needed for pixel shaders.
-+ if (MFI->ShaderType != ShaderType::PIXEL) {
-+ return false;
-+ }
-+ MachineRegisterInfo &MRI = MF.getRegInfo();
-+ bool ForceEnable = true;
-+
-+ // First pass, mark the interpolation values that are used.
-+ for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
-+ for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
-+ RegIdx++) {
-+ InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
-+ !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
-+ if (InterpUse[InterpIdx].Enabled &&
-+ InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
-+ ForceEnable = false;
-+ }
-+ }
-+ }
-+
-+ // At least one interpolation mode must be enabled or else the GPU will hang.
-+ if (ForceEnable) {
-+ InterpUse[0].Enabled = true;
-+ }
-+
-+ unsigned UsedVgprs = 0;
-+
-+ // Second pass, replace with VGPRs.
-+ for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
-+ if (!InterpUse[InterpIdx].Enabled) {
-+ continue;
-+ }
-+ MFI->SPIPSInputAddr |= (1 << InterpIdx);
-+
-+ for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
-+ RegIdx++, UsedVgprs++) {
-+ unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
-+ unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-+ MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
-+ addLiveIn(&MF, MRI, NewReg, VirtReg);
-+ }
-+ }
-+
-+ return false;
-+}
-+
-+void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
-+ MachineRegisterInfo & MRI,
-+ unsigned physReg, unsigned virtReg) {
-+ const TargetInstrInfo * TII = TM.getInstrInfo();
-+ if (!MRI.isLiveIn(physReg)) {
-+ MRI.addLiveIn(physReg, virtReg);
-+ MF->front().addLiveIn(physReg);
-+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
-+ TII->get(TargetOpcode::COPY), virtReg)
-+ .addReg(physReg);
-+ } else {
-+ MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInsertWaits.cpp llvm-r600/lib/Target/R600/SIInsertWaits.cpp
---- llvm-3.2.src/lib/Target/R600/SIInsertWaits.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIInsertWaits.cpp 2013-01-25 19:43:57.473383054 +0100
-@@ -0,0 +1,353 @@
-+//===-- SIInsertWaits.cpp - Insert S_WAITCNT instructions -----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Insert wait instructions for memory reads and writes.
-+///
-+/// Memory reads and writes are issued asynchronously, so we need to insert
-+/// S_WAITCNT instructions when we want to access any of their results or
-+/// overwrite any register that's used asynchronously.
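-+///
-+/// For example (a sketch of the idea, not literal output): a buffer load
-+/// into a VGPR bumps VM_CNT; before the first instruction that reads that
-+/// VGPR this pass emits an S_WAITCNT forcing the count low enough for the
-+/// result to be available.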
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "SIInstrInfo.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+/// \brief One variable for each of the hardware counters
-+typedef union {
-+ struct {
-+ unsigned VM;
-+ unsigned EXP;
-+ unsigned LGKM;
-+ } Named;
-+ unsigned Array[3];
-+
-+} Counters;
-+
-+typedef Counters RegCounters[512];
-+typedef std::pair<unsigned, unsigned> RegInterval;
-+
-+class SIInsertWaits : public MachineFunctionPass {
-+
-+private:
-+ static char ID;
-+ const SIInstrInfo *TII;
-+ const SIRegisterInfo &TRI;
-+ const MachineRegisterInfo *MRI;
-+
-+ /// \brief Constant hardware limits
-+ static const Counters WaitCounts;
-+
-+ /// \brief Constant zero value
-+ static const Counters ZeroCounts;
-+
-+ /// \brief Counter values we have already waited on.
-+ Counters WaitedOn;
-+
-+ /// \brief Counter values for last instruction issued.
-+ Counters LastIssued;
-+
-+ /// \brief Registers used by async instructions.
-+ RegCounters UsedRegs;
-+
-+ /// \brief Registers defined by async instructions.
-+ RegCounters DefinedRegs;
-+
-+ /// \brief Different export instruction types seen since last wait.
-+ unsigned ExpInstrTypesSeen;
-+
-+ /// \brief Get increment/decrement amount for this instruction.
-+ Counters getHwCounts(MachineInstr &MI);
-+
-+ /// \brief Is operand relevant for async execution?
-+ bool isOpRelevant(MachineOperand &Op);
-+
-+ /// \brief Get register interval an operand affects.
-+ RegInterval getRegInterval(MachineOperand &Op);
-+
-+ /// \brief Handle an instruction's async components
-+ void pushInstruction(MachineInstr &MI);
-+
-+ /// \brief Insert the actual wait instruction
-+ bool insertWait(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I,
-+ const Counters &Counts);
-+
-+ /// \brief Resolve all operand dependencies to counter requirements
-+ Counters handleOperands(MachineInstr &MI);
-+
-+public:
-+ SIInsertWaits(TargetMachine &tm) :
-+ MachineFunctionPass(ID),
-+ TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())),
-+ TRI(TII->getRegisterInfo()) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ const char *getPassName() const {
-+ return "SI insert wait instructions";
-+ }
-+
-+};
-+
-+} // End anonymous namespace
-+
-+char SIInsertWaits::ID = 0;
-+
-+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
-+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
-+
-+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
-+ return new SIInsertWaits(tm);
-+}
-+
-+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
-+
-+ uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
-+ Counters Result;
-+
-+ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
-+
-+ // Only consider stores or EXP for EXP_CNT
-+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
-+ (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
-+
-+ // LGKM may use larger values
-+ if (TSFlags & SIInstrFlags::LGKM_CNT) {
-+
-+ MachineOperand &Op = MI.getOperand(0);
-+ assert(Op.isReg() && "First LGKM operand must be a register!");
-+
-+ unsigned Reg = Op.getReg();
-+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
-+ Result.Named.LGKM = Size > 4 ? 2 : 1;
-+
-+ } else {
-+ Result.Named.LGKM = 0;
-+ }
-+
-+ return Result;
-+}
-+
-+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
-+
-+ // Constants are always irrelevant
-+ if (!Op.isReg())
-+ return false;
-+
-+ // Defines are always relevant
-+ if (Op.isDef())
-+ return true;
-+
-+ // For exports all registers are relevant
-+ MachineInstr &MI = *Op.getParent();
-+ if (MI.getOpcode() == AMDGPU::EXP)
-+ return true;
-+
-+ // For stores the stored value is also relevant
-+ if (!MI.getDesc().mayStore())
-+ return false;
-+
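-+ // The stored value is the first register use; Op is relevant only if it
-+ // is exactly that operand.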
-+ for (MachineInstr::mop_iterator I = MI.operands_begin(),
-+ E = MI.operands_end(); I != E; ++I) {
-+
-+ if (I->isReg() && I->isUse())
-+ return Op.isIdenticalTo(*I);
-+ }
-+
-+ return false;
-+}
-+
-+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
-+
-+ if (!Op.isReg())
-+ return std::make_pair(0, 0);
-+
-+ unsigned Reg = Op.getReg();
-+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
-+
-+ assert(Size >= 4);
-+
-+ RegInterval Result;
-+ Result.first = TRI.getEncodingValue(Reg);
-+ Result.second = Result.first + Size / 4;
-+
-+ return Result;
-+}
-+
-+void SIInsertWaits::pushInstruction(MachineInstr &MI) {
-+
-+ // Get the hardware counter increments and sum them up
-+ Counters Increment = getHwCounts(MI);
-+ unsigned Sum = 0;
-+
-+ for (unsigned i = 0; i < 3; ++i) {
-+ LastIssued.Array[i] += Increment.Array[i];
-+ Sum += Increment.Array[i];
-+ }
-+
-+ // If we don't increase anything then that's it
-+ if (Sum == 0)
-+ return;
-+
-+ // Remember which export instructions we have seen
-+ if (Increment.Named.EXP) {
-+ ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
-+ }
-+
-+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-+
-+ MachineOperand &Op = MI.getOperand(i);
-+ if (!isOpRelevant(Op))
-+ continue;
-+
-+ RegInterval Interval = getRegInterval(Op);
-+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
-+
-+ // Remember which registers we define
-+ if (Op.isDef())
-+ DefinedRegs[j] = LastIssued;
-+
-+ // and which ones we are using
-+ if (Op.isUse())
-+ UsedRegs[j] = LastIssued;
-+ }
-+ }
-+}
-+
-+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I,
-+ const Counters &Required) {
-+
-+ // End of program? No need to wait on anything
-+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
-+ return false;
-+
-+ // Figure out if the async instructions execute in order
-+ bool Ordered[3];
-+
-+ // VM_CNT is always ordered
-+ Ordered[0] = true;
-+
-+ // EXP_CNT is unordered if we have both EXP & VM-writes
-+ Ordered[1] = ExpInstrTypesSeen == 3;
-+
-+ // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
-+ Ordered[2] = false;
-+
-+ // The values we are going to put into the S_WAITCNT instruction
-+ Counters Counts = WaitCounts;
-+
-+ // Do we really need to wait?
-+ bool NeedWait = false;
-+
-+ for (unsigned i = 0; i < 3; ++i) {
-+
-+ if (Required.Array[i] <= WaitedOn.Array[i])
-+ continue;
-+
-+ NeedWait = true;
-+
-+ if (Ordered[i]) {
-+ unsigned Value = LastIssued.Array[i] - Required.Array[i];
-+
-+ // Adjust the value to the real hardware possibilities
-+ Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
-+
-+ } else
-+ Counts.Array[i] = 0;
-+
-+ // Remember what we have waited on
-+ WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
-+ }
-+
-+ if (!NeedWait)
-+ return false;
-+
-+ // Reset EXP_CNT instruction types
-+ if (Counts.Named.EXP == 0)
-+ ExpInstrTypesSeen = 0;
-+
-+ // Build the wait instruction
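-+ // The immediate packs VM_CNT into bits [3:0], EXP_CNT into bits [6:4]
-+ // and LGKM_CNT into bits [10:8]; e.g. VM=0, EXP=7, LGKM=7 packs to
-+ // 0x770, i.e. "wait until no VM operations are outstanding".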
-+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
-+ .addImm((Counts.Named.VM & 0xF) |
-+ ((Counts.Named.EXP & 0x7) << 4) |
-+ ((Counts.Named.LGKM & 0x7) << 8));
-+
-+ return true;
-+}
-+
-+/// \brief helper function for handleOperands
-+static void increaseCounters(Counters &Dst, const Counters &Src) {
-+
-+ for (unsigned i = 0; i < 3; ++i)
-+ Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
-+}
-+
-+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-+
-+ Counters Result = ZeroCounts;
-+
-+ // For each register affected by this instruction, raise the result
-+ // counters to the last values recorded for that register
-+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-+
-+ MachineOperand &Op = MI.getOperand(i);
-+ RegInterval Interval = getRegInterval(Op);
-+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
-+
-+ if (Op.isDef())
-+ increaseCounters(Result, UsedRegs[j]);
-+
-+ if (Op.isUse())
-+ increaseCounters(Result, DefinedRegs[j]);
-+ }
-+ }
-+
-+ return Result;
-+}
-+
-+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
-+
-+ bool Changes = false;
-+
-+ MRI = &MF.getRegInfo();
-+
-+ WaitedOn = ZeroCounts;
-+ LastIssued = ZeroCounts;
-+
-+ memset(&UsedRegs, 0, sizeof(UsedRegs));
-+ memset(&DefinedRegs, 0, sizeof(DefinedRegs));
-+
-+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
-+ BI != BE; ++BI) {
-+
-+ MachineBasicBlock &MBB = *BI;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+ I != E; ++I) {
-+
-+ Changes |= insertWait(MBB, I, handleOperands(*I));
-+ pushInstruction(*I);
-+ }
-+
-+ // Wait for everything at the end of the MBB
-+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
-+ }
-+
-+ return Changes;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrFormats.td llvm-r600/lib/Target/R600/SIInstrFormats.td
---- llvm-3.2.src/lib/Target/R600/SIInstrFormats.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIInstrFormats.td 2013-01-25 19:43:57.473383054 +0100
-@@ -0,0 +1,146 @@
-+//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// SI Instruction format definitions.
-+//
-+// Instructions with _32 take 32-bit operands.
-+// Instructions with _64 take 64-bit operands.
-+//
-+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
-+// encoding is the standard encoding, but instructions that make use of
-+// any of the instruction modifiers must use the 64-bit encoding.
-+//
-+// Instructions with _e32 use the 32-bit encoding.
-+// Instructions with _e64 use the 64-bit encoding.
-+//
-+//===----------------------------------------------------------------------===//
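-+// For example (illustrative): a VOP2 instruction generated by the
-+// multiclasses below gets both a _e32 definition (compact encoding) and a
-+// _e64 definition (VOP3 encoding with the extra modifier operands).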
-+
-+class VOP3b_2IN <bits<9> op, string opName, RegisterClass dstClass,
-+ RegisterClass src0Class, RegisterClass src1Class,
-+ list<dag> pattern>
-+ : VOP3b <op, (outs dstClass:$vdst),
-+ (ins src0Class:$src0, src1Class:$src1, InstFlag:$src2, InstFlag:$sdst,
-+ InstFlag:$omod, InstFlag:$neg),
-+ opName, pattern
-+>;
-+
-+
-+class VOP3_1_32 <bits<9> op, string opName, list<dag> pattern>
-+ : VOP3b_2IN <op, opName, SReg_1, AllReg_32, VReg_32, pattern>;
-+
-+class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
-+ : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
-+
-+class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
-+ : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, VReg_64:$src1, VReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
-+
-+
-+class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
-+ : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
-+
-+class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
-+ : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
-+
-+class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
-+ : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
-+
-+class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
-+ : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
-+
-+class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
-+ : SOP2 <op, (outs SReg_1:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
-+
-+class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
-+ string opName, list<dag> pattern> :
-+ VOP1 <
-+ op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
-+ >;
-+
-+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
-+ def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
-+ def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-+ opName, []
-+ >;
-+}
-+
-+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
-+
-+ def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
-+
-+ def _e64 : VOP3_64 <
-+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-+ opName, []
-+ >;
-+}
-+
-+class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
-+ string opName, list<dag> pattern> :
-+ VOP2 <
-+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
-+ >;
-+
-+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
-+
-+ def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
-+
-+ def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-+ opName, []
-+ >;
-+}
-+
-+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
-+ def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
-+
-+ def _e64 : VOP3_64 <
-+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-+ opName, []
-+ >;
-+}
-+
-+class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
-+ : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
-+
-+class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
-+ : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
-+
-+class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
-+ string opName, list<dag> pattern> :
-+ VOPC <
-+ op, (ins arc:$src0, vrc:$src1), opName, pattern
-+ >;
-+
-+multiclass VOPC_32 <bits<9> op, string opName, list<dag> pattern> {
-+
-+ def _e32 : VOPC_Helper <
-+ {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-+ VReg_32, AllReg_32, opName, pattern
-+ >;
-+
-+ def _e64 : VOP3_1_32 <
-+ op,
-+ opName, pattern
-+ >;
-+}
-+
-+multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
-+
-+ def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
-+
-+ def _e64 : VOP3_64 <
-+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-+ opName, []
-+ >;
-+}
-+
-+class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
-+ : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
-+
-+class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
-+ : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrInfo.cpp llvm-r600/lib/Target/R600/SIInstrInfo.cpp
---- llvm-3.2.src/lib/Target/R600/SIInstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIInstrInfo.cpp 2013-01-25 19:43:57.473383054 +0100
-@@ -0,0 +1,89 @@
-+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief SI implementation of TargetInstrInfo.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+#include "SIInstrInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/MC/MCInstrDesc.h"
-+
-+#include <stdio.h>
-+
-+using namespace llvm;
-+
-+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
-+ : AMDGPUInstrInfo(tm),
-+ RI(tm, *this)
-+ { }
-+
-+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
-+ return RI;
-+}
-+
-+void
-+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI, DebugLoc DL,
-+ unsigned DestReg, unsigned SrcReg,
-+ bool KillSrc) const {
-+ // If we are trying to copy to or from SCC, there is a bug somewhere else in
-+ // the backend. While it may be theoretically possible to do this, it should
-+ // never be necessary.
-+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
-+
-+ if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
-+ assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
-+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
-+ .addReg(SrcReg, getKillRegState(KillSrc));
-+ } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
-+ assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
-+ AMDGPU::SReg_32RegClass.contains(SrcReg));
-+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
-+ .addReg(SrcReg, getKillRegState(KillSrc));
-+ } else {
-+ assert(AMDGPU::SReg_32RegClass.contains(DestReg));
-+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
-+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
-+ .addReg(SrcReg, getKillRegState(KillSrc));
-+ }
-+}
-+
-+MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+ int64_t Imm) const {
-+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
-+ MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
-+ MachineInstrBuilder(MI).addImm(Imm);
-+
-+ return MI;
-+
-+}
-+
-+bool SIInstrInfo::isMov(unsigned Opcode) const {
-+ switch(Opcode) {
-+ default: return false;
-+ case AMDGPU::S_MOV_B32:
-+ case AMDGPU::S_MOV_B64:
-+ case AMDGPU::V_MOV_B32_e32:
-+ case AMDGPU::V_MOV_B32_e64:
-+ case AMDGPU::V_MOV_IMM_F32:
-+ case AMDGPU::V_MOV_IMM_I32:
-+ case AMDGPU::S_MOV_IMM_I32:
-+ return true;
-+ }
-+}
-+
-+bool
-+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
-+ return RC != &AMDGPU::EXECRegRegClass;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrInfo.h llvm-r600/lib/Target/R600/SIInstrInfo.h
---- llvm-3.2.src/lib/Target/R600/SIInstrInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIInstrInfo.h 2013-01-25 19:43:57.476716387 +0100
-@@ -0,0 +1,64 @@
-+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition for SIInstrInfo.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+#ifndef SIINSTRINFO_H
-+#define SIINSTRINFO_H
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "SIRegisterInfo.h"
-+
-+namespace llvm {
-+
-+class SIInstrInfo : public AMDGPUInstrInfo {
-+private:
-+ const SIRegisterInfo RI;
-+
-+public:
-+ explicit SIInstrInfo(AMDGPUTargetMachine &tm);
-+
-+ const SIRegisterInfo &getRegisterInfo() const;
-+
-+ virtual void copyPhysReg(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI, DebugLoc DL,
-+ unsigned DestReg, unsigned SrcReg,
-+ bool KillSrc) const;
-+
-+ /// \returns the encoding type of this instruction.
-+ unsigned getEncodingType(const MachineInstr &MI) const;
-+
-+ /// \returns the size of this instruction's encoding in bytes.
-+ unsigned getEncodingBytes(const MachineInstr &MI) const;
-+
-+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+ int64_t Imm) const;
-+
-+ virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
-+ virtual bool isMov(unsigned Opcode) const;
-+
-+ virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-+ };
-+
-+} // End namespace llvm
-+
-+namespace SIInstrFlags {
-+ enum Flags {
-+ // First 4 bits are the instruction encoding
-+ VM_CNT = 1 << 4,
-+ EXP_CNT = 1 << 5,
-+ LGKM_CNT = 1 << 6
-+ };
-+}
-+
-+#endif //SIINSTRINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrInfo.td llvm-r600/lib/Target/R600/SIInstrInfo.td
---- llvm-3.2.src/lib/Target/R600/SIInstrInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIInstrInfo.td 2013-01-25 19:43:57.476716387 +0100
-@@ -0,0 +1,591 @@
-+//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// SI DAG Profiles
-+//===----------------------------------------------------------------------===//
-+def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
-+ SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
-+]>;
-+
-+//===----------------------------------------------------------------------===//
-+// SI DAG Nodes
-+//===----------------------------------------------------------------------===//
-+
-+// and operation on 64-bit wide vcc
-+def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// Special bitcast node for sharing VCC register between VALU and SALU
-+def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST",
-+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
-+>;
-+
-+// and operation on 64-bit wide vcc
-+def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// Special bitcast node for sharing VCC register between VALU and SALU
-+def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
-+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
-+>;
-+
-+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
-+ AMDGPUInst<outs, ins, asm, pattern> {
-+
-+ field bits<4> EncodingType = 0;
-+ field bits<1> VM_CNT = 0;
-+ field bits<1> EXP_CNT = 0;
-+ field bits<1> LGKM_CNT = 0;
-+
-+ let TSFlags{3-0} = EncodingType;
-+ let TSFlags{4} = VM_CNT;
-+ let TSFlags{5} = EXP_CNT;
-+ let TSFlags{6} = LGKM_CNT;
-+}
-+
-+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
-+ InstSI <outs, ins, asm, pattern> {
-+
-+ field bits<32> Inst;
-+}
-+
-+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
-+ InstSI <outs, ins, asm, pattern> {
-+
-+ field bits<64> Inst;
-+}
-+
-+class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
-+ let EncoderMethod = "encodeOperand";
-+ let MIOperandInfo = opInfo;
-+}
-+
-+def IMM16bit : ImmLeaf <
-+ i16,
-+ [{return isInt<16>(Imm);}]
-+>;
-+
-+def IMM8bit : ImmLeaf <
-+ i32,
-+ [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
-+>;
-+
-+def IMM12bit : ImmLeaf <
-+ i16,
-+ [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
-+>;
-+
-+def IMM32bitIn64bit : ImmLeaf <
-+ i64,
-+ [{return isInt<32>(Imm);}]
-+>;
-+
-+class GPR4Align <RegisterClass rc> : Operand <vAny> {
-+ let EncoderMethod = "GPR4AlignEncode";
-+ let MIOperandInfo = (ops rc:$reg);
-+}
-+
-+class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
-+ let EncoderMethod = "GPR2AlignEncode";
-+ let MIOperandInfo = (ops rc:$reg);
-+}
-+
-+def SMRDmemrr : Operand<iPTR> {
-+ let MIOperandInfo = (ops SReg_64, SReg_32);
-+ let EncoderMethod = "GPR2AlignEncode";
-+}
-+
-+def SMRDmemri : Operand<iPTR> {
-+ let MIOperandInfo = (ops SReg_64, i32imm);
-+ let EncoderMethod = "SMRDmemriEncode";
-+}
-+
-+def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
-+def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
-+
-+let Uses = [EXEC] in {
-+
-+def EXP : Enc64<
-+ (outs),
-+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
-+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
-+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
-+ [] > {
-+
-+ bits<4> EN;
-+ bits<6> TGT;
-+ bits<1> COMPR;
-+ bits<1> DONE;
-+ bits<1> VM;
-+ bits<8> VSRC0;
-+ bits<8> VSRC1;
-+ bits<8> VSRC2;
-+ bits<8> VSRC3;
-+
-+ let Inst{3-0} = EN;
-+ let Inst{9-4} = TGT;
-+ let Inst{10} = COMPR;
-+ let Inst{11} = DONE;
-+ let Inst{12} = VM;
-+ let Inst{31-26} = 0x3e;
-+ let Inst{39-32} = VSRC0;
-+ let Inst{47-40} = VSRC1;
-+ let Inst{55-48} = VSRC2;
-+ let Inst{63-56} = VSRC3;
-+ let EncodingType = 0; //SIInstrEncodingType::EXP
-+
-+ let EXP_CNT = 1;
-+}
-+
-+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc64 <outs, ins, asm, pattern> {
-+
-+ bits<8> VDATA;
-+ bits<4> DMASK;
-+ bits<1> UNORM;
-+ bits<1> GLC;
-+ bits<1> DA;
-+ bits<1> R128;
-+ bits<1> TFE;
-+ bits<1> LWE;
-+ bits<1> SLC;
-+ bits<8> VADDR;
-+ bits<5> SRSRC;
-+ bits<5> SSAMP;
-+
-+ let Inst{11-8} = DMASK;
-+ let Inst{12} = UNORM;
-+ let Inst{13} = GLC;
-+ let Inst{14} = DA;
-+ let Inst{15} = R128;
-+ let Inst{16} = TFE;
-+ let Inst{17} = LWE;
-+ let Inst{24-18} = op;
-+ let Inst{25} = SLC;
-+ let Inst{31-26} = 0x3c;
-+ let Inst{39-32} = VADDR;
-+ let Inst{47-40} = VDATA;
-+ let Inst{52-48} = SRSRC;
-+ let Inst{57-53} = SSAMP;
-+ let EncodingType = 2; //SIInstrEncodingType::MIMG
-+
-+ let VM_CNT = 1;
-+ let EXP_CNT = 1;
-+}
-+
-+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc64<outs, ins, asm, pattern> {
-+
-+ bits<8> VDATA;
-+ bits<12> OFFSET;
-+ bits<1> OFFEN;
-+ bits<1> IDXEN;
-+ bits<1> GLC;
-+ bits<1> ADDR64;
-+ bits<4> DFMT;
-+ bits<3> NFMT;
-+ bits<8> VADDR;
-+ bits<5> SRSRC;
-+ bits<1> SLC;
-+ bits<1> TFE;
-+ bits<8> SOFFSET;
-+
-+ let Inst{11-0} = OFFSET;
-+ let Inst{12} = OFFEN;
-+ let Inst{13} = IDXEN;
-+ let Inst{14} = GLC;
-+ let Inst{15} = ADDR64;
-+ let Inst{18-16} = op;
-+ let Inst{22-19} = DFMT;
-+ let Inst{25-23} = NFMT;
-+ let Inst{31-26} = 0x3a; //encoding
-+ let Inst{39-32} = VADDR;
-+ let Inst{47-40} = VDATA;
-+ let Inst{52-48} = SRSRC;
-+ let Inst{54} = SLC;
-+ let Inst{55} = TFE;
-+ let Inst{63-56} = SOFFSET;
-+ let EncodingType = 3; //SIInstrEncodingType::MTBUF
-+
-+ let VM_CNT = 1;
-+ let EXP_CNT = 1;
-+
-+ let neverHasSideEffects = 1;
-+}
-+
-+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc64<outs, ins, asm, pattern> {
-+
-+ bits<8> VDATA;
-+ bits<12> OFFSET;
-+ bits<1> OFFEN;
-+ bits<1> IDXEN;
-+ bits<1> GLC;
-+ bits<1> ADDR64;
-+ bits<1> LDS;
-+ bits<8> VADDR;
-+ bits<5> SRSRC;
-+ bits<1> SLC;
-+ bits<1> TFE;
-+ bits<8> SOFFSET;
-+
-+ let Inst{11-0} = OFFSET;
-+ let Inst{12} = OFFEN;
-+ let Inst{13} = IDXEN;
-+ let Inst{14} = GLC;
-+ let Inst{15} = ADDR64;
-+ let Inst{16} = LDS;
-+ let Inst{24-18} = op;
-+ let Inst{31-26} = 0x38; //encoding
-+ let Inst{39-32} = VADDR;
-+ let Inst{47-40} = VDATA;
-+ let Inst{52-48} = SRSRC;
-+ let Inst{54} = SLC;
-+ let Inst{55} = TFE;
-+ let Inst{63-56} = SOFFSET;
-+ let EncodingType = 4; //SIInstrEncodingType::MUBUF
-+
-+ let VM_CNT = 1;
-+ let EXP_CNT = 1;
-+
-+ let neverHasSideEffects = 1;
-+}
-+
-+} // End Uses = [EXEC]
-+
-+class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32<outs, ins, asm, pattern> {
-+
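-+ // The 15-bit PTR pseudo operand packs the 8-bit OFFSET, the IMM flag and
-+ // the 6-bit SBASE register; the fields below split it back apart.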
-+ bits<7> SDST;
-+ bits<15> PTR;
-+ bits<8> OFFSET = PTR{7-0};
-+ bits<1> IMM = PTR{8};
-+ bits<6> SBASE = PTR{14-9};
-+
-+ let Inst{7-0} = OFFSET;
-+ let Inst{8} = IMM;
-+ let Inst{14-9} = SBASE;
-+ let Inst{21-15} = SDST;
-+ let Inst{26-22} = op;
-+ let Inst{31-27} = 0x18; //encoding
-+ let EncodingType = 5; //SIInstrEncodingType::SMRD
-+
-+ let LGKM_CNT = 1;
-+}
-+
-+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32<outs, ins, asm, pattern> {
-+
-+ bits<7> SDST;
-+ bits<8> SSRC0;
-+
-+ let Inst{7-0} = SSRC0;
-+ let Inst{15-8} = op;
-+ let Inst{22-16} = SDST;
-+ let Inst{31-23} = 0x17d; //encoding;
-+ let EncodingType = 6; //SIInstrEncodingType::SOP1
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32 <outs, ins, asm, pattern> {
-+
-+ bits<7> SDST;
-+ bits<8> SSRC0;
-+ bits<8> SSRC1;
-+
-+ let Inst{7-0} = SSRC0;
-+ let Inst{15-8} = SSRC1;
-+ let Inst{22-16} = SDST;
-+ let Inst{29-23} = op;
-+ let Inst{31-30} = 0x2; // encoding
-+ let EncodingType = 7; // SIInstrEncodingType::SOP2
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32<outs, ins, asm, pattern> {
-+
-+ bits<8> SSRC0;
-+ bits<8> SSRC1;
-+
-+ let Inst{7-0} = SSRC0;
-+ let Inst{15-8} = SSRC1;
-+ let Inst{22-16} = op;
-+ let Inst{31-23} = 0x17e;
-+ let EncodingType = 8; // SIInstrEncodingType::SOPC
-+
-+ let DisableEncoding = "$dst";
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32 <outs, ins , asm, pattern> {
-+
-+ bits <7> SDST;
-+ bits <16> SIMM16;
-+
-+ let Inst{15-0} = SIMM16;
-+ let Inst{22-16} = SDST;
-+ let Inst{27-23} = op;
-+ let Inst{31-28} = 0xb; //encoding
-+ let EncodingType = 9; // SIInstrEncodingType::SOPK
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
-+ (outs),
-+ ins,
-+ asm,
-+ pattern > {
-+
-+ bits <16> SIMM16;
-+
-+ let Inst{15-0} = SIMM16;
-+ let Inst{22-16} = op;
-+ let Inst{31-23} = 0x17f; // encoding
-+ let EncodingType = 10; // SIInstrEncodingType::SOPP
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+let Uses = [EXEC] in {
-+
-+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32 <outs, ins, asm, pattern> {
-+
-+ bits<8> VDST;
-+ bits<8> VSRC;
-+ bits<2> ATTRCHAN;
-+ bits<6> ATTR;
-+
-+ let Inst{7-0} = VSRC;
-+ let Inst{9-8} = ATTRCHAN;
-+ let Inst{15-10} = ATTR;
-+ let Inst{17-16} = op;
-+ let Inst{25-18} = VDST;
-+ let Inst{31-26} = 0x32; // encoding
-+ let EncodingType = 11; // SIInstrEncodingType::VINTRP
-+
-+ let neverHasSideEffects = 1;
-+ let mayLoad = 1;
-+ let mayStore = 0;
-+}
-+
-+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32 <outs, ins, asm, pattern> {
-+
-+ bits<8> VDST;
-+ bits<9> SRC0;
-+
-+ let Inst{8-0} = SRC0;
-+ let Inst{16-9} = op;
-+ let Inst{24-17} = VDST;
-+ let Inst{31-25} = 0x3f; //encoding
-+
-+ let EncodingType = 12; // SIInstrEncodingType::VOP1
-+ let PostEncoderMethod = "VOPPostEncode";
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc32 <outs, ins, asm, pattern> {
-+
-+ bits<8> VDST;
-+ bits<9> SRC0;
-+ bits<8> VSRC1;
-+
-+ let Inst{8-0} = SRC0;
-+ let Inst{16-9} = VSRC1;
-+ let Inst{24-17} = VDST;
-+ let Inst{30-25} = op;
-+ let Inst{31} = 0x0; //encoding
-+
-+ let EncodingType = 13; // SIInstrEncodingType::VOP2
-+ let PostEncoderMethod = "VOPPostEncode";
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc64 <outs, ins, asm, pattern> {
-+
-+ bits<8> VDST;
-+ bits<9> SRC0;
-+ bits<9> SRC1;
-+ bits<9> SRC2;
-+ bits<3> ABS;
-+ bits<1> CLAMP;
-+ bits<2> OMOD;
-+ bits<3> NEG;
-+
-+ let Inst{7-0} = VDST;
-+ let Inst{10-8} = ABS;
-+ let Inst{11} = CLAMP;
-+ let Inst{25-17} = op;
-+ let Inst{31-26} = 0x34; //encoding
-+ let Inst{40-32} = SRC0;
-+ let Inst{49-41} = SRC1;
-+ let Inst{58-50} = SRC2;
-+ let Inst{60-59} = OMOD;
-+ let Inst{63-61} = NEG;
-+
-+ let EncodingType = 14; // SIInstrEncodingType::VOP3
-+ let PostEncoderMethod = "VOPPostEncode";
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
-+ Enc64 <outs, ins, asm, pattern> {
-+
-+ bits<8> VDST;
-+ bits<9> SRC0;
-+ bits<9> SRC1;
-+ bits<9> SRC2;
-+ bits<7> SDST;
-+ bits<2> OMOD;
-+ bits<3> NEG;
-+
-+ let Inst{7-0} = VDST;
-+ let Inst{14-8} = SDST;
-+ let Inst{25-17} = op;
-+ let Inst{31-26} = 0x34; //encoding
-+ let Inst{40-32} = SRC0;
-+ let Inst{49-41} = SRC1;
-+ let Inst{58-50} = SRC2;
-+ let Inst{60-59} = OMOD;
-+ let Inst{63-61} = NEG;
-+
-+ let EncodingType = 14; // SIInstrEncodingType::VOP3
-+ let PostEncoderMethod = "VOPPostEncode";
-+
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
-+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
-+
-+ bits<9> SRC0;
-+ bits<8> VSRC1;
-+
-+ let Inst{8-0} = SRC0;
-+ let Inst{16-9} = VSRC1;
-+ let Inst{24-17} = op;
-+ let Inst{31-25} = 0x3e;
-+
-+ let EncodingType = 15; //SIInstrEncodingType::VOPC
-+ let PostEncoderMethod = "VOPPostEncode";
-+ let DisableEncoding = "$dst";
-+ let mayLoad = 0;
-+ let mayStore = 0;
-+ let hasSideEffects = 0;
-+}
-+
-+} // End Uses = [EXEC]
-+
-+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
-+ op,
-+ (outs VReg_128:$vdata),
-+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
-+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
-+ GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
-+ asm,
-+ []> {
-+ let mayLoad = 1;
-+ let mayStore = 0;
-+}
-+
-+class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
-+ op,
-+ (outs regClass:$dst),
-+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
-+ i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
-+ i1imm:$tfe, SReg_32:$soffset),
-+ asm,
-+ []> {
-+ let mayLoad = 1;
-+ let mayStore = 0;
-+}
-+
-+class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
-+ op,
-+ (outs regClass:$dst),
-+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
-+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
-+ i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
-+ asm,
-+ []> {
-+ let mayLoad = 1;
-+ let mayStore = 0;
-+}
-+
-+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
-+ op,
-+ (outs),
-+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
-+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
-+ GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
-+ asm,
-+ []> {
-+ let mayStore = 1;
-+ let mayLoad = 0;
-+}
-+
-+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
-+ ValueType vt> {
-+ def _IMM : SMRD <
-+ op,
-+ (outs dstClass:$dst),
-+ (ins SMRDmemri:$src0),
-+ asm,
-+ [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
-+ >;
-+
-+ def _SGPR : SMRD <
-+ op,
-+ (outs dstClass:$dst),
-+ (ins SMRDmemrr:$src0),
-+ asm,
-+ [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
-+ >;
-+}
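-+// The two variants above: _IMM folds a small immediate dword offset into
-+// the encoding (ADDR_Offset8), while _SGPR reads the offset from a scalar
-+// register (ADDR_Reg).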
-+
-+multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
-+ defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
-+ defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
-+}
-+
-+include "SIInstrFormats.td"
-+include "SIInstructions.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstructions.td llvm-r600/lib/Target/R600/SIInstructions.td
---- llvm-3.2.src/lib/Target/R600/SIInstructions.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIInstructions.td 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,1357 @@
-+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+// This file was originally auto-generated from a GPU register header file,
-+// with all of the instruction definitions commented out. Instructions that
-+// are not yet supported remain commented out.
-+//===----------------------------------------------------------------------===//
-+
-+def isSI : Predicate<"Subtarget.device()"
-+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
-+
-+let Predicates = [isSI] in {
-+
-+let neverHasSideEffects = 1 in {
-+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
-+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
-+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
-+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
-+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
-+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
-+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
-+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
-+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
-+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
-+} // End neverHasSideEffects = 1
-+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
-+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
-+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
-+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
-+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
-+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
-+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
-+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
-+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
-+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
-+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
-+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
-+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
-+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
-+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
-+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
-+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
-+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
-+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
-+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
-+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
-+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
-+
-+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
-+
-+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
-+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
-+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
-+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
-+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
-+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
-+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
-+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
-+
-+} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC]
-+
-+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
-+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
-+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
-+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
-+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
-+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
-+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
-+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
-+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
-+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
-+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
-+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
-+
-+/*
-+This instruction is disabled for now, until we can teach the instruction
-+selector to choose correctly between the S_CMP* and V_CMP* instructions.
-+
-+When this instruction is enabled, the code generator sometimes produces this
-+invalid sequence:
-+
-+SCC = S_CMPK_EQ_I32 SGPR0, imm
-+VCC = COPY SCC
-+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
-+
-+def S_CMPK_EQ_I32 : SOPK <
-+ 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
-+ "S_CMPK_EQ_I32",
-+ [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
-+>;
-+*/
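-+// For reference, a sketch of the selection we would want once that is
-+// sorted out (keeping the condition in VCC so no SCC copy is needed):
-+//
-+// VCC = V_CMP_EQ_I32 SGPR0, imm
-+// VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1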
-+
-+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
-+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
-+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
-+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
-+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
-+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
-+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
-+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
-+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
-+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
-+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
-+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
-+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
-+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
-+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
-+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
-+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
-+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
-+//def EXP : EXP_ <0x00000000, "EXP", []>;
-+
-+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>;
-+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>;
-+def : Pat <
-+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
-+ (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>;
-+def : Pat <
-+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
-+ (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>;
-+def : Pat <
-+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
-+ (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>;
-+def : Pat <
-+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
-+ (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>;
-+def : Pat <
-+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
-+ (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>;
-+def : Pat <
-+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
-+ (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>;
-+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>;
-+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>;
-+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>;
-+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>;
-+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>;
-+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>;
-+def : Pat <
-+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
-+ (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>;
-+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>;
-+
-+// Side effect is writing to EXEC
-+let hasSideEffects = 1 in {
-+
-+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>;
-+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>;
-+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>;
-+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>;
-+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>;
-+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>;
-+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>;
-+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>;
-+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>;
-+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>;
-+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>;
-+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>;
-+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>;
-+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>;
-+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>;
-+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>;
-+
-+} // End hasSideEffects = 1
-+
-+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>;
-+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>;
-+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>;
-+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>;
-+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>;
-+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>;
-+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>;
-+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>;
-+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>;
-+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>;
-+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>;
-+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>;
-+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>;
-+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>;
-+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>;
-+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>;
-+
-+// Side effect is writing to EXEC
-+let hasSideEffects = 1 in {
-+
-+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>;
-+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>;
-+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>;
-+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>;
-+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>;
-+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>;
-+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>;
-+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>;
-+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>;
-+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>;
-+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>;
-+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>;
-+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>;
-+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>;
-+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>;
-+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>;
-+
-+} // End hasSideEffects = 1
-+
-+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>;
-+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>;
-+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>;
-+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>;
-+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>;
-+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>;
-+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>;
-+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>;
-+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>;
-+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>;
-+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>;
-+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>;
-+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>;
-+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>;
-+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>;
-+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>;
-+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>;
-+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>;
-+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>;
-+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>;
-+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>;
-+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>;
-+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>;
-+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>;
-+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>;
-+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>;
-+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>;
-+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>;
-+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>;
-+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>;
-+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>;
-+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>;
-+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>;
-+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>;
-+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>;
-+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>;
-+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>;
-+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>;
-+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>;
-+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>;
-+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>;
-+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>;
-+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>;
-+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>;
-+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>;
-+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>;
-+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>;
-+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>;
-+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>;
-+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>;
-+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>;
-+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>;
-+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>;
-+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>;
-+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>;
-+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>;
-+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>;
-+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>;
-+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>;
-+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>;
-+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>;
-+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>;
-+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>;
-+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>;
-+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>;
-+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>;
-+def : Pat <
-+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
-+ (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>;
-+def : Pat <
-+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
-+ (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>;
-+def : Pat <
-+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
-+ (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>;
-+def : Pat <
-+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
-+ (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>;
-+def : Pat <
-+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
-+ (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>;
-+def : Pat <
-+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
-+ (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>;
-+
-+let hasSideEffects = 1 in {
-+
-+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>;
-+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>;
-+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>;
-+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>;
-+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>;
-+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>;
-+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>;
-+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>;
-+
-+} // End hasSideEffects
-+
-+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>;
-+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>;
-+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>;
-+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>;
-+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>;
-+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>;
-+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>;
-+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>;
-+
-+let hasSideEffects = 1 in {
-+
-+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>;
-+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>;
-+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>;
-+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>;
-+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>;
-+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>;
-+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>;
-+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>;
-+
-+} // End hasSideEffects
-+
-+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>;
-+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>;
-+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>;
-+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>;
-+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>;
-+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>;
-+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>;
-+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>;
-+
-+let hasSideEffects = 1 in {
-+
-+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>;
-+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>;
-+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>;
-+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>;
-+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>;
-+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>;
-+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>;
-+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>;
-+
-+} // End hasSideEffects
-+
-+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>;
-+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>;
-+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>;
-+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>;
-+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>;
-+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>;
-+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>;
-+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>;
-+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>;
-+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>;
-+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>;
-+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>;
-+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>;
-+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>;
-+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>;
-+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>;
-+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>;
-+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>;
-+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>;
-+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>;
-+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
-+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
-+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
-+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
-+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
-+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
-+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
-+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
-+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
-+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
-+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
-+//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
-+//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
-+//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
-+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
-+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
-+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
-+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
-+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
-+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
-+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
-+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
-+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
-+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
-+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
-+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
-+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
-+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
-+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
-+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
-+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
-+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
-+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
-+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
-+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
-+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
-+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
-+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
-+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
-+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
-+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
-+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
-+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
-+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
-+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
-+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
-+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
-+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
-+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
-+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
-+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
-+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
-+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
-+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
-+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
-+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
-+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
-+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
-+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
-+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
-+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
-+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
-+
-+defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;
-+
-+//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
-+defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
-+defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
-+//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
-+//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
-+//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
-+//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
-+//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
-+//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
-+
-+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
-+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
-+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
-+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
-+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
-+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
-+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
-+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
-+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
-+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
-+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
-+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
-+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
-+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
-+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
-+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
-+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
-+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
-+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
-+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
-+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
-+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
-+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
-+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
-+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
-+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
-+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
-+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
-+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
-+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
-+def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">;
-+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
-+def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
-+def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
-+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
-+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-+//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>;
-+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
-+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
-+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-+//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>;
-+//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>;
-+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
-+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
-+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
-+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
-+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
-+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
-+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
-+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
-+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
-+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
-+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
-+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
-+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
-+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
-+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
-+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
-+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
-+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
-+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
-+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
-+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
-+//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
-+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
-+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
-+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
-+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
-+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
-+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
-+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
-+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
-+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
-+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
-+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
-+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
-+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
-+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
-+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
-+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
-+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
-+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
-+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
-+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
-+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
-+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
-+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
-+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
-+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
-+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
-+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
-+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
-+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
-+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
-+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
-+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
-+
-+let neverHasSideEffects = 1 in {
-+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
-+} // End neverHasSideEffects
-+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
-+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
-+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
-+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
-+ [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))]
-+>;
-+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
-+defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
-+ [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))]
-+>;
-+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
-+////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
-+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
-+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
-+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
-+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
-+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
-+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
-+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
-+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
-+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
-+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
-+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
-+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
-+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
-+ [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))]
-+>;
-+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
-+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>;
-+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
-+ [(set VReg_32:$dst, (frint AllReg_32:$src0))]
-+>;
-+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
-+ [(set VReg_32:$dst, (ffloor AllReg_32:$src0))]
-+>;
-+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
-+ [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))]
-+>;
-+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
-+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>;
-+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
-+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
-+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
-+ [(set VReg_32:$dst, (fdiv FP_ONE, AllReg_32:$src0))]
-+>;
-+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
-+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
-+defm V_RSQ_LEGACY_F32 : VOP1_32 <
-+ 0x0000002d, "V_RSQ_LEGACY_F32",
-+ [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))]
-+>;
-+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
-+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
-+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
-+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
-+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
-+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
-+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
-+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
-+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
-+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
-+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
-+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
-+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
-+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
-+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
-+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
-+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
-+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
-+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
-+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
-+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
-+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
-+defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
-+
-+def V_INTERP_P1_F32 : VINTRP <
-+ 0x00000000,
-+ (outs VReg_32:$dst),
-+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
-+ "V_INTERP_P1_F32",
-+ []> {
-+ let DisableEncoding = "$m0";
-+}
-+
-+def V_INTERP_P2_F32 : VINTRP <
-+ 0x00000001,
-+ (outs VReg_32:$dst),
-+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
-+ "V_INTERP_P2_F32",
-+ []> {
-+
-+ let Constraints = "$src0 = $dst";
-+ let DisableEncoding = "$src0,$m0";
-+
-+}
-+
-+def V_INTERP_MOV_F32 : VINTRP <
-+ 0x00000002,
-+ (outs VReg_32:$dst),
-+ (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
-+ "V_INTERP_MOV_F32",
-+ []> {
-+ let VSRC = 0;
-+ let DisableEncoding = "$m0";
-+}
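-+// Our reading of the two-phase interpolation above (not spelled out in this
-+// file): P1 produces a partial result from the barycentric i coordinate and
-+// P2 accumulates the j term into it, hence the $src0 = $dst constraint on
-+// P2. M0 carries the attribute state in every phase and is hidden from the
-+// encoding.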
-+
-+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
-+
-+let isTerminator = 1 in {
-+
-+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
-+ [(IL_retflag)]> {
-+ let SIMM16 = 0;
-+ let isBarrier = 1;
-+ let hasCtrlDep = 1;
-+}
-+
-+let isBranch = 1 in {
-+def S_BRANCH : SOPP <
-+ 0x00000002, (ins brtarget:$target), "S_BRANCH",
-+ [(br bb:$target)]> {
-+ let isBarrier = 1;
-+}
-+
-+let DisableEncoding = "$scc" in {
-+def S_CBRANCH_SCC0 : SOPP <
-+ 0x00000004, (ins brtarget:$target, SCCReg:$scc),
-+ "S_CBRANCH_SCC0", []
-+>;
-+def S_CBRANCH_SCC1 : SOPP <
-+ 0x00000005, (ins brtarget:$target, SCCReg:$scc),
-+ "S_CBRANCH_SCC1",
-+ []
-+>;
-+} // End DisableEncoding = "$scc"
-+
-+def S_CBRANCH_VCCZ : SOPP <
-+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
-+ "S_CBRANCH_VCCZ",
-+ []
-+>;
-+def S_CBRANCH_VCCNZ : SOPP <
-+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
-+ "S_CBRANCH_VCCNZ",
-+ []
-+>;
-+
-+let DisableEncoding = "$exec" in {
-+def S_CBRANCH_EXECZ : SOPP <
-+ 0x00000008, (ins brtarget:$target, EXECReg:$exec),
-+ "S_CBRANCH_EXECZ",
-+ []
-+>;
-+def S_CBRANCH_EXECNZ : SOPP <
-+ 0x00000009, (ins brtarget:$target, EXECReg:$exec),
-+ "S_CBRANCH_EXECNZ",
-+ []
-+>;
-+} // End DisableEncoding = "$exec"
-+
-+
-+} // End isBranch = 1
-+} // End isTerminator = 1
-+
-+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
-+let hasSideEffects = 1 in {
-+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
-+ []
-+>;
-+} // End hasSideEffects
-+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
-+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
-+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
-+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
-+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
-+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
-+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
-+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
-+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
-+
-+def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
-+ (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32",
-+ []
-+> {
-+ let DisableEncoding = "$vcc";
-+}
-+
-+def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
-+ (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
-+ "V_CNDMASK_B32_e64",
-+ [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))]
-+>;
-+
-+// f32 pattern for V_CNDMASK_B32_e64
-+def : Pat <
-+ (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)),
-+ (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2)
-+>;
-+
-+defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
-+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
-+
-+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>;
-+def : Pat <
-+ (f32 (fadd AllReg_32:$src0, VReg_32:$src1)),
-+ (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+
-+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>;
-+def : Pat <
-+ (f32 (fsub AllReg_32:$src0, VReg_32:$src1)),
-+ (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1)
-+>;
-+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
-+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
-+defm V_MUL_LEGACY_F32 : VOP2_32 <
-+ 0x00000007, "V_MUL_LEGACY_F32",
-+ [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+
-+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
-+ [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
-+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
-+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
-+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
-+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
-+ [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+
-+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
-+ [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
-+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
-+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
-+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
-+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
-+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
-+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
-+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
-+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
-+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
-+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
-+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
-+ [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
-+ [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
-+ [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
-+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
-+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
-+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
-+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
-+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
-+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
-+let Defs = [VCC] in { // Carry-out goes to VCC
-+defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32",
-+ [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
-+>;
-+defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32",
-+ [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
-+>;
-+} // End Defs = [VCC]
-+defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>;
-+defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>;
-+defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>;
-+defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>;
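-+// A sketch of the carry chain a 64-bit add would use (an assumption based
-+// on the VCC def above; no such pattern is defined in this file):
-+//
-+// VGPR0 = V_ADD_I32 VGPR2, VGPR4 // low half, carry-out goes to VCC
-+// VGPR1 = V_ADDC_U32 VGPR3, VGPR5 // high half, consumes the VCC carry-in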
-+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
-+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
-+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
-+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
-+ [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
-+>;
-+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
-+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
-+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
-+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
-+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
-+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
-+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
-+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
-+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
-+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
-+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
-+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
-+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
-+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
-+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
-+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
-+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
-+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
-+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
-+
-+let neverHasSideEffects = 1 in {
-+
-+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
-+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
-+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
-+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
-+
-+} // End neverHasSideEffects
-+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
-+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
-+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
-+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
-+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
-+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
-+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
-+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
-+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
-+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
-+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
-+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
-+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
-+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
-+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
-+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
-+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
-+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
-+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
-+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
-+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
-+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
-+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
-+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
-+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
-+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
-+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
-+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
-+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
-+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
-+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
-+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
-+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
-+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
-+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
-+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
-+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
-+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
-+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
-+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
-+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
-+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
-+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
-+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
-+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
-+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
-+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
-+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
-+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
-+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
-+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
-+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
-+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
-+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
-+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
-+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
-+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
-+
-+def S_CSELECT_B32 : SOP2 <
-+ 0x0000000a, (outs SReg_32:$dst),
-+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
-+ [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
-+>;
-+
-+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
-+
-+// f32 pattern for S_CSELECT_B32
-+def : Pat <
-+ (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
-+ (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
-+>;
-+
-+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
-+
-+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
-+ [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))]
-+>;
-+def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
-+ [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))]
-+>;
-+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
-+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
-+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
-+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
-+def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
-+def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
-+def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
-+def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
-+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
-+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
-+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
-+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
-+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
-+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
-+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
-+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
-+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
-+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
-+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
-+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
-+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
-+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
-+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
-+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
-+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
-+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
-+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
-+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
-+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
-+
-+class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
-+ (outs VReg_32:$dst),
-+ (ins immType:$src0),
-+ "V_MOV_IMM",
-+ [(set VReg_32:$dst, (immNode:$src0))]
-+>;
-+
-+let isCodeGenOnly = 1, isPseudo = 1 in {
-+
-+def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
-+def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
-+
-+def S_MOV_IMM_I32 : InstSI <
-+ (outs SReg_32:$dst),
-+ (ins i32imm:$src0),
-+ "S_MOV_IMM_I32",
-+ [(set SReg_32:$dst, (imm:$src0))]
-+>;
-+
-+// i64 immediates aren't really supported in hardware, but LLVM will use the
-+// i64 type for indices on load and store instructions. The pattern for
-+// S_MOV_IMM_I64 only matches i64 immediates that fit into 32 bits, which
-+// the hardware can handle.
-+def S_MOV_IMM_I64 : InstSI <
-+ (outs SReg_64:$dst),
-+ (ins i64imm:$src0),
-+ "S_MOV_IMM_I64 $dst, $src0",
-+ [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
-+>;
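-+// For example, (i64 0x12345678) fits in 32 bits and matches the pattern,
-+// while a full 64-bit constant such as (i64 0x123456789abcdef0) does not
-+// and would have to be materialized some other way.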
-+
-+} // End isCodeGenOnly, isPseudo = 1
-+
-+class SI_LOAD_LITERAL<Operand ImmType> :
-+ Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
-+
-+ bits<32> imm;
-+ let Inst{31-0} = imm;
-+}
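-+// The literal is a raw trailing dword; presumably it is emitted directly
-+// after the instruction that references it.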
-+
-+def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
-+def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
-+
-+let isCodeGenOnly = 1, isPseudo = 1 in {
-+
-+def SET_M0 : InstSI <
-+ (outs SReg_32:$dst),
-+ (ins i32imm:$src0),
-+ "SET_M0",
-+ [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
-+>;
-+
-+def LOAD_CONST : AMDGPUShaderInst <
-+ (outs GPRF32:$dst),
-+ (ins i32imm:$src),
-+ "LOAD_CONST $dst, $src",
-+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
-+>;
-+
-+let usesCustomInserter = 1 in {
-+
-+def SI_V_CNDLT : InstSI <
-+ (outs VReg_32:$dst),
-+ (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
-+ "SI_V_CNDLT $dst, $src0, $src1, $src2",
-+ [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
-+>;
-+
-+def SI_INTERP : InstSI <
-+ (outs VReg_32:$dst),
-+ (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
-+ "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
-+ []
-+>;
-+
-+def SI_INTERP_CONST : InstSI <
-+ (outs VReg_32:$dst),
-+ (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
-+ "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
-+ [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
-+ imm:$attr, SReg_32:$params))]
-+>;
-+
-+def SI_WQM : InstSI <
-+ (outs),
-+ (ins),
-+ "SI_WQM",
-+ [(int_SI_wqm)]
-+>;
-+
-+} // end usesCustomInserter
-+
-+// SI pseudo instructions. These are used by the CFG structurizer pass
-+// and are lowered to real ISA instructions by SILowerControlFlow prior to
-+// codegen.
-+
-+let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
-+ Uses = [EXEC], Defs = [EXEC] in {
-+
-+let isBranch = 1, isTerminator = 1 in {
-+
-+def SI_IF : InstSI <
-+ (outs SReg_64:$dst),
-+ (ins SReg_1:$vcc, brtarget:$target),
-+ "SI_IF",
-+ [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))]
-+>;
-+
-+def SI_ELSE : InstSI <
-+ (outs SReg_64:$dst),
-+ (ins SReg_64:$src, brtarget:$target),
-+ "SI_ELSE",
-+ [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
-+
-+ let Constraints = "$src = $dst";
-+}
-+
-+def SI_LOOP : InstSI <
-+ (outs),
-+ (ins SReg_64:$saved, brtarget:$target),
-+ "SI_LOOP",
-+ [(int_SI_loop SReg_64:$saved, bb:$target)]
-+>;
-+
-+} // end isBranch = 1, isTerminator = 1
-+
-+def SI_BREAK : InstSI <
-+ (outs SReg_64:$dst),
-+ (ins SReg_64:$src),
-+  "SI_BREAK",
-+ [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
-+>;
-+
-+def SI_IF_BREAK : InstSI <
-+ (outs SReg_64:$dst),
-+ (ins SReg_1:$vcc, SReg_64:$src),
-+ "SI_IF_BREAK",
-+ [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))]
-+>;
-+
-+def SI_ELSE_BREAK : InstSI <
-+ (outs SReg_64:$dst),
-+ (ins SReg_64:$src0, SReg_64:$src1),
-+ "SI_ELSE_BREAK",
-+ [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
-+>;
-+
-+def SI_END_CF : InstSI <
-+ (outs),
-+ (ins SReg_64:$saved),
-+ "SI_END_CF",
-+ [(int_SI_end_cf SReg_64:$saved)]
-+>;
-+
-+def SI_KILL : InstSI <
-+ (outs),
-+ (ins VReg_32:$src),
-+ "SI_KIL $src",
-+ [(int_AMDGPU_kill VReg_32:$src)]
-+>;
-+
-+} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
-+ // Uses = [EXEC], Defs = [EXEC]
-+
-+} // end isCodeGenOnly, isPseudo
-+
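-+// Unconditional pixel kill: hand SI_KILL a negative constant (0xbf800000 is
-+// -1.0f), which always clears the pixel from the exec mask.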
-+def : Pat <
-+ (int_AMDGPU_kilp),
-+ (SI_KILL (V_MOV_IMM_I32 0xbf800000))
-+>;
-+
-+/* int_SI_vs_load_input */
-+def : Pat<
-+ (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
-+ VReg_32:$buf_idx_vgpr),
-+ (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
-+ VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
-+ 0, 0, (i32 SREG_LIT_0))
-+>;
-+
-+/* int_SI_export */
-+def : Pat <
-+ (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
-+ VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
-+ (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
-+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
-+>;
-+
-+/* int_SI_sample */
-+def : Pat <
-+ (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm),
-+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+ SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+def : Pat <
-+ (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
-+ (IMAGE_SAMPLE imm:$writemask, 1, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+ SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+/* int_SI_sample_lod */
-+def : Pat <
-+ (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm),
-+ (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+ SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+/* int_SI_sample_bias */
-+def : Pat <
-+ (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm),
-+ (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+ SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+def CLAMP_SI : CLAMP<VReg_32>;
-+def FABS_SI : FABS<VReg_32>;
-+def FNEG_SI : FNEG<VReg_32>;
-+
-+def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
-+def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
-+def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
-+def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
-+
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
-+
-+def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-+def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
-+
-+def : BitConvert <i32, f32, SReg_32>;
-+def : BitConvert <i32, f32, VReg_32>;
-+
-+def : BitConvert <f32, i32, SReg_32>;
-+def : BitConvert <f32, i32, VReg_32>;
-+
-+def : Pat <
-+ (i64 (SIsreg1_bitcast SReg_1:$vcc)),
-+ (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64))
-+>;
-+
-+def : Pat <
-+ (i1 (SIsreg1_bitcast SReg_64:$vcc)),
-+ (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1)
-+>;
-+
-+def : Pat <
-+ (i64 (SIvcc_bitcast VCCReg:$vcc)),
-+ (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
-+>;
-+
-+def : Pat <
-+ (i1 (SIvcc_bitcast SReg_64:$vcc)),
-+ (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
-+>;
-+
-+/********** ===================== **********/
-+/********** Interpolation Patterns **********/
-+/********** ===================== **********/
-+
-+def : Pat <
-+ (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
-+ (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
-+ imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
-+ (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
-+ imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
-+ (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
-+ imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
-+ (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
-+ imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_read_face),
-+ (f32 FRONT_FACE)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_read_pos 0),
-+ (f32 POS_X_FLOAT)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_read_pos 1),
-+ (f32 POS_Y_FLOAT)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_read_pos 2),
-+ (f32 POS_Z_FLOAT)
-+>;
-+
-+def : Pat <
-+ (int_SI_fs_read_pos 3),
-+ (f32 POS_W_FLOAT)
-+>;
-+
-+/********** ================== **********/
-+/********** Intrinsic Patterns **********/
-+/********** ================== **********/
-+
-+/* llvm.AMDGPU.pow */
-+/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
-+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
-+
-+def : Pat <
-+ (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
-+ (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
-+>;
-+
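-+/* fdiv is expanded as src0 * (1.0 / src1) using the reciprocal instruction */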
-+def : Pat<
-+ (fdiv AllReg_32:$src0, AllReg_32:$src1),
-+ (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
-+>;
-+
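-+/* int_AMDGPU_cube: build the v4f32 result one lane at a time from the
-+   source's xyz components: tc -> x, sc -> y, ma -> z, face id -> w. */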
-+def : Pat <
-+ (int_AMDGPU_cube VReg_128:$src),
-+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
-+ (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+ 0, 0, 0, 0), sel_x),
-+ (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+ 0, 0, 0, 0), sel_y),
-+ (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+ 0, 0, 0, 0), sel_z),
-+ (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+ 0, 0, 0, 0), sel_w)
-+>;
-+
-+/********** ================== **********/
-+/********** VOP3 Patterns **********/
-+/********** ================== **********/
-+
-+def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)),
-+ (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2,
-+ 0, 0, 0, 0)>;
-+
-+} // End isSI predicate
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIIntrinsics.td llvm-r600/lib/Target/R600/SIIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/SIIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIIntrinsics.td 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,54 @@
-+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// SI Intrinsic Definitions
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+let TargetPrefix = "SI", isTarget = 1 in {
-+
-+ def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
-+ /* XXX: We may need a separate intrinsic here for loading integer values */
-+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-+ def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
-+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
-+ def int_SI_wqm : Intrinsic <[], [], []>;
-+
-+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>;
-+
-+ def int_SI_sample : Sample;
-+ def int_SI_sample_bias : Sample;
-+ def int_SI_sample_lod : Sample;
-+
-+ /* Interpolation Intrinsics */
-+
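-+ /* M0 must hold the params operand while interpolating; the custom
-+    inserter copies it there (see LowerSI_INTERP). */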
-+ def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
-+ class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
-+
-+ def int_SI_fs_interp_linear_center : Interp;
-+ def int_SI_fs_interp_linear_centroid : Interp;
-+ def int_SI_fs_interp_persp_center : Interp;
-+ def int_SI_fs_interp_persp_centroid : Interp;
-+ def int_SI_fs_interp_constant : Interp;
-+
-+ def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
-+ def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-+
-+ /* Control flow Intrinsics */
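-+ /* The i64 values passed between these intrinsics carry the saved EXEC
-+    mask; SILowerControlFlow expands them into save/restore sequences. */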
-+
-+ def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
-+ def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
-+ def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
-+ def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
-+ def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
-+ def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
-+ def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIISelLowering.cpp llvm-r600/lib/Target/R600/SIISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/SIISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIISelLowering.cpp 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,486 @@
-+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Custom DAG lowering for SI
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "SIISelLowering.h"
-+#include "AMDIL.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "SIInstrInfo.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "SIRegisterInfo.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+
-+using namespace llvm;
-+
-+SITargetLowering::SITargetLowering(TargetMachine &TM) :
-+ AMDGPUTargetLowering(TM),
-+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) {
-+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
-+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
-+ addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
-+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
-+ addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
-+ addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
-+
-+ addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
-+ addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
-+
-+ computeRegisterProperties();
-+
-+ setOperationAction(ISD::AND, MVT::i1, Custom);
-+
-+ setOperationAction(ISD::ADD, MVT::i64, Legal);
-+ setOperationAction(ISD::ADD, MVT::i32, Legal);
-+
-+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-+
-+ // We need to custom lower loads from the USER_SGPR address space, so we can
-+ // add the SGPRs as livein registers.
-+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
-+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
-+
-+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
-+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-+
-+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-+ setTargetDAGCombine(ISD::SELECT_CC);
-+
-+ setTargetDAGCombine(ISD::SETCC);
-+}
-+
-+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
-+ MachineInstr * MI, MachineBasicBlock * BB) const {
-+ const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
-+ MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
-+ MachineBasicBlock::iterator I = MI;
-+
-+ switch (MI->getOpcode()) {
-+ default:
-+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
-+ case AMDGPU::BRANCH: return BB;
-+ case AMDGPU::CLAMP_SI:
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ // VSRC1-2 are unused, but we still need to fill all the
-+ // operand slots, so we just reuse the VSRC0 operand
-+ .addOperand(MI->getOperand(1))
-+ .addOperand(MI->getOperand(1))
-+ .addImm(0) // ABS
-+ .addImm(1) // CLAMP
-+ .addImm(0) // OMOD
-+ .addImm(0); // NEG
-+ MI->eraseFromParent();
-+ break;
-+
-+ case AMDGPU::FABS_SI:
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ // VSRC1-2 are unused, but we still need to fill all the
-+ // operand slots, so we just reuse the VSRC0 operand
-+ .addOperand(MI->getOperand(1))
-+ .addOperand(MI->getOperand(1))
-+ .addImm(1) // ABS
-+ .addImm(0) // CLAMP
-+ .addImm(0) // OMOD
-+ .addImm(0); // NEG
-+ MI->eraseFromParent();
-+ break;
-+
-+ case AMDGPU::FNEG_SI:
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ // VSRC1-2 are unused, but we still need to fill all the
-+ // operand slots, so we just reuse the VSRC0 operand
-+ .addOperand(MI->getOperand(1))
-+ .addOperand(MI->getOperand(1))
-+ .addImm(0) // ABS
-+ .addImm(0) // CLAMP
-+ .addImm(0) // OMOD
-+ .addImm(1); // NEG
-+ MI->eraseFromParent();
-+ break;
-+ case AMDGPU::SHADER_TYPE:
-+ BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
-+ MI->getOperand(0).getImm();
-+ MI->eraseFromParent();
-+ break;
-+
-+ case AMDGPU::SI_INTERP:
-+ LowerSI_INTERP(MI, *BB, I, MRI);
-+ break;
-+ case AMDGPU::SI_INTERP_CONST:
-+ LowerSI_INTERP_CONST(MI, *BB, I, MRI);
-+ break;
-+ case AMDGPU::SI_WQM:
-+ LowerSI_WQM(MI, *BB, I, MRI);
-+ break;
-+ case AMDGPU::SI_V_CNDLT:
-+ LowerSI_V_CNDLT(MI, *BB, I, MRI);
-+ break;
-+ }
-+ return BB;
-+}
-+
-+void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
-+ .addReg(AMDGPU::EXEC);
-+
-+ MI->eraseFromParent();
-+}
-+
-+void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-+ unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-+ unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
-+ MachineOperand dst = MI->getOperand(0);
-+ MachineOperand iReg = MI->getOperand(1);
-+ MachineOperand jReg = MI->getOperand(2);
-+ MachineOperand attr_chan = MI->getOperand(3);
-+ MachineOperand attr = MI->getOperand(4);
-+ MachineOperand params = MI->getOperand(5);
-+
-+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
-+ .addOperand(params);
-+
-+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
-+ .addOperand(iReg)
-+ .addOperand(attr_chan)
-+ .addOperand(attr)
-+ .addReg(M0);
-+
-+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
-+ .addOperand(dst)
-+ .addReg(tmp)
-+ .addOperand(jReg)
-+ .addOperand(attr_chan)
-+ .addOperand(attr)
-+ .addReg(M0);
-+
-+ MI->eraseFromParent();
-+}
-+
-+void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
-+ MachineBasicBlock &BB, MachineBasicBlock::iterator I,
-+ MachineRegisterInfo &MRI) const {
-+ MachineOperand dst = MI->getOperand(0);
-+ MachineOperand attr_chan = MI->getOperand(1);
-+ MachineOperand attr = MI->getOperand(2);
-+ MachineOperand params = MI->getOperand(3);
-+ unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
-+
-+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
-+ .addOperand(params);
-+
-+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
-+ .addOperand(dst)
-+ .addOperand(attr_chan)
-+ .addOperand(attr)
-+ .addReg(M0);
-+
-+ MI->eraseFromParent();
-+}
-+
-+void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-+ unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-+
-+ BuildMI(BB, I, BB.findDebugLoc(I),
-+ TII->get(AMDGPU::V_CMP_GT_F32_e32),
-+ VCC)
-+ .addReg(AMDGPU::SREG_LIT_0)
-+ .addOperand(MI->getOperand(1));
-+
-+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(3))
-+ .addOperand(MI->getOperand(2))
-+ .addReg(VCC);
-+
-+ MI->eraseFromParent();
-+}
-+
-+EVT SITargetLowering::getSetCCResultType(EVT VT) const {
-+ return MVT::i1;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom DAG Lowering Operations
-+//===----------------------------------------------------------------------===//
-+
-+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
-+ switch (Op.getOpcode()) {
-+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
-+ case ISD::LOAD: return LowerLOAD(Op, DAG);
-+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
-+ case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
-+ case ISD::INTRINSIC_WO_CHAIN: {
-+ unsigned IntrinsicID =
-+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+ EVT VT = Op.getValueType();
-+ switch (IntrinsicID) {
-+ case AMDGPUIntrinsic::SI_vs_load_buffer_index:
-+ return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
-+ AMDGPU::VGPR0, VT);
-+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-+ }
-+ break;
-+ }
-+ }
-+ return SDValue();
-+}
-+
-+/// \brief Lower i1 operations on the VCC register.
-+///
-+/// In the VALU context, VCC is a one-bit register, but in the
-+/// SALU context VCC is a 64-bit register (one bit per thread). Since only
-+/// the SALU can perform operations on the VCC register, we need to promote
-+/// the operand types from i1 to i64 in order for tablegen to be able to match
-+/// this operation to the correct SALU instruction. We do this promotion by
-+/// wrapping the operands in SIISD::VCC_BITCAST nodes.
-+///
-+SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
-+ SelectionDAG &DAG,
-+ unsigned VCCNode) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+
-+ SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
-+ DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
-+ Op.getOperand(0)),
-+ DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
-+ Op.getOperand(1)));
-+
-+ return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
-+}
-+
-+/// \brief Helper function for LowerBRCOND
-+static SDNode *findUser(SDValue Value, unsigned Opcode) {
-+
-+ SDNode *Parent = Value.getNode();
-+ for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
-+ I != E; ++I) {
-+
-+ if (I.getUse().get() != Value)
-+ continue;
-+
-+ if (I->getOpcode() == Opcode)
-+ return *I;
-+ }
-+ return 0;
-+}
-+
-+/// This transforms the control flow intrinsics so that the branch destination
-+/// becomes their last parameter; it also switches the branch target with BR
-+/// when the need arises.
-+SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
-+ SelectionDAG &DAG) const {
-+
-+ DebugLoc DL = BRCOND.getDebugLoc();
-+
-+ SDNode *Intr = BRCOND.getOperand(1).getNode();
-+ SDValue Target = BRCOND.getOperand(2);
-+ SDNode *BR = 0;
-+
-+ if (Intr->getOpcode() == ISD::SETCC) {
-+ // As long as we negate the condition everything is fine
-+ SDNode *SetCC = Intr;
-+ assert(SetCC->getConstantOperandVal(1) == 1);
-+
-+ CondCodeSDNode *CC = cast<CondCodeSDNode>(SetCC->getOperand(2).getNode());
-+ assert(CC->get() == ISD::SETNE);
-+ Intr = SetCC->getOperand(0).getNode();
-+
-+ } else {
-+ // Get the target from BR if we don't negate the condition
-+ BR = findUser(BRCOND, ISD::BR);
-+ Target = BR->getOperand(1);
-+ }
-+
-+ assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
-+
-+  // Build the list of result value types for the new intrinsic call
-+ SmallVector<EVT, 4> Res;
-+ for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
-+ Res.push_back(Intr->getValueType(i));
-+
-+ // operands of the new intrinsic call
-+ SmallVector<SDValue, 4> Ops;
-+ Ops.push_back(BRCOND.getOperand(0));
-+ for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
-+ Ops.push_back(Intr->getOperand(i));
-+ Ops.push_back(Target);
-+
-+ // build the new intrinsic call
-+ SDNode *Result = DAG.getNode(
-+ Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
-+ DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
-+
-+ if (BR) {
-+ // Give the branch instruction our target
-+ SDValue Ops[] = {
-+ BR->getOperand(0),
-+ BRCOND.getOperand(2)
-+ };
-+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
-+ }
-+
-+ SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
-+
-+ // Copy the intrinsic results to registers
-+ for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
-+ SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
-+ if (!CopyToReg)
-+ continue;
-+
-+ Chain = DAG.getCopyToReg(
-+ Chain, DL,
-+ CopyToReg->getOperand(1),
-+ SDValue(Result, i - 1),
-+ SDValue());
-+
-+ DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
-+ }
-+
-+ // Remove the old intrinsic from the chain
-+ DAG.ReplaceAllUsesOfValueWith(
-+ SDValue(Intr, Intr->getNumValues() - 1),
-+ Intr->getOperand(0));
-+
-+ return Chain;
-+}
-+
-+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
-+ EVT VT = Op.getValueType();
-+ LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
-+
-+ assert(Ptr);
-+
-+ unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
-+
-+ // We only need to lower USER_SGPR address space loads
-+ if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
-+ return SDValue();
-+ }
-+
-+ // Loads from the USER_SGPR address space can only have constant value
-+ // pointers.
-+ ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
-+ assert(BasePtr);
-+
-+ unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
-+ const TargetRegisterClass * dstClass;
-+ switch (TypeDwordWidth) {
-+ default:
-+ assert(!"USER_SGPR value size not implemented");
-+ return SDValue();
-+ case 1:
-+ dstClass = &AMDGPU::SReg_32RegClass;
-+ break;
-+ case 2:
-+ dstClass = &AMDGPU::SReg_64RegClass;
-+ break;
-+ }
-+ uint64_t Index = BasePtr->getZExtValue();
-+ assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
-+ unsigned SGPRIndex = Index / TypeDwordWidth;
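-+  // For example, an i64 (two-dword) load from constant pointer value 2 maps
-+  // to SGPRIndex 1, i.e. the second 64-bit scalar register, assuming the
-+  // pointer value counts dwords as the alignment assert above implies.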
-+ unsigned Reg = dstClass->getRegister(SGPRIndex);
-+
-+ DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
-+ VT));
-+ return SDValue();
-+}
-+
-+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ SDValue True = Op.getOperand(2);
-+ SDValue False = Op.getOperand(3);
-+ SDValue CC = Op.getOperand(4);
-+ EVT VT = Op.getValueType();
-+ DebugLoc DL = Op.getDebugLoc();
-+
-+ // Possible Min/Max pattern
-+ SDValue MinMax = LowerMinMax(Op, DAG);
-+ if (MinMax.getNode()) {
-+ return MinMax;
-+ }
-+
-+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
-+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom DAG optimizations
-+//===----------------------------------------------------------------------===//
-+
-+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
-+ DAGCombinerInfo &DCI) const {
-+ SelectionDAG &DAG = DCI.DAG;
-+ DebugLoc DL = N->getDebugLoc();
-+ EVT VT = N->getValueType(0);
-+
-+ switch (N->getOpcode()) {
-+ default: break;
-+ case ISD::SELECT_CC: {
-+ ConstantSDNode *True, *False;
-+ // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
-+ if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
-+ && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
-+ && True->isAllOnesValue()
-+ && False->isNullValue()
-+ && VT == MVT::i1) {
-+ return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
-+ N->getOperand(1), N->getOperand(4));
-+
-+ }
-+ break;
-+ }
-+ case ISD::SETCC: {
-+ SDValue Arg0 = N->getOperand(0);
-+ SDValue Arg1 = N->getOperand(1);
-+ SDValue CC = N->getOperand(2);
-+ ConstantSDNode * C = NULL;
-+ ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
-+
-+ // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
-+ if (VT == MVT::i1
-+ && Arg0.getOpcode() == ISD::SIGN_EXTEND
-+ && Arg0.getOperand(0).getValueType() == MVT::i1
-+ && (C = dyn_cast<ConstantSDNode>(Arg1))
-+ && C->isNullValue()
-+ && CCOp == ISD::SETNE) {
-+ return SimplifySetCC(VT, Arg0.getOperand(0),
-+ DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
-+ }
-+ break;
-+ }
-+ }
-+ return SDValue();
-+}
-+
-+#define NODE_NAME_CASE(node) case SIISD::node: return #node;
-+
-+const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const {
-+ switch (Opcode) {
-+ default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
-+ NODE_NAME_CASE(VCC_AND)
-+ NODE_NAME_CASE(VCC_BITCAST)
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIISelLowering.h llvm-r600/lib/Target/R600/SIISelLowering.h
---- llvm-3.2.src/lib/Target/R600/SIISelLowering.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIISelLowering.h 2013-01-25 19:43:57.473383054 +0100
-@@ -0,0 +1,55 @@
-+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief SI DAG Lowering interface definition
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef SIISELLOWERING_H
-+#define SIISELLOWERING_H
-+
-+#include "AMDGPUISelLowering.h"
-+#include "SIInstrInfo.h"
-+
-+namespace llvm {
-+
-+class SITargetLowering : public AMDGPUTargetLowering {
-+ const SIInstrInfo * TII;
-+
-+ void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
-+      MachineBasicBlock::iterator I, unsigned Opcode) const;
-+ void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-+ void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
-+ void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-+ void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-+
-+ SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
-+ unsigned VCCNode) const;
-+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-+
-+public:
-+ SITargetLowering(TargetMachine &tm);
-+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
-+ MachineBasicBlock * BB) const;
-+ virtual EVT getSetCCResultType(EVT VT) const;
-+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-+ virtual const char* getTargetNodeName(unsigned Opcode) const;
-+};
-+
-+} // End namespace llvm
-+
-+#endif //SIISELLOWERING_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SILowerControlFlow.cpp llvm-r600/lib/Target/R600/SILowerControlFlow.cpp
---- llvm-3.2.src/lib/Target/R600/SILowerControlFlow.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SILowerControlFlow.cpp 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,372 @@
-+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass lowers the pseudo control flow instructions to real
-+/// machine instructions.
-+///
-+/// All control flow is handled using predicated instructions and
-+/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
-+/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
-+/// by writing to the 64-bit EXEC register (each bit corresponds to a
-+/// single vector ALU). Typically, for predicates, a vector ALU will write
-+/// to its bit of the VCC register (like EXEC, VCC is 64 bits, one bit for
-+/// each vector ALU) and then the Scalar ALU will AND the VCC register with
-+/// EXEC to update the predicates.
-+///
-+/// For example:
-+/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
-+/// %SGPR0 = SI_IF %VCC
-+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
-+/// %SGPR0 = SI_ELSE %SGPR0
-+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
-+/// SI_END_CF %SGPR0
-+///
-+/// becomes:
-+///
-+/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
-+/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
-+/// S_CBRANCH_EXECZ label0 // This instruction is an optional
-+/// // optimization which allows us to
-+/// // branch if all the bits of
-+/// // EXEC are zero.
-+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
-+///
-+/// label0:
-+/// %SGPR0 = S_OR_SAVEEXEC_B64 %SGPR0 // Restore the exec mask for the ELSE block
-+/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
-+/// S_CBRANCH_EXECZ label1 // Use our branch optimization
-+/// // instruction again.
-+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
-+/// label1:
-+/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "SIInstrInfo.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class SILowerControlFlowPass : public MachineFunctionPass {
-+
-+private:
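-+  // Heuristic: only emit a skip branch when at least this many instructions
-+  // would otherwise be executed with a zero exec mask.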
-+ static const unsigned SkipThreshold = 12;
-+
-+ static char ID;
-+ const TargetInstrInfo *TII;
-+
-+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
-+
-+ void Skip(MachineInstr &From, MachineOperand &To);
-+ void SkipIfDead(MachineInstr &MI);
-+
-+ void If(MachineInstr &MI);
-+ void Else(MachineInstr &MI);
-+ void Break(MachineInstr &MI);
-+ void IfBreak(MachineInstr &MI);
-+ void ElseBreak(MachineInstr &MI);
-+ void Loop(MachineInstr &MI);
-+ void EndCf(MachineInstr &MI);
-+
-+ void Kill(MachineInstr &MI);
-+ void Branch(MachineInstr &MI);
-+
-+public:
-+ SILowerControlFlowPass(TargetMachine &tm) :
-+ MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ const char *getPassName() const {
-+ return "SI Lower control flow instructions";
-+ }
-+
-+};
-+
-+} // End anonymous namespace
-+
-+char SILowerControlFlowPass::ID = 0;
-+
-+FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
-+ return new SILowerControlFlowPass(tm);
-+}
-+
-+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
-+ MachineBasicBlock *To) {
-+
-+ unsigned NumInstr = 0;
-+
-+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
-+ MBB = *MBB->succ_begin()) {
-+
-+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-+ NumInstr < SkipThreshold && I != E; ++I) {
-+
-+ if (I->isBundle() || !I->isBundled())
-+ if (++NumInstr >= SkipThreshold)
-+ return true;
-+ }
-+ }
-+
-+ return false;
-+}
-+
-+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
-+
-+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
-+ return;
-+
-+ DebugLoc DL = From.getDebugLoc();
-+ BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
-+ .addOperand(To)
-+ .addReg(AMDGPU::EXEC);
-+}
-+
-+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
-+
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+
-+ if (!shouldSkip(&MBB, &MBB.getParent()->back()))
-+ return;
-+
-+ MachineBasicBlock::iterator Insert = &MI;
-+ ++Insert;
-+
-+ // If the exec mask is non-zero, skip the next two instructions
-+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
-+ .addImm(3)
-+ .addReg(AMDGPU::EXEC);
-+
-+ // Exec mask is zero: Export to NULL target...
-+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
-+ .addImm(0)
-+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
-+ .addImm(0)
-+ .addImm(1)
-+ .addImm(1)
-+ .addReg(AMDGPU::SREG_LIT_0)
-+ .addReg(AMDGPU::SREG_LIT_0)
-+ .addReg(AMDGPU::SREG_LIT_0)
-+ .addReg(AMDGPU::SREG_LIT_0);
-+
-+ // ... and terminate wavefront
-+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
-+}
-+
-+void SILowerControlFlowPass::If(MachineInstr &MI) {
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+ unsigned Reg = MI.getOperand(0).getReg();
-+ unsigned Vcc = MI.getOperand(1).getReg();
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
-+ .addReg(Vcc);
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
-+ .addReg(AMDGPU::EXEC)
-+ .addReg(Reg);
-+
-+ Skip(MI, MI.getOperand(2));
-+
-+ MI.eraseFromParent();
-+}
-+
-+void SILowerControlFlowPass::Else(MachineInstr &MI) {
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+ unsigned Dst = MI.getOperand(0).getReg();
-+ unsigned Src = MI.getOperand(1).getReg();
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
-+ .addReg(Src); // Saved EXEC
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
-+ .addReg(AMDGPU::EXEC)
-+ .addReg(Dst);
-+
-+ Skip(MI, MI.getOperand(2));
-+
-+ MI.eraseFromParent();
-+}
-+
-+void SILowerControlFlowPass::Break(MachineInstr &MI) {
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+
-+ unsigned Dst = MI.getOperand(0).getReg();
-+ unsigned Src = MI.getOperand(1).getReg();
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
-+ .addReg(AMDGPU::EXEC)
-+ .addReg(Src);
-+
-+ MI.eraseFromParent();
-+}
-+
-+void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+
-+ unsigned Dst = MI.getOperand(0).getReg();
-+ unsigned Vcc = MI.getOperand(1).getReg();
-+ unsigned Src = MI.getOperand(2).getReg();
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
-+ .addReg(Vcc)
-+ .addReg(Src);
-+
-+ MI.eraseFromParent();
-+}
-+
-+void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+
-+ unsigned Dst = MI.getOperand(0).getReg();
-+ unsigned Saved = MI.getOperand(1).getReg();
-+ unsigned Src = MI.getOperand(2).getReg();
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
-+ .addReg(Saved)
-+ .addReg(Src);
-+
-+ MI.eraseFromParent();
-+}
-+
-+void SILowerControlFlowPass::Loop(MachineInstr &MI) {
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+ unsigned Src = MI.getOperand(0).getReg();
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
-+ .addReg(AMDGPU::EXEC)
-+ .addReg(Src);
-+
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
-+ .addOperand(MI.getOperand(1))
-+ .addReg(AMDGPU::EXEC);
-+
-+ MI.eraseFromParent();
-+}
-+
-+void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+ unsigned Reg = MI.getOperand(0).getReg();
-+
-+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
-+ TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
-+ .addReg(AMDGPU::EXEC)
-+ .addReg(Reg);
-+
-+ MI.eraseFromParent();
-+}
-+
-+void SILowerControlFlowPass::Branch(MachineInstr &MI) {
-+ MachineBasicBlock *Next = MI.getParent()->getNextNode();
-+ MachineBasicBlock *Target = MI.getOperand(0).getMBB();
-+ if (Target == Next)
-+ MI.eraseFromParent();
-+ else
-+ assert(0);
-+}
-+
-+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
-+
-+ MachineBasicBlock &MBB = *MI.getParent();
-+ DebugLoc DL = MI.getDebugLoc();
-+
-+ // Kill is only allowed in pixel shaders
-+ MachineFunction &MF = *MBB.getParent();
-+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-+ assert(Info->ShaderType == ShaderType::PIXEL);
-+
-+ // Clear this pixel from the exec mask if the operand is negative
-+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
-+ .addReg(AMDGPU::SREG_LIT_0)
-+ .addOperand(MI.getOperand(0));
-+
-+ MI.eraseFromParent();
-+}
-+
-+bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
-+
-+ bool HaveKill = false;
-+ unsigned Depth = 0;
-+
-+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
-+ BI != BE; ++BI) {
-+
-+ MachineBasicBlock &MBB = *BI;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-+ I != MBB.end(); I = Next) {
-+
-+ Next = llvm::next(I);
-+ MachineInstr &MI = *I;
-+ switch (MI.getOpcode()) {
-+ default: break;
-+ case AMDGPU::SI_IF:
-+ ++Depth;
-+ If(MI);
-+ break;
-+
-+ case AMDGPU::SI_ELSE:
-+ Else(MI);
-+ break;
-+
-+ case AMDGPU::SI_BREAK:
-+ Break(MI);
-+ break;
-+
-+ case AMDGPU::SI_IF_BREAK:
-+ IfBreak(MI);
-+ break;
-+
-+ case AMDGPU::SI_ELSE_BREAK:
-+ ElseBreak(MI);
-+ break;
-+
-+ case AMDGPU::SI_LOOP:
-+ ++Depth;
-+ Loop(MI);
-+ break;
-+
-+ case AMDGPU::SI_END_CF:
-+ if (--Depth == 0 && HaveKill) {
-+ SkipIfDead(MI);
-+ HaveKill = false;
-+ }
-+ EndCf(MI);
-+ break;
-+
-+ case AMDGPU::SI_KILL:
-+ if (Depth == 0)
-+ SkipIfDead(MI);
-+ else
-+ HaveKill = true;
-+ Kill(MI);
-+ break;
-+
-+ case AMDGPU::S_BRANCH:
-+ Branch(MI);
-+ break;
-+ }
-+ }
-+ }
-+
-+ return true;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SILowerLiteralConstants.cpp llvm-r600/lib/Target/R600/SILowerLiteralConstants.cpp
---- llvm-3.2.src/lib/Target/R600/SILowerLiteralConstants.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SILowerLiteralConstants.cpp 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,108 @@
-+//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass performs the following transformation on instructions with
-+/// literal constants:
-+///
-+/// %VGPR0 = V_MOV_IMM_I32 1
-+///
-+/// becomes:
-+///
-+/// BUNDLE
-+/// * %VGPR0 = V_MOV_B32_e32 SI_LITERAL_CONSTANT
-+/// * SI_LOAD_LITERAL 1
-+///
-+/// The resulting sequence matches exactly how the hardware handles immediate
-+/// operands, so this transformation greatly simplifies the code generator.
-+///
-+/// Only the *_MOV_IMM_* instructions support immediate operands at the moment,
-+/// but when
-+/// support for immediate operands is added to other instructions, they
-+/// will be lowered here as well.
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineInstrBundle.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class SILowerLiteralConstantsPass : public MachineFunctionPass {
-+
-+private:
-+ static char ID;
-+ const TargetInstrInfo *TII;
-+
-+public:
-+ SILowerLiteralConstantsPass(TargetMachine &tm) :
-+ MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ const char *getPassName() const {
-+ return "SI Lower literal constants pass";
-+ }
-+};
-+
-+} // End anonymous namespace
-+
-+char SILowerLiteralConstantsPass::ID = 0;
-+
-+FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
-+ return new SILowerLiteralConstantsPass(tm);
-+}
-+
-+bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
-+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+ BB != BB_E; ++BB) {
-+ MachineBasicBlock &MBB = *BB;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-+ I != MBB.end(); I = Next) {
-+ Next = llvm::next(I);
-+ MachineInstr &MI = *I;
-+ switch (MI.getOpcode()) {
-+ default: break;
-+ case AMDGPU::S_MOV_IMM_I32:
-+ case AMDGPU::S_MOV_IMM_I64:
-+ case AMDGPU::V_MOV_IMM_F32:
-+ case AMDGPU::V_MOV_IMM_I32: {
-+ unsigned MovOpcode;
-+ unsigned LoadLiteralOpcode;
-+ MachineOperand LiteralOp = MI.getOperand(1);
-+ if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
-+ MovOpcode = AMDGPU::V_MOV_B32_e32;
-+ } else {
-+ MovOpcode = AMDGPU::S_MOV_B32;
-+ }
-+ if (LiteralOp.isImm()) {
-+ LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
-+ } else {
-+ LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
-+ }
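-+        // Replace the pseudo with a bundle: a MOV that reads the special
-+        // SI_LITERAL_CONSTANT register, immediately followed by the literal
-+        // dword itself.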
-+ MachineInstr *First =
-+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
-+ MI.getOperand(0).getReg())
-+ .addReg(AMDGPU::SI_LITERAL_CONSTANT);
-+ MachineInstr *Last =
-+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
-+ .addOperand(MI.getOperand(1));
-+ Last->setIsInsideBundle();
-+ llvm::finalizeBundle(MBB, First, Last);
-+ MI.eraseFromParent();
-+ break;
-+ }
-+ }
-+ }
-+ }
-+ return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.cpp llvm-r600/lib/Target/R600/SIMachineFunctionInfo.cpp
---- llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIMachineFunctionInfo.cpp 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,20 @@
-+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+
-+#include "SIMachineFunctionInfo.h"
-+
-+using namespace llvm;
-+
-+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
-+ : MachineFunctionInfo(),
-+ SPIPSInputAddr(0),
-+ ShaderType(0)
-+ { }
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.h llvm-r600/lib/Target/R600/SIMachineFunctionInfo.h
---- llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIMachineFunctionInfo.h 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,34 @@
-+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+#ifndef SIMACHINEFUNCTIONINFO_H_
-+#define SIMACHINEFUNCTIONINFO_H_
-+
-+#include "llvm/CodeGen/MachineFunction.h"
-+
-+namespace llvm {
-+
-+/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
-+/// tells the hardware which interpolation parameters to load.
-+class SIMachineFunctionInfo : public MachineFunctionInfo {
-+public:
-+ SIMachineFunctionInfo(const MachineFunction &MF);
-+ unsigned SPIPSInputAddr;
-+ unsigned ShaderType;
-+};
-+
-+} // End namespace llvm
-+
-+
-+#endif // SIMACHINEFUNCTIONINFO_H_
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIRegisterInfo.cpp llvm-r600/lib/Target/R600/SIRegisterInfo.cpp
---- llvm-3.2.src/lib/Target/R600/SIRegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIRegisterInfo.cpp 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,48 @@
-+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief SI implementation of the TargetRegisterInfo class.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+#include "SIRegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+
-+using namespace llvm;
-+
-+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
-+ const TargetInstrInfo &tii)
-+: AMDGPURegisterInfo(tm, tii),
-+ TM(tm),
-+ TII(tii)
-+ { }
-+
-+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
-+ BitVector Reserved(getNumRegs());
-+ return Reserved;
-+}
-+
-+const TargetRegisterClass *
-+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
-+ switch (rc->getID()) {
-+ case AMDGPU::GPRF32RegClassID:
-+ return &AMDGPU::VReg_32RegClass;
-+ default: return rc;
-+ }
-+}
-+
-+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
-+ MVT VT) const {
-+ switch(VT.SimpleTy) {
-+ default:
-+ case MVT::i32: return &AMDGPU::VReg_32RegClass;
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIRegisterInfo.h llvm-r600/lib/Target/R600/SIRegisterInfo.h
---- llvm-3.2.src/lib/Target/R600/SIRegisterInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIRegisterInfo.h 2013-01-25 19:43:57.483383054 +0100
-@@ -0,0 +1,47 @@
-+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition for SIRegisterInfo
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+#ifndef SIREGISTERINFO_H_
-+#define SIREGISTERINFO_H_
-+
-+#include "AMDGPURegisterInfo.h"
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class TargetInstrInfo;
-+
-+struct SIRegisterInfo : public AMDGPURegisterInfo {
-+ AMDGPUTargetMachine &TM;
-+ const TargetInstrInfo &TII;
-+
-+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
-+
-+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
-+
-+ /// \param RC is an AMDIL reg class.
-+ ///
-+ /// \returns the SI register class that is equivalent to \p RC.
-+ virtual const TargetRegisterClass *
-+ getISARegClass(const TargetRegisterClass *RC) const;
-+
-+ /// \brief get the register class of the specified type to use in the
-+ /// CFGStructurizer
-+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
-+};
-+
-+} // End namespace llvm
-+
-+#endif // SIREGISTERINFO_H_
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIRegisterInfo.td llvm-r600/lib/Target/R600/SIRegisterInfo.td
---- llvm-3.2.src/lib/Target/R600/SIRegisterInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIRegisterInfo.td 2013-01-25 19:43:57.483383054 +0100
-@@ -0,0 +1,167 @@
-+
-+let Namespace = "AMDGPU" in {
-+ def low : SubRegIndex;
-+ def high : SubRegIndex;
-+
-+ def sub0 : SubRegIndex;
-+ def sub1 : SubRegIndex;
-+ def sub2 : SubRegIndex;
-+ def sub3 : SubRegIndex;
-+ def sub4 : SubRegIndex;
-+ def sub5 : SubRegIndex;
-+ def sub6 : SubRegIndex;
-+ def sub7 : SubRegIndex;
-+}
-+
-+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
-+ let Namespace = "AMDGPU";
-+ let HWEncoding = encoding;
-+}
-+
-+class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
-+ let Namespace = "AMDGPU";
-+ let SubRegIndices = [low, high];
-+ let HWEncoding = encoding;
-+}
-+
-+class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
-+
-+class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
-+
-+// Special Registers
-+def VCC : SIReg<"VCC", 106>;
-+def EXEC_LO : SIReg <"EXEC LO", 126>;
-+def EXEC_HI : SIReg <"EXEC HI", 127>;
-+def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
-+def SCC : SIReg<"SCC", 253>;
-+def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
-+def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>;
-+def M0 : SIReg <"M0", 124>;
-+
-+//Interpolation registers
-+def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
-+def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
-+def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
-+def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
-+def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
-+def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
-+def PERSP_I_W : SIReg <"PERSP_I_W">;
-+def PERSP_J_W : SIReg <"PERSP_J_W">;
-+def PERSP_1_W : SIReg <"PERSP_1_W">;
-+def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
-+def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
-+def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
-+def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
-+def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
-+def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
-+def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
-+def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
-+def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
-+def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
-+def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
-+def FRONT_FACE : SIReg <"FRONT_FACE">;
-+def ANCILLARY : SIReg <"ANCILLARY">;
-+def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
-+def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
-+
-+// SGPR 32-bit registers
-+foreach Index = 0-101 in {
-+ def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>;
-+}
-+
-+def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-+ (add (sequence "SGPR%u", 0, 101))>;
-+
-+// SGPR 64-bit registers
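-+// (each 64-bit tuple pairs an even-numbered SGPR with the following odd one)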
-+def SGPR_64 : RegisterTuples<[low, high],
-+ [(add (decimate SGPR_32, 2)),
-+ (add(decimate (rotl SGPR_32, 1), 2))]>;
-+
-+// SGPR 128-bit registers
-+def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
-+ [(add (decimate SGPR_32, 4)),
-+ (add (decimate (rotl SGPR_32, 1), 4)),
-+ (add (decimate (rotl SGPR_32, 2), 4)),
-+ (add (decimate (rotl SGPR_32, 3), 4))]>;
-+
-+// SGPR 256-bit registers
-+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
-+ [(add (decimate SGPR_32, 8)),
-+ (add (decimate (rotl SGPR_32, 1), 8)),
-+ (add (decimate (rotl SGPR_32, 2), 8)),
-+ (add (decimate (rotl SGPR_32, 3), 8)),
-+ (add (decimate (rotl SGPR_32, 4), 8)),
-+ (add (decimate (rotl SGPR_32, 5), 8)),
-+ (add (decimate (rotl SGPR_32, 6), 8)),
-+ (add (decimate (rotl SGPR_32, 7), 8))]>;
-+
-+// VGPR 32-bit registers
-+foreach Index = 0-255 in {
-+ def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
-+}
-+
-+def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-+ (add (sequence "VGPR%u", 0, 255))>;
-+
-+// VGPR 64-bit registers
-+def VGPR_64 : RegisterTuples<[low, high],
-+ [(add VGPR_32),
-+ (add (rotl VGPR_32, 1))]>;
-+
-+// VGPR 128-bit registers
-+def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
-+ [(add VGPR_32),
-+ (add (rotl VGPR_32, 1)),
-+ (add (rotl VGPR_32, 2)),
-+ (add (rotl VGPR_32, 3))]>;
-+
-+// Register class for all scalar registers (SGPRs + Special Registers)
-+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-+ (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
-+>;
-+
-+def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>;
-+
-+def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>;
-+
-+def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
-+
-+def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
-+
-+// Register class for all vector registers (VGPRs + Interpolation Registers)
-+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-+ (add VGPR_32,
-+ PERSP_SAMPLE_I, PERSP_SAMPLE_J,
-+ PERSP_CENTER_I, PERSP_CENTER_J,
-+ PERSP_CENTROID_I, PERSP_CENTROID_J,
-+ PERSP_I_W, PERSP_J_W, PERSP_1_W,
-+ LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
-+ LINEAR_CENTER_I, LINEAR_CENTER_J,
-+ LINEAR_CENTROID_I, LINEAR_CENTROID_J,
-+ LINE_STIPPLE_TEX_COORD,
-+ POS_X_FLOAT,
-+ POS_Y_FLOAT,
-+ POS_Z_FLOAT,
-+ POS_W_FLOAT,
-+ FRONT_FACE,
-+ ANCILLARY,
-+ SAMPLE_COVERAGE,
-+ POS_FIXED_PT
-+ )
-+>;
-+
-+def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>;
-+
-+def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>;
-+
-+// AllReg_* - A set of all scalar and vector registers of a given width.
-+def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>;
-+
-+def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>;
-+
-+// Special register classes for predicates and the M0 register
-+def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
-+def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
-+def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
-+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SISchedule.td llvm-r600/lib/Target/R600/SISchedule.td
---- llvm-3.2.src/lib/Target/R600/SISchedule.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SISchedule.td 2013-01-25 19:43:57.483383054 +0100
-@@ -0,0 +1,15 @@
-+//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// TODO: This is just a placeholder for now.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+def SI_Itin : ProcessorItineraries <[], [], []>;
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp llvm-r600/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
---- llvm-3.2.src/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp 2013-01-25 19:43:57.483383054 +0100
-@@ -0,0 +1,26 @@
-+//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+using namespace llvm;
-+
-+/// \brief The target for the AMDGPU backend
-+Target llvm::TheAMDGPUTarget;
-+
-+/// \brief Extern function to initialize the targets for the AMDGPU backend
-+extern "C" void LLVMInitializeR600TargetInfo() {
-+ RegisterTarget<Triple::r600, false>
-+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/CMakeLists.txt llvm-r600/lib/Target/R600/TargetInfo/CMakeLists.txt
---- llvm-3.2.src/lib/Target/R600/TargetInfo/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/TargetInfo/CMakeLists.txt 2013-01-25 19:43:57.483383054 +0100
-@@ -0,0 +1,7 @@
-+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-+
-+add_llvm_library(LLVMR600Info
-+ AMDGPUTargetInfo.cpp
-+ )
-+
-+add_dependencies(LLVMR600Info AMDGPUCommonTableGen intrinsics_gen)
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/LLVMBuild.txt llvm-r600/lib/Target/R600/TargetInfo/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/R600/TargetInfo/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/TargetInfo/LLVMBuild.txt 2013-01-25 19:43:57.483383054 +0100
-@@ -0,0 +1,23 @@
-+;===- ./lib/Target/R600/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
-+;
-+; The LLVM Compiler Infrastructure
-+;
-+; This file is distributed under the University of Illinois Open Source
-+; License. See LICENSE.TXT for details.
-+;
-+;===------------------------------------------------------------------------===;
-+;
-+; This is an LLVMBuild description file for the components in this subdirectory.
-+;
-+; For more information on the LLVMBuild system, please see:
-+;
-+; http://llvm.org/docs/LLVMBuild.html
-+;
-+;===------------------------------------------------------------------------===;
-+
-+[component_0]
-+type = Library
-+name = R600Info
-+parent = R600
-+required_libraries = MC Support
-+add_to_library_groups = R600
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/Makefile llvm-r600/lib/Target/R600/TargetInfo/Makefile
---- llvm-3.2.src/lib/Target/R600/TargetInfo/Makefile 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/TargetInfo/Makefile 2013-01-25 19:43:57.483383054 +0100
-@@ -0,0 +1,15 @@
-+##===- lib/Target/R600/TargetInfo/Makefile ------------------*- Makefile -*-===##
-+#
-+# The LLVM Compiler Infrastructure
-+#
-+# This file is distributed under the University of Illinois Open Source
-+# License. See LICENSE.TXT for details.
-+#
-+##===----------------------------------------------------------------------===##
-+LEVEL = ../../../..
-+LIBRARYNAME = LLVMR600Info
-+
-+# Hack: we need to include 'main' target directory to grab private headers
-+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-+
-+include $(LEVEL)/Makefile.common
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/add.v4i32.ll llvm-r600/test/CodeGen/R600/add.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/add.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/add.v4i32.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-+ %a = load <4 x i32> addrspace(1) * %in
-+ %b = load <4 x i32> addrspace(1) * %b_ptr
-+ %result = add <4 x i32> %a, %b
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/and.v4i32.ll llvm-r600/test/CodeGen/R600/and.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/and.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/and.v4i32.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-+ %a = load <4 x i32> addrspace(1) * %in
-+ %b = load <4 x i32> addrspace(1) * %b_ptr
-+ %result = and <4 x i32> %a, %b
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll llvm-r600/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
---- llvm-3.2.src/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,33 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+; This test is for a bug in
-+; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where
-+; the wrong type was being passed to
-+; TargetLowering::getOperationAction() when checking the legality of
-+; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes.
-+
-+define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
-+entry:
-+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1
-+ %sint = load i32 addrspace(1) * %in
-+ %conv = sitofp i32 %sint to float
-+ %0 = insertelement <4 x float> undef, float %conv, i32 0
-+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
-+ store <4 x float> %splat, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-+
-+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
-+entry:
-+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1
-+ %uint = load i32 addrspace(1) * %in
-+ %conv = uitofp i32 %uint to float
-+ %0 = insertelement <4 x float> undef, float %conv, i32 0
-+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
-+ store <4 x float> %splat, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fabs.ll llvm-r600/test/CodeGen/R600/fabs.ll
---- llvm-3.2.src/test/CodeGen/R600/fabs.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fabs.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @fabs(float %r0)
-+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-+declare float @fabs(float) readnone
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fadd.ll llvm-r600/test/CodeGen/R600/fadd.ll
---- llvm-3.2.src/test/CodeGen/R600/fadd.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fadd.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = fadd float %r0, %r1
-+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fadd.v4f32.ll llvm-r600/test/CodeGen/R600/fadd.v4f32.ll
---- llvm-3.2.src/test/CodeGen/R600/fadd.v4f32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fadd.v4f32.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-+ %a = load <4 x float> addrspace(1) * %in
-+ %b = load <4 x float> addrspace(1) * %b_ptr
-+ %result = fadd <4 x float> %a, %b
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fcmp-cnde-int-args.ll llvm-r600/test/CodeGen/R600/fcmp-cnde-int-args.ll
---- llvm-3.2.src/test/CodeGen/R600/fcmp-cnde-int-args.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fcmp-cnde-int-args.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the
-+; chance to optimize the fcmp + select instructions to CNDE was missed
-+; because the operands to fcmp and select had different types.
-+
-+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}}
-+
-+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
-+entry:
-+ %0 = load float addrspace(1)* %in
-+ %cmp = fcmp oeq float %0, 0.000000e+00
-+ %value = select i1 %cmp, i32 -1, i32 0
-+ store i32 %value, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fcmp-cnd.ll llvm-r600/test/CodeGen/R600/fcmp-cnd.ll
---- llvm-3.2.src/test/CodeGen/R600/fcmp-cnd.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fcmp-cnd.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,14 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;Not checking arguments 2 and 3 to CNDE, because they may vary between
-+;registers and literal.x depending on what the optimizer does.
-+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
-+entry:
-+ %0 = load float addrspace(1)* %in
-+ %cmp = fcmp oeq float %0, 0.000000e+00
-+ %value = select i1 %cmp, i32 2, i32 3
-+ store i32 %value, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fcmp.ll llvm-r600/test/CodeGen/R600/fcmp.ll
---- llvm-3.2.src/test/CodeGen/R600/fcmp.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fcmp.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: SETE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-+;CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
-+entry:
-+ %0 = load float addrspace(1)* %in
-+ %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1
-+ %1 = load float addrspace(1)* %arrayidx1
-+ %cmp = fcmp oeq float %0, %1
-+ %sext = sext i1 %cmp to i32
-+ store i32 %sext, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fdiv.v4f32.ll llvm-r600/test/CodeGen/R600/fdiv.v4f32.ll
---- llvm-3.2.src/test/CodeGen/R600/fdiv.v4f32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fdiv.v4f32.ll 2013-01-25 19:43:58.460049700 +0100
-@@ -0,0 +1,19 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-+ %a = load <4 x float> addrspace(1) * %in
-+ %b = load <4 x float> addrspace(1) * %b_ptr
-+ %result = fdiv <4 x float> %a, %b
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/floor.ll llvm-r600/test/CodeGen/R600/floor.ll
---- llvm-3.2.src/test/CodeGen/R600/floor.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/floor.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @floor(float %r0)
-+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-+declare float @floor(float) readonly
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmax.ll llvm-r600/test/CodeGen/R600/fmax.ll
---- llvm-3.2.src/test/CodeGen/R600/fmax.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fmax.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = fcmp uge float %r0, %r1
-+ %r3 = select i1 %r2, float %r0, float %r1
-+ call void @llvm.AMDGPU.store.output(float %r3, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmin.ll llvm-r600/test/CodeGen/R600/fmin.ll
---- llvm-3.2.src/test/CodeGen/R600/fmin.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fmin.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = fcmp uge float %r0, %r1
-+ %r3 = select i1 %r2, float %r1, float %r0
-+ call void @llvm.AMDGPU.store.output(float %r3, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmul.ll llvm-r600/test/CodeGen/R600/fmul.ll
---- llvm-3.2.src/test/CodeGen/R600/fmul.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fmul.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = fmul float %r0, %r1
-+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmul.v4f32.ll llvm-r600/test/CodeGen/R600/fmul.v4f32.ll
---- llvm-3.2.src/test/CodeGen/R600/fmul.v4f32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fmul.v4f32.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-+ %a = load <4 x float> addrspace(1) * %in
-+ %b = load <4 x float> addrspace(1) * %b_ptr
-+ %result = fmul <4 x float> %a, %b
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fsub.ll llvm-r600/test/CodeGen/R600/fsub.ll
---- llvm-3.2.src/test/CodeGen/R600/fsub.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fsub.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,17 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = fsub float %r0, %r1
-+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fsub.v4f32.ll llvm-r600/test/CodeGen/R600/fsub.v4f32.ll
---- llvm-3.2.src/test/CodeGen/R600/fsub.v4f32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/fsub.v4f32.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-+ %a = load <4 x float> addrspace(1) * %in
-+ %b = load <4 x float> addrspace(1) * %b_ptr
-+ %result = fsub <4 x float> %a, %b
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/i8_to_double_to_float.ll llvm-r600/test/CodeGen/R600/i8_to_double_to_float.ll
---- llvm-3.2.src/test/CodeGen/R600/i8_to_double_to_float.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/i8_to_double_to_float.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,11 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
-+ %1 = load i8 addrspace(1)* %in
-+ %2 = uitofp i8 %1 to double
-+ %3 = fptrunc double %2 to float
-+ store float %3, float addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/icmp-select-sete-reverse-args.ll llvm-r600/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
---- llvm-3.2.src/test/CodeGen/R600/icmp-select-sete-reverse-args.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/icmp-select-sete-reverse-args.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,18 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;Test that a select with reversed True/False values is correctly lowered
-+;to a SETNE_INT. There should only be one SETNE_INT instruction.
-+
-+;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK-NOT: SETNE_INT
-+
-+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-+entry:
-+ %0 = load i32 addrspace(1)* %in
-+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
-+ %1 = load i32 addrspace(1)* %arrayidx1
-+ %cmp = icmp eq i32 %0, %1
-+ %value = select i1 %cmp, i32 0, i32 -1
-+ store i32 %value, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/literals.ll llvm-r600/test/CodeGen/R600/literals.ll
---- llvm-3.2.src/test/CodeGen/R600/literals.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/literals.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,30 @@
-+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; Test using an integer literal constant.
-+; Generated ASM should be:
-+; ADD_INT REG literal.x, 5
-+; or
-+; ADD_INT literal.x REG, 5
-+
-+; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
-+define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
-+entry:
-+ %0 = add i32 5, %in
-+ store i32 %0, i32 addrspace(1)* %out
-+ ret void
-+}
-+
-+; Test using a float literal constant.
-+; Generated ASM should be:
-+; ADD REG literal.x, 5.0
-+; or
-+; ADD literal.x REG, 5.0
-+
-+; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0
-+define void @float_literal(float addrspace(1)* %out, float %in) {
-+entry:
-+ %0 = fadd float 5.0, %in
-+ store float %0, float addrspace(1)* %out
-+ ret void
-+}
-+
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/lit.local.cfg llvm-r600/test/CodeGen/R600/lit.local.cfg
---- llvm-3.2.src/test/CodeGen/R600/lit.local.cfg 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/lit.local.cfg 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,13 @@
-+config.suffixes = ['.ll', '.c', '.cpp']
-+
-+def getRoot(config):
-+ if not config.parent:
-+ return config
-+ return getRoot(config.parent)
-+
-+root = getRoot(config)
-+
-+targets = set(root.targets_to_build.split())
-+if 'R600' not in targets:
-+ config.unsupported = True
-+
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.mul.ll llvm-r600/test/CodeGen/R600/llvm.AMDGPU.mul.ll
---- llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.mul.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/llvm.AMDGPU.mul.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,17 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = call float @llvm.AMDGPU.mul(float %r0, float %r1)
-+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-+declare float @llvm.AMDGPU.mul(float, float) readnone
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.trunc.ll llvm-r600/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
---- llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.trunc.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/llvm.AMDGPU.trunc.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.AMDGPU.trunc(float %r0)
-+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-+declare float @llvm.AMDGPU.trunc(float) readnone
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.cos.ll llvm-r600/test/CodeGen/R600/llvm.cos.ll
---- llvm-3.2.src/test/CodeGen/R600/llvm.cos.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/llvm.cos.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.cos.f32(float %r0)
-+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.cos.f32(float) readnone
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.pow.ll llvm-r600/test/CodeGen/R600/llvm.pow.ll
---- llvm-3.2.src/test/CodeGen/R600/llvm.pow.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/llvm.pow.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,19 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK-NEXT: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = call float @llvm.pow.f32(float %r0, float %r1)
-+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-+declare float @llvm.pow.f32(float, float) readonly
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.sin.ll llvm-r600/test/CodeGen/R600/llvm.sin.ll
---- llvm-3.2.src/test/CodeGen/R600/llvm.sin.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/llvm.sin.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.sin.f32(float %r0)
-+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.sin.f32(float) readnone
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/load.constant_addrspace.f32.ll llvm-r600/test/CodeGen/R600/load.constant_addrspace.f32.ll
---- llvm-3.2.src/test/CodeGen/R600/load.constant_addrspace.f32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/load.constant_addrspace.f32.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,9 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
-+
-+define void @test(float addrspace(1)* %out, float addrspace(2)* %in) {
-+ %1 = load float addrspace(2)* %in
-+ store float %1, float addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/load.i8.ll llvm-r600/test/CodeGen/R600/load.i8.ll
---- llvm-3.2.src/test/CodeGen/R600/load.i8.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/load.i8.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,10 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-+
-+define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-+ %1 = load i8 addrspace(1)* %in
-+ %2 = zext i8 %1 to i32
-+ store i32 %2, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/reciprocal.ll llvm-r600/test/CodeGen/R600/reciprocal.ll
---- llvm-3.2.src/test/CodeGen/R600/reciprocal.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/reciprocal.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = fdiv float 1.0, %r0
-+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-+declare float @llvm.AMDGPU.rcp(float) readnone
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/sdiv.ll llvm-r600/test/CodeGen/R600/sdiv.ll
---- llvm-3.2.src/test/CodeGen/R600/sdiv.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/sdiv.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,21 @@
-+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; The code generated by sdiv is long and complex and may frequently change.
-+; The goal of this test is to make sure the ISel doesn't fail.
-+;
-+; This program was previously failing to compile when one of the selectcc
-+; opcodes generated by the sdiv lowering was being legalized and optimized to:
-+; selectcc Remainder -1, 0, -1, SETGT
-+; This was fixed by adding an additional pattern in R600Instructions.td to
-+; match this pattern with a CNDGE_INT.
-+
-+; CHECK: RETURN
-+
-+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
-+ %num = load i32 addrspace(1) * %in
-+ %den = load i32 addrspace(1) * %den_ptr
-+ %result = sdiv i32 %num, %den
-+ store i32 %result, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/selectcc_cnde_int.ll llvm-r600/test/CodeGen/R600/selectcc_cnde_int.ll
---- llvm-3.2.src/test/CodeGen/R600/selectcc_cnde_int.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/selectcc_cnde_int.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,11 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK-NOT: SETE_INT
-+;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}}
-+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-+ %1 = load i32 addrspace(1)* %in
-+ %2 = icmp eq i32 %1, 0
-+ %3 = select i1 %2, i32 1, i32 2
-+ store i32 %3, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/selectcc_cnde.ll llvm-r600/test/CodeGen/R600/selectcc_cnde.ll
---- llvm-3.2.src/test/CodeGen/R600/selectcc_cnde.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/selectcc_cnde.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,11 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK-NOT: SETE
-+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}}
-+define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
-+ %1 = load float addrspace(1)* %in
-+ %2 = fcmp oeq float %1, 0.0
-+ %3 = select i1 %2, float 1.0, float 2.0
-+ store float %3, float addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/selectcc-icmp-select-float.ll llvm-r600/test/CodeGen/R600/selectcc-icmp-select-float.ll
---- llvm-3.2.src/test/CodeGen/R600/selectcc-icmp-select-float.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/selectcc-icmp-select-float.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,15 @@
-+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; Note: additional optimizations may cause this SGT to be replaced with a
-+; CND* instruction.
-+; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
-+; Test a selectcc with i32 LHS/RHS and float True/False
-+
-+define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
-+entry:
-+ %0 = load i32 addrspace(1)* %in
-+ %1 = icmp sge i32 %0, 0
-+ %2 = select i1 %1, float 1.0, float 0.0
-+ store float %2, float addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/setcc.v4i32.ll llvm-r600/test/CodeGen/R600/setcc.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/setcc.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/setcc.v4i32.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,12 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-+ %a = load <4 x i32> addrspace(1) * %in
-+ %b = load <4 x i32> addrspace(1) * %b_ptr
-+ %result = icmp eq <4 x i32> %a, %b
-+ %sext = sext <4 x i1> %result to <4 x i32>
-+ store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/short-args.ll llvm-r600/test/CodeGen/R600/short-args.ll
---- llvm-3.2.src/test/CodeGen/R600/short-args.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/short-args.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,37 @@
-+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-+
-+define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
-+entry:
-+ %0 = zext i8 %in to i32
-+ store i32 %0, i32 addrspace(1)* %out, align 4
-+ ret void
-+}
-+
-+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-+
-+define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
-+entry:
-+ %0 = zext i8 %in to i32
-+ store i32 %0, i32 addrspace(1)* %out, align 4
-+ ret void
-+}
-+
-+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-+
-+define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
-+entry:
-+ %0 = zext i16 %in to i32
-+ store i32 %0, i32 addrspace(1)* %out, align 4
-+ ret void
-+}
-+
-+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-+
-+define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
-+entry:
-+ %0 = zext i16 %in to i32
-+ store i32 %0, i32 addrspace(1)* %out, align 4
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/store.v4f32.ll llvm-r600/test/CodeGen/R600/store.v4f32.ll
---- llvm-3.2.src/test/CodeGen/R600/store.v4f32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/store.v4f32.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,9 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-+
-+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %1 = load <4 x float> addrspace(1) * %in
-+ store <4 x float> %1, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/store.v4i32.ll llvm-r600/test/CodeGen/R600/store.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/store.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/store.v4i32.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,9 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %1 = load <4 x i32> addrspace(1) * %in
-+ store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/udiv.v4i32.ll llvm-r600/test/CodeGen/R600/udiv.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/udiv.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/udiv.v4i32.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;The code generated by udiv is long and complex and may frequently change.
-+;The goal of this test is to make sure the ISel doesn't fail when it gets
-+;a v4i32 udiv
-+;CHECK: RETURN
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-+ %a = load <4 x i32> addrspace(1) * %in
-+ %b = load <4 x i32> addrspace(1) * %b_ptr
-+ %result = udiv <4 x i32> %a, %b
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/urem.v4i32.ll llvm-r600/test/CodeGen/R600/urem.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/urem.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/urem.v4i32.ll 2013-01-25 19:43:58.470049700 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;The code generated by urem is long and complex and may frequently change.
-+;The goal of this test is to make sure the ISel doesn't fail when it gets
-+;a v4i32 urem
-+;CHECK: RETURN
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-+ %a = load <4 x i32> addrspace(1) * %in
-+ %b = load <4 x i32> addrspace(1) * %b_ptr
-+ %result = urem <4 x i32> %a, %b
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/vec4-expand.ll llvm-r600/test/CodeGen/R600/vec4-expand.ll
---- llvm-3.2.src/test/CodeGen/R600/vec4-expand.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/vec4-expand.ll 2013-01-25 19:43:58.470049700 +0100
-@@ -0,0 +1,49 @@
-+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %value = load <4 x float> addrspace(1) * %in
-+ %result = fptosi <4 x float> %value to <4 x i32>
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-+
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %value = load <4 x float> addrspace(1) * %in
-+ %result = fptoui <4 x float> %value to <4 x i32>
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-+
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %value = load <4 x i32> addrspace(1) * %in
-+ %result = sitofp <4 x i32> %value to <4 x float>
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-+
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %value = load <4 x i32> addrspace(1) * %in
-+ %result = uitofp <4 x i32> %value to <4 x float>
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/SI/sanity.ll llvm-r600/test/CodeGen/SI/sanity.ll
---- llvm-3.2.src/test/CodeGen/SI/sanity.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/SI/sanity.ll 2013-01-25 19:43:58.470049700 +0100
-@@ -0,0 +1,37 @@
-+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
-+
-+; CHECK: S_ENDPGM
-+
-+define void @main() {
-+main_body:
-+ call void @llvm.AMDGPU.shader.type(i32 1)
-+ %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
-+ %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0
-+ %2 = load <4 x i32> addrspace(2)* %1
-+ %3 = call i32 @llvm.SI.vs.load.buffer.index()
-+ %4 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3)
-+ %5 = extractelement <4 x float> %4, i32 0
-+ %6 = extractelement <4 x float> %4, i32 1
-+ %7 = extractelement <4 x float> %4, i32 2
-+ %8 = extractelement <4 x float> %4, i32 3
-+ %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
-+ %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1
-+ %11 = load <4 x i32> addrspace(2)* %10
-+ %12 = call i32 @llvm.SI.vs.load.buffer.index()
-+ %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12)
-+ %14 = extractelement <4 x float> %13, i32 0
-+ %15 = extractelement <4 x float> %13, i32 1
-+ %16 = extractelement <4 x float> %13, i32 2
-+ %17 = extractelement <4 x float> %13, i32 3
-+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17)
-+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8)
-+ ret void
-+}
-+
-+declare void @llvm.AMDGPU.shader.type(i32)
-+
-+declare i32 @llvm.SI.vs.load.buffer.index() readnone
-+
-+declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32)
-+
-+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/X86/cvtv2f32.ll llvm-r600/test/CodeGen/X86/cvtv2f32.ll
---- llvm-3.2.src/test/CodeGen/X86/cvtv2f32.ll 2012-10-24 06:14:18.000000000 +0200
-+++ llvm-r600/test/CodeGen/X86/cvtv2f32.ll 2013-01-25 19:43:58.856716358 +0100
-@@ -1,3 +1,7 @@
-+; A bug fix in the DAGCombiner made this test fail, so it is marked XFAIL
-+; until it can be investigated further.
-+; XFAIL: *
-+
- ; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s
-
- define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) {