From 24ef46b2ee28d9cfc27e8ee824e32abc76ccf67c Mon Sep 17 00:00:00 2001
From: Marcin Krol
Date: Tue, 16 Sep 2014 11:58:46 +0000
Subject: [PATCH] - merged 3.3 from PLD, adjusted tld patch

---
 llvm-config.patch |     6 +-
 llvm-r600.patch   | 23023 --------------------------------------------
 llvm-tld.patch    |    32 +-
 llvm.spec         |    22 +-
 4 files changed, 31 insertions(+), 23052 deletions(-)
 delete mode 100644 llvm-r600.patch

diff --git a/llvm-config.patch b/llvm-config.patch
index abdbdb9..38dc3ad 100644
--- a/llvm-config.patch
+++ b/llvm-config.patch
@@ -72,15 +72,15 @@
          "/../../" + Multilib, Paths);
     // Add the non-multilib suffixed paths (if potentially different).
-@@ -2189,7 +2189,7 @@ Linux::Linux(const Driver &D, const llvm
+@@ -2342,7 +2342,7 @@ Linux::Linux(const Driver &D, const llvm
      }
    }
    addPathIfExists(SysRoot + "/lib", Paths);
 -  addPathIfExists(SysRoot + "/usr/lib", Paths);
 +  addPathIfExists(SysRoot + LLVM_LIBDIR, Paths);
- }
- bool Linux::HasNativeLLVMSupport() const {
+   IsPIEDefault = SanitizerArgs(*this, Args).hasZeroBaseShadow();
+ }
 --- llvm-3.2.src/tools/clang/lib/Driver/Tools.cpp.orig	2012-11-21 08:56:23.000000000 +0100
 +++ llvm-3.2.src/tools/clang/lib/Driver/Tools.cpp	2013-01-26 18:43:56.952167604 +0100
 @@ -218,7 +218,7 @@ static void addProfileRT(const ToolChain
diff --git a/llvm-r600.patch b/llvm-r600.patch
deleted file mode 100644
index 0957c01..0000000
--- a/llvm-r600.patch
+++ /dev/null
@@ -1,23023 +0,0 @@
-diff -Nur -x .git llvm-3.2.src/autoconf/configure.ac llvm-r600/autoconf/configure.ac
---- llvm-3.2.src/autoconf/configure.ac	2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/autoconf/configure.ac	2013-01-25 19:43:56.096716416 +0100
-@@ -751,6 +751,11 @@
-
-  if test ${enableval} != "disable"
-  then
-+   if test ${enableval} = "AMDGPU"
-+   then
-+     AC_MSG_ERROR([The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600])
-+     enableval="R600"
-+   fi
-    TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
-  fi
-
-diff -Nur -x .git llvm-3.2.src/configure llvm-r600/configure
---- llvm-3.2.src/configure	2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/configure	2013-01-25 19:43:56.173383081 +0100
-@@ -5473,6 +5473,13 @@
-
-  if test ${enableval} != "disable"
-  then
-+   if test ${enableval} = "AMDGPU"
-+   then
-+     { { echo "$as_me:$LINENO: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&5
-+echo "$as_me: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&2;}
-+   { (exit 1); exit 1; }; }
-+     enableval="R600"
-+   fi
-    TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
-  fi
-
-@@ -10316,7 +10323,7 @@
-   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
-   lt_status=$lt_dlunknown
-   cat > conftest.$ac_ext <
-diff -Nur -x .git llvm-3.2.src/include/llvm/IntrinsicsR600.td llvm-r600/include/llvm/IntrinsicsR600.td
-+let TargetPrefix = "r600" in {
-+
-+class R600ReadPreloadRegisterIntrinsic<string name>
-+  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-+    GCCBuiltin<name>;
-+
-+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
-+  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
-+  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
-+  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-+}
-+
-+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_global_size">;
-+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_local_size">;
-+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_ngroups">;
-+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_tgid">;
-+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_tidig">;
-+} // End TargetPrefix = "r600"
-diff -Nur -x .git llvm-3.2.src/include/llvm/Intrinsics.td llvm-r600/include/llvm/Intrinsics.td
---- llvm-3.2.src/include/llvm/Intrinsics.td	2012-10-20 01:00:20.000000000 +0200
-+++ llvm-r600/include/llvm/Intrinsics.td	2013-01-25 19:43:56.426716409 +0100
-@@ -469,3 +469,4 @@
- include "llvm/IntrinsicsHexagon.td"
- include "llvm/IntrinsicsNVVM.td"
- include "llvm/IntrinsicsMips.td"
-+include "llvm/IntrinsicsR600.td"
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp	2012-11-26 18:01:12.000000000 +0100
-+++ llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp	2013-01-25 19:43:56.720049736 +0100
-@@ -8514,11 +8514,8 @@
-     if (Opcode == ISD::DELETED_NODE &&
-         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
-       Opcode = Opc;
--      // If not supported by target, bail out.
--      if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
--          TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
--        return SDValue();
-     }
-+
-     if (Opc != Opcode)
-       return SDValue();
-
-@@ -8543,6 +8540,10 @@
-   assert(SrcVT != MVT::Other && "Cannot determine source type!");
-
-   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
-+
-+  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
-+    return SDValue();
-+
-   SmallVector<SDValue, 8> Opnds;
-   for (unsigned i = 0; i != NumInScalars; ++i) {
-     SDValue In = N->getOperand(i);
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp	2012-10-24 19:25:11.000000000 +0200
-+++ llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp	2013-01-25 19:43:56.733383069 +0100
-@@ -731,9 +731,10 @@
-       return;
-     }
-     case TargetLowering::Promote: {
--      assert(VT.isVector() && "Unknown legal promote case!");
--      Value = DAG.getNode(ISD::BITCAST, dl,
--                          TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
-+      EVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
-+      assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+             "Can only promote stores to same size type");
-+      Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
-       SDValue Result =
-         DAG.getStore(Chain, dl, Value, Ptr,
-                      ST->getPointerInfo(), isVolatile,
-@@ -889,10 +890,9 @@
-       break;
-     }
-     case TargetLowering::Promote: {
--      // Only promote a load of vector type to another.
--      assert(VT.isVector() && "Cannot promote this load!");
--      // Change base type to a different vector type.
-       EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
-+      assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+             "Can only promote loads to same size type");
-
-       SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
-                                 LD->isVolatile(), LD->isNonTemporal(),
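The LegalizeDAG hunks above relax the old vector-only assertion: a store or load may now be promoted to any type, provided the new type has the same bit width, because the value is simply bitcast on the way through. A minimal host-side sketch of that invariant, in plain C++ rather than the LLVM API (`bit_cast_same_size` is a hypothetical helper named for this example):

```cpp
// Why "Can only promote stores to same size type": the promoted store is an
// ordinary integer store of the reinterpreted bits, so the widths must match.
#include <cassert>
#include <cstdint>
#include <cstring>

// Reinterpret the bits of Src as Dst, as ISD::BITCAST does; only legal when
// the sizes agree, mirroring the assertion added in the patch.
template <typename Dst, typename Src>
Dst bit_cast_same_size(Src s) {
  static_assert(sizeof(Dst) == sizeof(Src),
                "Can only promote stores to same size type");
  Dst d;
  std::memcpy(&d, &s, sizeof(Dst));
  return d;
}

int main() {
  float f = 1.5f;
  uint32_t raw = bit_cast_same_size<uint32_t>(f);  // the f32 store becomes an i32 store
  assert(raw == 0x3fc00000u);                      // IEEE-754 bits of 1.5f
  assert(bit_cast_same_size<float>(raw) == f);     // the load bitcasts back
  return 0;
}
```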
-diff -Nur -x .git llvm-3.2.src/lib/Target/LLVMBuild.txt llvm-r600/lib/Target/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/LLVMBuild.txt	2012-07-16 20:19:46.000000000 +0200
-+++ llvm-r600/lib/Target/LLVMBuild.txt	2013-01-25 19:43:57.173383060 +0100
-@@ -16,7 +16,7 @@
- ;===------------------------------------------------------------------------===;
-
- [common]
--subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
-+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
-
- ; This is a special group whose required libraries are extended (by llvm-build)
- ; with the best execution engine (the native JIT, if available, or the
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp	2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,138 @@
-+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+///
-+/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
-+/// code.  When passed an MCAsmStreamer it prints assembly and when passed
-+/// an MCObjectStreamer it outputs binary code.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+
-+#include "AMDGPUAsmPrinter.h"
-+#include "AMDGPU.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "SIRegisterInfo.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/Target/TargetLoweringObjectFile.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+using namespace llvm;
-+
-+
-+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
-+                                              MCStreamer &Streamer) {
-+  return new AMDGPUAsmPrinter(tm, Streamer);
-+}
-+
-+extern "C" void LLVMInitializeR600AsmPrinter() {
-+  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
-+}
-+
-+/// We need to override this function so we can avoid
-+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
-+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-+  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
-+  if (STM.dumpCode()) {
-+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-+    MF.dump();
-+#endif
-+  }
-+  SetupMachineFunction(MF);
-+  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
-+  if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+    EmitProgramInfo(MF);
-+  }
-+  EmitFunctionBody();
-+  return false;
-+}
-+
-+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
-+  unsigned MaxSGPR = 0;
-+  unsigned MaxVGPR = 0;
-+  bool VCCUsed = false;
-+  const SIRegisterInfo * RI =
-+      static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
-+
-+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+                                                  BB != BB_E; ++BB) {
-+    MachineBasicBlock &MBB = *BB;
-+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+                                                      I != E; ++I) {
-+      MachineInstr &MI = *I;
-+
-+      unsigned numOperands = MI.getNumOperands();
-+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
-+        MachineOperand & MO = MI.getOperand(op_idx);
-+        unsigned maxUsed;
-+        unsigned width = 0;
-+        bool isSGPR = false;
-+        unsigned reg;
-+        unsigned hwReg;
-+        if (!MO.isReg()) {
-+          continue;
-+        }
-+        reg = MO.getReg();
-+        if (reg == AMDGPU::VCC) {
-+          VCCUsed = true;
-+          continue;
-+        }
-+        switch (reg) {
-+        default: break;
-+        case AMDGPU::EXEC:
-+        case AMDGPU::SI_LITERAL_CONSTANT:
-+        case AMDGPU::SREG_LIT_0:
-+        case AMDGPU::M0:
-+          continue;
-+        }
-+
-+        if (AMDGPU::SReg_32RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 1;
-+        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
-+          isSGPR = false;
-+          width = 1;
-+        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 2;
-+        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
-+          isSGPR = false;
-+          width = 2;
-+        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 4;
-+        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
-+          isSGPR = false;
-+          width = 4;
-+        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 8;
-+        } else {
-+          assert(!"Unknown register class");
-+        }
-+        hwReg = RI->getEncodingValue(reg);
-+        maxUsed = hwReg + width - 1;
-+        if (isSGPR) {
-+          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
-+        } else {
-+          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
-+        }
-+      }
-+    }
-+  }
-+  if (VCCUsed) {
-+    MaxSGPR += 2;
-+  }
-+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-+  OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
-+  OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
-+  OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,44 @@
-+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU Assembly printer class.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_ASMPRINTER_H
-+#define AMDGPU_ASMPRINTER_H
-+
-+#include "llvm/CodeGen/AsmPrinter.h"
-+
-+namespace llvm {
-+
-+class AMDGPUAsmPrinter : public AsmPrinter {
-+
-+public:
-+  explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-+    : AsmPrinter(TM, Streamer) { }
-+
-+  virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+  virtual const char *getPassName() const {
-+    return "AMDGPU Assembly Printer";
-+  }
-+
-+  /// \brief Emit register usage information so that the GPU driver
-+  /// can correctly setup the GPU state.
-+  void EmitProgramInfo(MachineFunction &MF);
-+
-+  /// Implemented in AMDGPUMCInstLower.cpp
-+  virtual void EmitInstruction(const MachineInstr *MI);
-+};
-+
-+} // End namespace llvm
-+
-+#endif //AMDGPU_ASMPRINTER_H
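EmitProgramInfo() above derives the SGPR/VGPR counts the driver needs by scanning every register operand: an operand of width N occupies hardware registers hwReg through hwReg + N - 1, and the running maximum plus one is what gets emitted. A stand-alone model of just that bookkeeping (the operand encodings below are invented for the example; only the max logic follows the patch):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

struct Operand {
  unsigned hwReg;  // encoding of the first hardware register
  unsigned width;  // registers covered: 1 (32-bit), 2 (64-bit), 4 (128-bit), ...
};

unsigned countRegisters(const std::vector<Operand> &ops) {
  unsigned maxReg = 0;
  for (const Operand &op : ops) {
    unsigned maxUsed = op.hwReg + op.width - 1;  // last register touched
    maxReg = std::max(maxReg, maxUsed);
  }
  return maxReg + 1;  // the driver wants a count, hence the +1 when emitting
}

int main() {
  // Hypothetical operands: a 64-bit pair at 0, a scalar at 4, a 128-bit quad at 2.
  assert(countRegisters({{0, 2}, {4, 1}, {2, 4}}) == 6);
  return 0;
}
```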
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,49 @@
-+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface --------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief CodeEmitter interface for R600 and SI codegen.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUCODEEMITTER_H
-+#define AMDGPUCODEEMITTER_H
-+
-+namespace llvm {
-+
-+class AMDGPUCodeEmitter {
-+public:
-+  uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-+  virtual uint64_t getMachineOpValue(const MachineInstr &MI,
-+                                     const MachineOperand &MO) const { return 0; }
-+  virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
-+                                   unsigned OpNo) const {
-+    return 0;
-+  }
-+  virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
-+                                   unsigned OpNo) const {
-+    return 0;
-+  }
-+  virtual uint64_t VOPPostEncode(const MachineInstr &MI,
-+                                 uint64_t Value) const {
-+    return Value;
-+  }
-+  virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
-+                                    unsigned OpNo) const {
-+    return 0;
-+  }
-+  virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
-+      const {
-+    return 0;
-+  }
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUCODEEMITTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass lowers AMDIL machine instructions to the appropriate
-+/// hardware instructions.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class AMDGPUConvertToISAPass : public MachineFunctionPass {
-+
-+private:
-+  static char ID;
-+  TargetMachine &TM;
-+
-+public:
-+  AMDGPUConvertToISAPass(TargetMachine &tm) :
-+    MachineFunctionPass(ID), TM(tm) { }
-+
-+  virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+  virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
-+
-+};
-+
-+} // End anonymous namespace
-+
-+char AMDGPUConvertToISAPass::ID = 0;
-+
-+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
-+  return new AMDGPUConvertToISAPass(tm);
-+}
-+
-+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
-+  const AMDGPUInstrInfo * TII =
-+      static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-+
-+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+                                                  BB != BB_E; ++BB) {
-+    MachineBasicBlock &MBB = *BB;
-+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+                                                      I != E; ++I) {
-+      MachineInstr &MI = *I;
-+      TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
-+    }
-+  }
-+  return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.h llvm-r600/lib/Target/R600/AMDGPU.h
---- llvm-3.2.src/lib/Target/R600/AMDGPU.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPU.h	2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,51 @@
-+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_H
-+#define AMDGPU_H
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/Target/TargetMachine.h"
-+
-+namespace llvm {
-+
-+class FunctionPass;
-+class AMDGPUTargetMachine;
-+
-+// R600 Passes
-+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
-+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
-+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
-+
-+// SI Passes
-+FunctionPass *createSIAnnotateControlFlowPass();
-+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
-+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
-+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-+FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
-+FunctionPass *createSIInsertWaits(TargetMachine &tm);
-+
-+// Passes common to R600 and SI
-+Pass *createAMDGPUStructurizeCFGPass();
-+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-+
-+} // End namespace llvm
-+
-+namespace ShaderType {
-+  enum Type {
-+    PIXEL = 0,
-+    VERTEX = 1,
-+    GEOMETRY = 2,
-+    COMPUTE = 3
-+  };
-+}
-+
-+#endif // AMDGPU_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,257 @@
-+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implementation of the TargetInstrInfo class that is common to all
-+/// AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDIL.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+#define GET_INSTRINFO_CTOR
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+using namespace llvm;
-+
-+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
-+  : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
-+
-+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
-+  return RI;
-+}
-+
-+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
-+                                            unsigned &SrcReg, unsigned &DstReg,
-+                                            unsigned &SubIdx) const {
-+// TODO: Implement this function
-+  return false;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-+                                              int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+                                                    int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+
-+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
-+                                           const MachineMemOperand *&MMO,
-+                                           int &FrameIndex) const {
-+// TODO: Implement this function
-+  return false;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
-+                                               int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+                                                     int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
-+                                            const MachineMemOperand *&MMO,
-+                                            int &FrameIndex) const {
-+// TODO: Implement this function
-+  return false;
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
-+                                       MachineBasicBlock::iterator &MBBI,
-+                                       LiveVariables *LV) const {
-+// TODO: Implement this function
-+  return NULL;
-+}
-+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+                                         MachineBasicBlock &MBB) const {
-+  while (iter != MBB.end()) {
-+    switch (iter->getOpcode()) {
-+    default:
-+      break;
-+    case AMDGPU::BRANCH_COND_i32:
-+    case AMDGPU::BRANCH_COND_f32:
-+    case AMDGPU::BRANCH:
-+      return true;
-+    };
-+    ++iter;
-+  }
-+  return false;
-+}
-+
-+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
-+  MachineBasicBlock::iterator tmp = MBB->end();
-+  if (!MBB->size()) {
-+    return MBB->end();
-+  }
-+  while (--tmp) {
-+    if (tmp->getOpcode() == AMDGPU::ENDLOOP
-+        || tmp->getOpcode() == AMDGPU::ENDIF
-+        || tmp->getOpcode() == AMDGPU::ELSE) {
-+      if (tmp == MBB->begin()) {
-+        return tmp;
-+      } else {
-+        continue;
-+      }
-+    } else {
-+      return ++tmp;
-+    }
-+  }
-+  return MBB->end();
-+}
-+
-+void
-+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
-+                                     MachineBasicBlock::iterator MI,
-+                                     unsigned SrcReg, bool isKill,
-+                                     int FrameIndex,
-+                                     const TargetRegisterClass *RC,
-+                                     const TargetRegisterInfo *TRI) const {
-+  assert(!"Not Implemented");
-+}
-+
-+void
-+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-+                                      MachineBasicBlock::iterator MI,
-+                                      unsigned DestReg, int FrameIndex,
-+                                      const TargetRegisterClass *RC,
-+                                      const TargetRegisterInfo *TRI) const {
-+  assert(!"Not Implemented");
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+                                       MachineInstr *MI,
-+                                       const SmallVectorImpl<unsigned> &Ops,
-+                                       int FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+MachineInstr*
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+                                       MachineInstr *MI,
-+                                       const SmallVectorImpl<unsigned> &Ops,
-+                                       MachineInstr *LoadMI) const {
-+  // TODO: Implement this function
-+  return 0;
-+}
-+bool
-+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-+                                      const SmallVectorImpl<unsigned> &Ops) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+                                     unsigned Reg, bool UnfoldLoad,
-+                                     bool UnfoldStore,
-+                                     SmallVectorImpl<MachineInstr*> &NewMIs) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+                                     SmallVectorImpl<SDNode*> &NewNodes) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+unsigned
-+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
-+                                            bool UnfoldLoad, bool UnfoldStore,
-+                                            unsigned *LoadRegIndex) const {
-+  // TODO: Implement this function
-+  return 0;
-+}
-+
-+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+                                              int64_t Offset1, int64_t Offset2,
-+                                              unsigned NumLoads) const {
-+  assert(Offset2 > Offset1
-+         && "Second offset should be larger than first offset!");
-+  // If we have less than 16 loads in a row, and the offsets are within 16,
-+  // then schedule together.
-+  // TODO: Make the loads schedule near if it fits in a cacheline
-+  return (NumLoads < 16 && (Offset2 - Offset1) < 16);
-+}
-+
-+bool
-+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
-+    const {
-+  // TODO: Implement this function
-+  return true;
-+}
-+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
-+                                 MachineBasicBlock::iterator MI) const {
-+  // TODO: Implement this function
-+}
-+
-+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+bool
-+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+                                   const SmallVectorImpl<MachineOperand> &Pred2)
-+    const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
-+                                       std::vector<MachineOperand> &Pred) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
-+  // TODO: Implement this function
-+  return MI->getDesc().isPredicable();
-+}
-+
-+bool
-+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
-+  // TODO: Implement this function
-+  return true;
-+}
-+
-+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
-+    DebugLoc DL) const {
-+  MachineRegisterInfo &MRI = MF.getRegInfo();
-+  const AMDGPURegisterInfo & RI = getRegisterInfo();
-+
-+  for (unsigned i = 0; i < MI.getNumOperands(); i++) {
-+    MachineOperand &MO = MI.getOperand(i);
-+    // Convert dst regclass to one that is supported by the ISA
-+    if (MO.isReg() && MO.isDef()) {
-+      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-+        const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
-+        const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-+
-+        assert(newRegClass);
-+
-+        MRI.setRegClass(MO.getReg(), newRegClass);
-+      }
-+    }
-+  }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,149 @@
-+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Contains the definition of a TargetInstrInfo class that is common
-+/// to all AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUINSTRUCTIONINFO_H
-+#define AMDGPUINSTRUCTIONINFO_H
-+
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+#include <map>
-+
-+#define GET_INSTRINFO_HEADER
-+#define GET_INSTRINFO_ENUM
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
-+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
-+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
-+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class MachineFunction;
-+class MachineInstr;
-+class MachineInstrBuilder;
-+
-+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-+private:
-+  const AMDGPURegisterInfo RI;
-+  TargetMachine &TM;
-+  bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+                          MachineBasicBlock &MBB) const;
-+public:
-+  explicit AMDGPUInstrInfo(TargetMachine &tm);
-+
-+  virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
-+
-+  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
-+                             unsigned &DstReg, unsigned &SubIdx) const;
-+
-+  unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+  unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+                                     int &FrameIndex) const;
-+  bool hasLoadFromStackSlot(const MachineInstr *MI,
-+                            const MachineMemOperand *&MMO,
-+                            int &FrameIndex) const;
-+  unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+  unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+                                      int &FrameIndex) const;
-+  bool hasStoreFromStackSlot(const MachineInstr *MI,
-+                             const MachineMemOperand *&MMO,
-+                             int &FrameIndex) const;
-+
-+  MachineInstr *
-+  convertToThreeAddress(MachineFunction::iterator &MFI,
-+                        MachineBasicBlock::iterator &MBBI,
-+                        LiveVariables *LV) const;
-+
-+
-+  virtual void copyPhysReg(MachineBasicBlock &MBB,
-+                           MachineBasicBlock::iterator MI, DebugLoc DL,
-+                           unsigned DestReg, unsigned SrcReg,
-+                           bool KillSrc) const = 0;
-+
-+  void storeRegToStackSlot(MachineBasicBlock &MBB,
-+                           MachineBasicBlock::iterator MI,
-+                           unsigned SrcReg, bool isKill, int FrameIndex,
-+                           const TargetRegisterClass *RC,
-+                           const TargetRegisterInfo *TRI) const;
-+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-+                            MachineBasicBlock::iterator MI,
-+                            unsigned DestReg, int FrameIndex,
-+                            const TargetRegisterClass *RC,
-+                            const TargetRegisterInfo *TRI) const;
-+
-+protected:
-+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+                                      MachineInstr *MI,
-+                                      const SmallVectorImpl<unsigned> &Ops,
-+                                      int FrameIndex) const;
-+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+                                      MachineInstr *MI,
-+                                      const SmallVectorImpl<unsigned> &Ops,
-+                                      MachineInstr *LoadMI) const;
-+public:
-+  bool canFoldMemoryOperand(const MachineInstr *MI,
-+                            const SmallVectorImpl<unsigned> &Ops) const;
-+  bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+                           unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-+                           SmallVectorImpl<MachineInstr*> &NewMIs) const;
-+  bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+                           SmallVectorImpl<SDNode*> &NewNodes) const;
-+  unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
-+                                      bool UnfoldLoad, bool UnfoldStore,
-+                                      unsigned *LoadRegIndex = 0) const;
-+  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+                               int64_t Offset1, int64_t Offset2,
-+                               unsigned NumLoads) const;
-+
-+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-+  void insertNoop(MachineBasicBlock &MBB,
-+                  MachineBasicBlock::iterator MI) const;
-+  bool isPredicated(const MachineInstr *MI) const;
-+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
-+  bool DefinesPredicate(MachineInstr *MI,
-+                        std::vector<MachineOperand> &Pred) const;
-+  bool isPredicable(MachineInstr *MI) const;
-+  bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-+
-+  // Helper functions that check the opcode for status information
-+  bool isLoadInst(llvm::MachineInstr *MI) const;
-+  bool isExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isSExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isZExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isAExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isStoreInst(llvm::MachineInstr *MI) const;
-+  bool isTruncStoreInst(llvm::MachineInstr *MI) const;
-+
-+  virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+                                       int64_t Imm) const = 0;
-+  virtual unsigned getIEQOpcode() const = 0;
-+  virtual bool isMov(unsigned opcode) const = 0;
-+
-+  /// \brief Convert the AMDIL MachineInstr to a supported ISA
-+  /// MachineInstr
-+  virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
-+    DebugLoc DL) const;
-+
-+};
-+
-+} // End llvm namespace
-+
-+#endif // AMDGPUINSTRUCTIONINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,74 @@
-+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains DAG node definitions for the AMDGPU target.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Profiles
-+//===----------------------------------------------------------------------===//
-+
-+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
-+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
-+]>;
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Nodes
-+//
-+
-+// out = ((a << 32) | b) >> c
-+//
-+// Can be used to optimize rotl:
-+// rotl(a, b) = bitalign(a, a, 32 - b)
-+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
-+
-+// The argument to this node is a dword address.
-+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
-+
-+// out = a - floor(a)
-+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
-+
-+// out = max(a, b) a and b are floats
-+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are signed ints
-+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are unsigned ints
-+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are floats
-+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are signed ints
-+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are unsigned ints
-+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// urecip - This operation is a helper for integer division, it returns the
-+// result of 1 / a as a fractional unsigned integer.
-+// out = (2^32 / a) + e
-+// e is rounding error
-+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
-+
-+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
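The BITALIGN node documented above concatenates two 32-bit values and shifts the 64-bit result right, which is why a rotate-left can be rewritten as bitalign(a, a, 32 - b). A quick numeric check of that identity in plain C++ (not tied to the patch; just the arithmetic):

```cpp
#include <cassert>
#include <cstdint>

// out = low 32 bits of ((a << 32) | b) >> c, for c < 64.
uint32_t bitalign(uint32_t a, uint32_t b, uint32_t c) {
  uint64_t ab = (uint64_t(a) << 32) | b;
  return uint32_t(ab >> c);
}

uint32_t rotl(uint32_t a, uint32_t n) {
  n &= 31;
  return n == 0 ? a : (a << n) | (a >> (32 - n));
}

int main() {
  // rotl(a, n) == bitalign(a, a, 32 - n) for every shift amount.
  for (uint32_t n = 1; n < 32; ++n)
    assert(rotl(0xDEADBEEFu, n) == bitalign(0xDEADBEEFu, 0xDEADBEEFu, 32 - n));
  return 0;
}
```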
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td llvm-r600/lib/Target/R600/AMDGPUInstructions.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstructions.td	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,190 @@
-+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains instruction defs that are common to all hw codegen
-+// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
-+  field bits<16> AMDILOp = 0;
-+  field bits<3> Gen = 0;
-+
-+  let Namespace = "AMDGPU";
-+  let OutOperandList = outs;
-+  let InOperandList = ins;
-+  let AsmString = asm;
-+  let Pattern = pattern;
-+  let Itinerary = NullALU;
-+  let TSFlags{42-40} = Gen;
-+  let TSFlags{63-48} = AMDILOp;
-+}
-+
-+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
-+    : AMDGPUInst<outs, ins, asm, pattern> {
-+
-+  field bits<32> Inst = 0xffffffff;
-+
-+}
-+
-+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
-+
-+def COND_EQ : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOEQ: case ISD::SETUEQ:
-+                     case ISD::SETEQ: return true;}}}]
-+>;
-+
-+def COND_NE : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETONE: case ISD::SETUNE:
-+                     case ISD::SETNE: return true;}}}]
-+>;
-+def COND_GT : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOGT: case ISD::SETUGT:
-+                     case ISD::SETGT: return true;}}}]
-+>;
-+
-+def COND_GE : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOGE: case ISD::SETUGE:
-+                     case ISD::SETGE: return true;}}}]
-+>;
-+
-+def COND_LT : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOLT: case ISD::SETULT:
-+                     case ISD::SETLT: return true;}}}]
-+>;
-+
-+def COND_LE : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOLE: case ISD::SETULE:
-+                     case ISD::SETLE: return true;}}}]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Load/Store Pattern Fragments
-+//===----------------------------------------------------------------------===//
-+
-+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
-+  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-+}]>;
-+
-+class Constants {
-+int TWO_PI = 0x40c90fdb;
-+int PI = 0x40490fdb;
-+int TWO_PI_INV = 0x3e22f983;
-+}
-+def CONST : Constants;
-+
-+def FP_ZERO : PatLeaf <
-+  (fpimm),
-+  [{return N->getValueAPF().isZero();}]
-+>;
-+
-+def FP_ONE : PatLeaf <
-+  (fpimm),
-+  [{return N->isExactlyValue(1.0);}]
-+>;
-+
-+let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
-+
-+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
-+  (outs rc:$dst),
-+  (ins rc:$src0),
-+  "CLAMP $dst, $src0",
-+  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
-+>;
-+
-+class FABS <RegisterClass rc> : AMDGPUShaderInst <
-+  (outs rc:$dst),
-+  (ins rc:$src0),
-+  "FABS $dst, $src0",
-+  [(set rc:$dst, (fabs rc:$src0))]
-+>;
-+
-+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
-+  (outs rc:$dst),
-+  (ins rc:$src0),
-+  "FNEG $dst, $src0",
-+  [(set rc:$dst, (fneg rc:$src0))]
-+>;
-+
-+def SHADER_TYPE : AMDGPUShaderInst <
-+  (outs),
-+  (ins i32imm:$type),
-+  "SHADER_TYPE $type",
-+  [(int_AMDGPU_shader_type imm:$type)]
-+>;
-+
-+} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
-+
-+/* Generic helper patterns for intrinsics */
-+/* -------------------------------------- */
-+
-+class POW_Common <AMDGPUShaderInst log_ieee, AMDGPUShaderInst exp_ieee,
-+                  AMDGPUShaderInst mul, RegisterClass rc> : Pat <
-+  (fpow rc:$src0, rc:$src1),
-+  (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
-+>;
-+
-+/* Other helper patterns */
-+/* --------------------- */
-+
-+/* Extract element pattern */
-+class Extract_Element <ValueType sub_type, ValueType vec_type,
-+                       RegisterClass vec_class, int sub_idx,
-+                       SubRegIndex sub_reg> : Pat<
-+  (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
-+  (EXTRACT_SUBREG vec_class:$src, sub_reg)
-+>;
-+
-+/* Insert element pattern */
-+class Insert_Element <ValueType elem_type, ValueType vec_type,
-+                      RegisterClass elem_class, RegisterClass vec_class,
-+                      int sub_idx, SubRegIndex sub_reg> : Pat <
-+
-+  (vec_type (vector_insert (vec_type vec_class:$vec),
-+                           (elem_type elem_class:$elem), sub_idx)),
-+  (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
-+>;
-+
-+// Vector Build pattern
-+class Vector_Build <ValueType vecType, ValueType elemType,
-+                    RegisterClass elemClass> : Pat <
-+  (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
-+                         (elemType elemClass:$z), (elemType elemClass:$w))),
-+  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-+  (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
-+                            elemClass:$z, sel_z), elemClass:$w, sel_w)
-+>;
-+
-+// bitconvert pattern
-+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
-+  (dt (bitconvert (st rc:$src0))),
-+  (dt rc:$src0)
-+>;
-+
-+class DwordAddrPat <ValueType vt, RegisterClass rc> : Pat <
-+  (vt (AMDGPUdwordaddr (vt rc:$addr))),
-+  (vt rc:$addr)
-+>;
-+
-+include "R600Instructions.td"
-+
-+include "SIInstrInfo.td"
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines intrinsics that are used by all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "AMDGPU", isTarget = 1 in {
-+
-+  def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
-+  def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-+
-+  def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
-+  def int_AMDGPU_kilp : Intrinsic<[], [], []>;
-+  def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-+
-+  def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
-+}
-+
-+let TargetPrefix = "TGSI", isTarget = 1 in {
-+
-+  def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
-+}
-+
-+include "SIIntrinsics.td"
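The Constants class above stores 2π, π, and 1/(2π) as raw IEEE-754 bit patterns, the form in which tablegen patterns feed them to the hardware as literal operands. Decoding them on the host confirms which values they encode (plain C++, independent of the patch):

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

float decodeF32(uint32_t bits) {
  float f;
  std::memcpy(&f, &bits, sizeof(f));  // reinterpret the stored bit pattern
  return f;
}

int main() {
  assert(decodeF32(0x40c90fdb) == float(2.0 * M_PI));  // TWO_PI
  assert(decodeF32(0x40490fdb) == float(M_PI));        // PI
  assert(decodeF32(0x3e22f983) == float(0.5 / M_PI));  // TWO_PI_INV
  return 0;
}
```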
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,418 @@
-+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This is the parent TargetLowering class for hardware code gen
-+/// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUISelLowering.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-+
-+using namespace llvm;
-+
-+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
-+  TargetLowering(TM, new TargetLoweringObjectFileELF()) {
-+
-+  // Initialize target lowering borrowed from AMDIL
-+  InitAMDILLowering();
-+
-+  // We need to custom lower some of the intrinsics
-+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-+
-+  // Library functions.  These default to Expand, but we have instructions
-+  // for them.
-+  setOperationAction(ISD::FCEIL, MVT::f32, Legal);
-+  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
-+  setOperationAction(ISD::FPOW, MVT::f32, Legal);
-+  setOperationAction(ISD::FLOG2, MVT::f32, Legal);
-+  setOperationAction(ISD::FABS, MVT::f32, Legal);
-+  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
-+  setOperationAction(ISD::FRINT, MVT::f32, Legal);
-+
-+  // Lower floating point store/load to integer store/load to reduce the number
-+  // of patterns in tablegen.
-+  setOperationAction(ISD::STORE, MVT::f32, Promote);
-+  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
-+
-+  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
-+  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
-+
-+  setOperationAction(ISD::LOAD, MVT::f32, Promote);
-+  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
-+
-+  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
-+  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
-+
-+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
-+  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
-+  setOperationAction(ISD::UREM, MVT::i32, Expand);
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// TargetLowering Callbacks
-+//===---------------------------------------------------------------------===//
-+
-+SDValue AMDGPUTargetLowering::LowerFormalArguments(
-+                                      SDValue Chain,
-+                                      CallingConv::ID CallConv,
-+                                      bool isVarArg,
-+                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-+                                      DebugLoc DL, SelectionDAG &DAG,
-+                                      SmallVectorImpl<SDValue> &InVals) const {
-+  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
-+    InVals.push_back(SDValue());
-+  }
-+  return Chain;
-+}
-+
-+SDValue AMDGPUTargetLowering::LowerReturn(
-+                                     SDValue Chain,
-+                                     CallingConv::ID CallConv,
-+                                     bool isVarArg,
-+                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
-+                                     const SmallVectorImpl<SDValue> &OutVals,
-+                                     DebugLoc DL, SelectionDAG &DAG) const {
-+  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// Target specific lowering
-+//===---------------------------------------------------------------------===//
-+
-+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
-+    const {
-+  switch (Op.getOpcode()) {
-+  default:
-+    Op.getNode()->dump();
-+    assert(0 && "Custom lowering code for this "
-+        "instruction is not implemented yet!");
-+    break;
-+  // AMDIL DAG lowering
-+  case ISD::SDIV: return LowerSDIV(Op, DAG);
-+  case ISD::SREM: return LowerSREM(Op, DAG);
-+  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
-+  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
-+  // AMDGPU DAG lowering
-+  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
-+  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
-+  }
-+  return Op;
-+}
-+
-+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-+    SelectionDAG &DAG) const {
-+  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+  DebugLoc DL = Op.getDebugLoc();
-+  EVT VT = Op.getValueType();
-+
-+  switch (IntrinsicID) {
-+    default: return Op;
-+    case AMDGPUIntrinsic::AMDIL_abs:
-+      return LowerIntrinsicIABS(Op, DAG);
-+    case AMDGPUIntrinsic::AMDIL_exp:
-+      return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
-+    case AMDGPUIntrinsic::AMDGPU_lrp:
-+      return LowerIntrinsicLRP(Op, DAG);
-+    case AMDGPUIntrinsic::AMDIL_fraction:
-+      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
-+    case AMDGPUIntrinsic::AMDIL_mad:
-+      return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-+                         Op.getOperand(2), Op.getOperand(3));
-+    case AMDGPUIntrinsic::AMDIL_max:
-+      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
-+                         Op.getOperand(2));
-+    case AMDGPUIntrinsic::AMDGPU_imax:
-+      return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
-+                         Op.getOperand(2));
-+    case AMDGPUIntrinsic::AMDGPU_umax:
-+      return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
-+                         Op.getOperand(2));
-+    case AMDGPUIntrinsic::AMDIL_min:
-+      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
-+                         Op.getOperand(2));
-+    case AMDGPUIntrinsic::AMDGPU_imin:
-+      return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
-+                         Op.getOperand(2));
-+    case AMDGPUIntrinsic::AMDGPU_umin:
-+      return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
-+                         Op.getOperand(2));
-+    case AMDGPUIntrinsic::AMDIL_round_nearest:
-+      return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
-+  }
-+}
-+
-+/// IABS(a) = SMAX(sub(0, a), a)
-+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
-+    SelectionDAG &DAG) const {
-+
-+  DebugLoc DL = Op.getDebugLoc();
-+  EVT VT = Op.getValueType();
-+  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-+                                              Op.getOperand(1));
-+
-+  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
-+}
-+
-+/// Linear Interpolation
-+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
-+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
-+    SelectionDAG &DAG) const {
-+  DebugLoc DL = Op.getDebugLoc();
-+  EVT VT = Op.getValueType();
-+  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
-+                                DAG.getConstantFP(1.0f, MVT::f32),
-+                                Op.getOperand(1));
-+  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
-+                                 Op.getOperand(3));
-+  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-+                     Op.getOperand(2),
-+                     OneSubAC);
-+}
-+
-+/// \brief Generate Min/Max node
-+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
-+    SelectionDAG &DAG) const {
-+  DebugLoc DL = Op.getDebugLoc();
-+  EVT VT = Op.getValueType();
-+
-+  SDValue LHS = Op.getOperand(0);
-+  SDValue RHS = Op.getOperand(1);
-+  SDValue True = Op.getOperand(2);
-+  SDValue False = Op.getOperand(3);
-+  SDValue CC = Op.getOperand(4);
-+
-+  if (VT != MVT::f32 ||
-+      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
-+    return SDValue();
-+  }
-+
-+  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+  switch (CCOpcode) {
-+  case ISD::SETOEQ:
-+  case ISD::SETONE:
-+  case ISD::SETUNE:
-+  case ISD::SETNE:
-+  case ISD::SETUEQ:
-+  case ISD::SETEQ:
-+  case ISD::SETFALSE:
-+  case ISD::SETFALSE2:
-+  case ISD::SETTRUE:
-+  case ISD::SETTRUE2:
-+  case ISD::SETUO:
-+  case ISD::SETO:
-+    assert(0 && "Operation should already be optimised !");
-+  case ISD::SETULE:
-+  case ISD::SETULT:
-+  case ISD::SETOLE:
-+  case ISD::SETOLT:
-+  case ISD::SETLE:
-+  case ISD::SETLT: {
-+    if (LHS == True)
-+      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+    else
-+      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+  }
-+  case ISD::SETGT:
-+  case ISD::SETGE:
-+  case ISD::SETUGE:
-+  case ISD::SETOGE:
-+  case ISD::SETUGT:
-+  case ISD::SETOGT: {
-+    if (LHS == True)
-+      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+    else
-+      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+  }
-+  case ISD::SETCC_INVALID:
-+    assert(0 && "Invalid setcc condcode !");
-+  }
-+  return Op;
-+}
-+
-+
-+
-+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
-+    SelectionDAG &DAG) const {
-+  DebugLoc DL = Op.getDebugLoc();
-+  EVT VT = Op.getValueType();
-+
-+  SDValue Num = Op.getOperand(0);
-+  SDValue Den = Op.getOperand(1);
-+
-+  SmallVector<SDValue, 2> Results;
-+
-+  // RCP = URECIP(Den) = 2^32 / Den + e
-+  // e is rounding error.
-+  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-+
-+  // RCP_LO = umulo(RCP, Den)
-+  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
-+
-+  // RCP_HI = mulhu (RCP, Den)
-+  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-+
-+  // NEG_RCP_LO = -RCP_LO
-+  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-+                                                     RCP_LO);
-+
-+  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
-+  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+                                           NEG_RCP_LO, RCP_LO,
-+                                           ISD::SETEQ);
-+  // Calculate the rounding error from the URECIP instruction
-+  // E = mulhu(ABS_RCP_LO, RCP)
-+  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-+
-+  // RCP_A_E = RCP + E
-+  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-+
-+  // RCP_S_E = RCP - E
-+  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-+
-+  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
-+  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+                                     RCP_A_E, RCP_S_E,
-+                                     ISD::SETEQ);
-+  // Quotient = mulhu(Tmp0, Num)
-+  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-+
-+  // Num_S_Remainder = Quotient * Den
-+  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
-+
-+  // Remainder = Num - Num_S_Remainder
-+  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-+
-+  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
-+  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
-+                                                 DAG.getConstant(-1, VT),
-+                                                 DAG.getConstant(0, VT),
-+                                                 ISD::SETGE);
-+  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
-+  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
-+                                                  DAG.getConstant(0, VT),
-+                                                  DAG.getConstant(-1, VT),
-+                                                  DAG.getConstant(0, VT),
-+                                                  ISD::SETGE);
-+  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
-+  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
-+                                               Remainder_GE_Zero);
-+
-+  // Calculate Division result:
-+
-+  // Quotient_A_One = Quotient + 1
-+  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
-+                                                         DAG.getConstant(1, VT));
-+
-+  // Quotient_S_One = Quotient - 1
-+  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
-+                                                         DAG.getConstant(1, VT));
-+
-+  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
-+  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+                                    Quotient, Quotient_A_One, ISD::SETEQ);
-+
-+  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
-+  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+                            Quotient_S_One, Div, ISD::SETEQ);
-+
-+  // Calculate Rem result:
-+
-+  // Remainder_S_Den = Remainder - Den
-+  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-+
-+  // Remainder_A_Den = Remainder + Den
-+  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-+
-+  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
-+  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+                                    Remainder, Remainder_S_Den, ISD::SETEQ);
-+
-+  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
-+  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+                            Remainder_A_Den, Rem, ISD::SETEQ);
-+  SDValue Ops[2];
-+  Ops[0] = Div;
-+  Ops[1] = Rem;
-+  return DAG.getMergeValues(Ops, 2, DL);
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Helper functions
-+//===----------------------------------------------------------------------===//
-+
-+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
-+  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+    return CFP->isExactlyValue(1.0);
-+  }
-+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+    return C->isAllOnesValue();
-+  }
-+  return false;
-+}
-+
-+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
-+  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+    return CFP->getValueAPF().isZero();
-+  }
-+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+    return C->isNullValue();
-+  }
-+  return false;
-+}
-+
-+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
-+                                                  const TargetRegisterClass *RC,
-+                                                  unsigned Reg, EVT VT) const {
-+  MachineFunction &MF = DAG.getMachineFunction();
-+  MachineRegisterInfo &MRI = MF.getRegInfo();
-+  unsigned VirtualRegister;
-+  if (!MRI.isLiveIn(Reg)) {
-+    VirtualRegister = MRI.createVirtualRegister(RC);
-+    MRI.addLiveIn(Reg, VirtualRegister);
-+  } else {
-+    VirtualRegister = MRI.getLiveInVirtReg(Reg);
-+  }
-+  return DAG.getRegister(VirtualRegister, VT);
-+}
-+
-+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
-+
-+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
-+  switch (Opcode) {
-+  default: return 0;
-+  // AMDIL DAG nodes
-+  NODE_NAME_CASE(MAD);
-+  NODE_NAME_CASE(CALL);
-+  NODE_NAME_CASE(UMUL);
-+  NODE_NAME_CASE(DIV_INF);
-+  NODE_NAME_CASE(RET_FLAG);
-+  NODE_NAME_CASE(BRANCH_COND);
-+
-+  // AMDGPU DAG nodes
-+  NODE_NAME_CASE(DWORDADDR)
-+  NODE_NAME_CASE(FRACT)
-+  NODE_NAME_CASE(FMAX)
-+  NODE_NAME_CASE(SMAX)
-+  NODE_NAME_CASE(UMAX)
-+  NODE_NAME_CASE(FMIN)
-+  NODE_NAME_CASE(SMIN)
-+  NODE_NAME_CASE(UMIN)
-+  NODE_NAME_CASE(URECIP)
-+  NODE_NAME_CASE(INTERP)
-+  NODE_NAME_CASE(INTERP_P0)
-+  NODE_NAME_CASE(EXPORT)
-+  NODE_NAME_CASE(CONST_ADDRESS)
-+  }
-+}
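LowerUDIVREM() above expands 32-bit unsigned division into a reciprocal estimate (URECIP) followed by branch-free correction steps built from select/setcc nodes. The host-side model below keeps the same estimate-then-correct structure but uses an ordinary loop for the fixups; it assumes URECIP computes floor(2^32 / d), saturated to 0xFFFFFFFF so that d == 1 stays in range (an assumption about the node's behavior, not something the patch states):

```cpp
#include <cassert>
#include <cstdint>

// URECIP: 1/d as a 0.32 fixed-point fraction, i.e. floor(2^32 / d) (+ error e).
uint32_t urecip(uint32_t d) {
  uint64_t r = (uint64_t(1) << 32) / d;
  return r > 0xFFFFFFFFu ? 0xFFFFFFFFu : uint32_t(r);  // saturate for d == 1
}

// ISD::MULHU: the high 32 bits of the 64-bit product.
uint32_t mulhu(uint32_t a, uint32_t b) {
  return uint32_t((uint64_t(a) * b) >> 32);
}

void udivrem(uint32_t n, uint32_t d, uint32_t &q, uint32_t &r) {
  q = mulhu(urecip(d), n);  // Quotient = mulhu(Tmp0, Num); never overshoots
  while (n - q * d >= d)    // the select-based +1 fixups, written as a loop
    ++q;
  r = n - q * d;            // Remainder = Num - Quotient * Den
}

int main() {
  for (uint32_t n : {0u, 1u, 99u, 1u << 31, 0xFFFFFFFFu})
    for (uint32_t d : {1u, 2u, 7u, 1000u, 0xFFFFFFFFu}) {
      uint32_t q, r;
      udivrem(n, d, q, r);
      assert(q == n / d && r == n % d);
    }
  return 0;
}
```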
-+ /// -+ /// \returns a RegisterSDNode representing Reg. -+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, -+ unsigned Reg, EVT VT) const; -+ -+ bool isHWTrueValue(SDValue Op) const; -+ bool isHWFalseValue(SDValue Op) const; -+ -+public: -+ AMDGPUTargetLowering(TargetMachine &TM); -+ -+ virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, -+ bool isVarArg, -+ const SmallVectorImpl &Ins, -+ DebugLoc DL, SelectionDAG &DAG, -+ SmallVectorImpl &InVals) const; -+ -+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, -+ bool isVarArg, -+ const SmallVectorImpl &Outs, -+ const SmallVectorImpl &OutVals, -+ DebugLoc DL, SelectionDAG &DAG) const; -+ -+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; -+ virtual const char* getTargetNodeName(unsigned Opcode) const; -+ -+// Functions defined in AMDILISelLowering.cpp -+public: -+ -+ /// \brief Determine which of the bits specified in \p Mask are known to be -+ /// either zero or one and return them in the \p KnownZero and \p KnownOne -+ /// bitsets. -+ virtual void computeMaskedBitsForTargetNode(const SDValue Op, -+ APInt &KnownZero, -+ APInt &KnownOne, -+ const SelectionDAG &DAG, -+ unsigned Depth = 0) const; -+ -+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, -+ const CallInst &I, unsigned Intrinsic) const; -+ -+ /// We want to mark f32/f64 floating point values as legal. -+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const; -+ -+ /// We don't want to shrink f64/f32 constants. -+ bool ShouldShrinkFPConstant(EVT VT) const; -+ -+private: -+ void InitAMDILLowering(); -+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; -+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; -+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; -+}; -+ -+namespace AMDGPUISD { -+ -+enum { -+ // AMDIL ISD Opcodes -+ FIRST_NUMBER = ISD::BUILTIN_OP_END, -+ MAD, // 32bit Fused Multiply Add instruction -+ CALL, // Function call based on a single integer -+ UMUL, // 32bit unsigned multiplication -+ DIV_INF, // Divide with infinity returned on zero divisor -+ RET_FLAG, -+ BRANCH_COND, -+ // End AMDIL ISD Opcodes -+ BITALIGN, -+ DWORDADDR, -+ FRACT, -+ FMAX, -+ SMAX, -+ UMAX, -+ FMIN, -+ SMIN, -+ UMIN, -+ URECIP, -+ INTERP, -+ INTERP_P0, -+ EXPORT, -+ CONST_ADDRESS, -+ LAST_AMDGPU_ISD_NUMBER -+}; -+ -+ -+} // End namespace AMDGPUISD -+ -+namespace SIISD { -+ -+enum { -+ SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, -+ VCC_AND, -+ VCC_BITCAST -+}; -+ -+} // End namespace SIISD -+ -+} // End namespace llvm -+ -+#endif // AMDGPUISELLOWERING_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp 
---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp 2013-01-25 19:43:57.430049721 +0100 -@@ -0,0 +1,83 @@ -+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst. -+// -+//===----------------------------------------------------------------------===// -+// -+ -+#include "AMDGPUMCInstLower.h" -+#include "AMDGPUAsmPrinter.h" -+#include "R600InstrInfo.h" -+#include "llvm/CodeGen/MachineBasicBlock.h" -+#include "llvm/CodeGen/MachineInstr.h" -+#include "llvm/Constants.h" -+#include "llvm/MC/MCInst.h" -+#include "llvm/MC/MCStreamer.h" -+#include "llvm/MC/MCExpr.h" -+#include "llvm/Support/ErrorHandling.h" -+ -+using namespace llvm; -+ -+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx): -+ Ctx(ctx) -+{ } -+ -+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { -+ OutMI.setOpcode(MI->getOpcode()); -+ -+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) { -+ const MachineOperand &MO = MI->getOperand(i); -+ -+ MCOperand MCOp; -+ switch (MO.getType()) { -+ default: -+ llvm_unreachable("unknown operand type"); -+ case MachineOperand::MO_FPImmediate: { -+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF(); -+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle && -+ "Only floating point immediates are supported at the moment."); -+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat()); -+ break; -+ } -+ case MachineOperand::MO_Immediate: -+ MCOp = MCOperand::CreateImm(MO.getImm()); -+ break; -+ case MachineOperand::MO_Register: -+ MCOp = MCOperand::CreateReg(MO.getReg()); -+ break; -+ case MachineOperand::MO_MachineBasicBlock: -+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( -+ MO.getMBB()->getSymbol(), Ctx)); -+ } -+ OutMI.addOperand(MCOp); -+ } -+} -+ -+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { -+ AMDGPUMCInstLower MCInstLowering(OutContext); -+ -+ if (MI->isBundle()) { -+ const MachineBasicBlock *MBB = MI->getParent(); -+ MachineBasicBlock::const_instr_iterator I = MI; -+ ++I; -+ while (I != MBB->end() && I->isInsideBundle()) { -+ MCInst MCBundleInst; -+ const MachineInstr *BundledInst = I; -+ MCInstLowering.lower(BundledInst, MCBundleInst); -+ OutStreamer.EmitInstruction(MCBundleInst); -+ ++I; -+ } -+ } else { -+ MCInst TmpInst; -+ MCInstLowering.lower(MI, TmpInst); -+ OutStreamer.EmitInstruction(TmpInst); -+ } -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h ---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h 2013-01-25 19:43:57.430049721 +0100 -@@ -0,0 +1,34 @@ -+//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
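
The lower() routine above is a straight switch over operand kinds. A
standalone model of that mapping, with illustrative stand-ins for
MachineOperand/MCOperand rather than LLVM's actual types:

#include <cassert>
#include <cstdio>

enum class Kind { Reg, Imm, FPImm, Block };
struct MOp  { Kind kind; unsigned reg; long long imm; double fp; int mbb; };
struct MCOp { Kind kind; unsigned reg; long long imm; float fp; int sym; };

MCOp lowerOperand(const MOp &mo) {
  switch (mo.kind) {
  case Kind::FPImm:
    // the real code asserts the immediate is IEEE single precision
    return {Kind::FPImm, 0, 0, (float)mo.fp, 0};
  case Kind::Imm:   return {Kind::Imm, 0, mo.imm, 0.0f, 0};
  case Kind::Reg:   return {Kind::Reg, mo.reg, 0, 0.0f, 0};
  case Kind::Block: return {Kind::Block, 0, 0, 0.0f, mo.mbb}; // symbol ref
  }
  assert(false && "unknown operand type");
  return {};
}

int main() {
  MCOp op = lowerOperand({Kind::Imm, 0, 42, 0.0, 0});
  printf("imm=%lld\n", op.imm);
  return 0;
}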
-+// -+/// \file -+//===----------------------------------------------------------------------===// -+ -+#ifndef AMDGPU_MCINSTLOWER_H -+#define AMDGPU_MCINSTLOWER_H -+ -+namespace llvm { -+ -+class MCInst; -+class MCContext; -+class MachineInstr; -+ -+class AMDGPUMCInstLower { -+ -+ MCContext &Ctx; -+ -+public: -+ AMDGPUMCInstLower(MCContext &ctx); -+ -+ /// \brief Lower a MachineInstr to an MCInst -+ void lower(const MachineInstr *MI, MCInst &OutMI) const; -+ -+}; -+ -+} // End namespace llvm -+ -+#endif //AMDGPU_MCINSTLOWER_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp ---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp 2013-01-25 19:43:57.430049721 +0100 -@@ -0,0 +1,51 @@ -+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPURegisterInfo.h" -+#include "AMDGPUTargetMachine.h" -+ -+using namespace llvm; -+ -+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm, -+ const TargetInstrInfo &tii) -+: AMDGPUGenRegisterInfo(0), -+ TM(tm), -+ TII(tii) -+ { } -+ -+//===----------------------------------------------------------------------===// -+// Function handling callbacks - Functions are a seldom used feature of GPUS, so -+// they are not supported at this time. -+//===----------------------------------------------------------------------===// -+ -+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; -+ -+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) -+ const { -+ return &CalleeSavedReg; -+} -+ -+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, -+ int SPAdj, -+ RegScavenger *RS) const { -+ assert(!"Subroutines not supported yet"); -+} -+ -+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { -+ assert(!"Subroutines not supported yet"); -+ return 0; -+} -+ -+#define GET_REGINFO_TARGET_DESC -+#include "AMDGPUGenRegisterInfo.inc" -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h ---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h 2013-01-25 19:43:57.430049721 +0100 -@@ -0,0 +1,63 @@ -+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen -+/// targets. 
-+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef AMDGPUREGISTERINFO_H -+#define AMDGPUREGISTERINFO_H -+ -+#include "llvm/ADT/BitVector.h" -+#include "llvm/Target/TargetRegisterInfo.h" -+ -+#define GET_REGINFO_HEADER -+#define GET_REGINFO_ENUM -+#include "AMDGPUGenRegisterInfo.inc" -+ -+namespace llvm { -+ -+class AMDGPUTargetMachine; -+class TargetInstrInfo; -+ -+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { -+ TargetMachine &TM; -+ const TargetInstrInfo &TII; -+ static const uint16_t CalleeSavedReg; -+ -+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii); -+ -+ virtual BitVector getReservedRegs(const MachineFunction &MF) const { -+ assert(!"Unimplemented"); return BitVector(); -+ } -+ -+ /// \param RC is an AMDIL reg class. -+ /// -+ /// \returns The ISA reg class that is equivalent to \p RC. -+ virtual const TargetRegisterClass * getISARegClass( -+ const TargetRegisterClass * RC) const { -+ assert(!"Unimplemented"); return NULL; -+ } -+ -+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { -+ assert(!"Unimplemented"); return NULL; -+ } -+ -+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; -+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, -+ RegScavenger *RS) const; -+ unsigned getFrameRegister(const MachineFunction &MF) const; -+ -+}; -+ -+} // End namespace llvm -+ -+#endif // AMDIDSAREGISTERINFO_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td ---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td 2013-01-25 19:43:57.433383055 +0100 -@@ -0,0 +1,22 @@ -+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// Tablegen register definitions common to all hw codegen targets. -+// -+//===----------------------------------------------------------------------===// -+ -+let Namespace = "AMDGPU" in { -+ def sel_x : SubRegIndex; -+ def sel_y : SubRegIndex; -+ def sel_z : SubRegIndex; -+ def sel_w : SubRegIndex; -+} -+ -+include "R600RegisterInfo.td" -+include "SIRegisterInfo.td" -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp ---- llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp 2013-01-25 19:43:57.433383055 +0100 -@@ -0,0 +1,714 @@ -+//===-- AMDGPUStructurizeCFG.cpp - ------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// The pass implemented in this file transforms the programs control flow -+/// graph into a form that's suitable for code generation on hardware that -+/// implements control flow by execution masking. This currently includes all -+/// AMD GPUs but may as well be useful for other types of hardware. 
-+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPU.h" -+#include "llvm/Module.h" -+#include "llvm/ADT/SCCIterator.h" -+#include "llvm/Analysis/RegionIterator.h" -+#include "llvm/Analysis/RegionInfo.h" -+#include "llvm/Analysis/RegionPass.h" -+#include "llvm/Transforms/Utils/SSAUpdater.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+// Definition of the complex types used in this pass. -+ -+typedef std::pair BBValuePair; -+typedef ArrayRef BBVecRef; -+ -+typedef SmallVector RNVector; -+typedef SmallVector BBVector; -+typedef SmallVector BBValueVector; -+ -+typedef DenseMap PhiMap; -+typedef DenseMap BBPhiMap; -+typedef DenseMap BBPredicates; -+typedef DenseMap PredMap; -+typedef DenseMap VisitedMap; -+ -+// The name for newly created blocks. -+ -+static const char *FlowBlockName = "Flow"; -+ -+/// @brief Transforms the control flow graph on one single entry/exit region -+/// at a time. -+/// -+/// After the transform all "If"/"Then"/"Else" style control flow looks like -+/// this: -+/// -+/// \verbatim -+/// 1 -+/// || -+/// | | -+/// 2 | -+/// | / -+/// |/ -+/// 3 -+/// || Where: -+/// | | 1 = "If" block, calculates the condition -+/// 4 | 2 = "Then" subregion, runs if the condition is true -+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow -+/// |/ 4 = "Else" optional subregion, runs if the condition is false -+/// 5 5 = "End" block, also rejoins the control flow -+/// \endverbatim -+/// -+/// Control flow is expressed as a branch where the true exit goes into the -+/// "Then"/"Else" region, while the false exit skips the region -+/// The condition for the optional "Else" region is expressed as a PHI node. -+/// The incomming values of the PHI node are true for the "If" edge and false -+/// for the "Then" edge. -+/// -+/// Additionally to that even complicated loops look like this: -+/// -+/// \verbatim -+/// 1 -+/// || -+/// | | -+/// 2 ^ Where: -+/// | / 1 = "Entry" block -+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block -+/// 3 3 = "Flow" block, with back edge to entry block -+/// | -+/// \endverbatim -+/// -+/// The back edge of the "Flow" block is always on the false side of the branch -+/// while the true side continues the general flow. So the loop condition -+/// consist of a network of PHI nodes where the true incoming values expresses -+/// breaks and the false values expresses continue states. 
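
The diagrammed transform can be written out as scalar C++ (thenBody/elseBody
are hypothetical): after structurization each region runs under an explicit
predicate instead of a free-form branch, so masked hardware can execute the
blocks in a fixed linear order.

#include <cstdio>

static void thenBody() { puts("then"); }
static void elseBody() { puts("else"); }

void structuredIfElse(bool cond) {
  bool predThen = cond;    // condition computed in the "If" block (1)
  if (predThen)
    thenBody();            // "Then" subregion (2)
  // "Flow" block (3): rejoins the flow; the PHI node from the text reduces
  // to the complement of the predicate in this scalar model
  bool predElse = !predThen;
  if (predElse)
    elseBody();            // optional "Else" subregion (4)
  // "End" block (5): control flow rejoined
}

int main() { structuredIfElse(true); structuredIfElse(false); return 0; }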
-+class AMDGPUStructurizeCFG : public RegionPass { -+ -+ static char ID; -+ -+ Type *Boolean; -+ ConstantInt *BoolTrue; -+ ConstantInt *BoolFalse; -+ UndefValue *BoolUndef; -+ -+ Function *Func; -+ Region *ParentRegion; -+ -+ DominatorTree *DT; -+ -+ RNVector Order; -+ VisitedMap Visited; -+ PredMap Predicates; -+ BBPhiMap DeletedPhis; -+ BBVector FlowsInserted; -+ -+ BasicBlock *LoopStart; -+ BasicBlock *LoopEnd; -+ BBPredicates LoopPred; -+ -+ void orderNodes(); -+ -+ void buildPredicate(BranchInst *Term, unsigned Idx, -+ BBPredicates &Pred, bool Invert); -+ -+ void analyzeBlock(BasicBlock *BB); -+ -+ void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx); -+ -+ void collectInfos(); -+ -+ bool dominatesPredicates(BasicBlock *A, BasicBlock *B); -+ -+ void killTerminator(BasicBlock *BB); -+ -+ RegionNode *skipChained(RegionNode *Node); -+ -+ void delPhiValues(BasicBlock *From, BasicBlock *To); -+ -+ void addPhiValues(BasicBlock *From, BasicBlock *To); -+ -+ BasicBlock *getNextFlow(BasicBlock *Prev); -+ -+ bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node); -+ -+ BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node); -+ -+ void createFlow(); -+ -+ void insertConditions(); -+ -+ void rebuildSSA(); -+ -+public: -+ AMDGPUStructurizeCFG(): -+ RegionPass(ID) { -+ -+ initializeRegionInfoPass(*PassRegistry::getPassRegistry()); -+ } -+ -+ virtual bool doInitialization(Region *R, RGPassManager &RGM); -+ -+ virtual bool runOnRegion(Region *R, RGPassManager &RGM); -+ -+ virtual const char *getPassName() const { -+ return "AMDGPU simplify control flow"; -+ } -+ -+ void getAnalysisUsage(AnalysisUsage &AU) const { -+ -+ AU.addRequired(); -+ AU.addPreserved(); -+ RegionPass::getAnalysisUsage(AU); -+ } -+ -+}; -+ -+} // end anonymous namespace -+ -+char AMDGPUStructurizeCFG::ID = 0; -+ -+/// \brief Initialize the types and constants used in the pass -+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { -+ LLVMContext &Context = R->getEntry()->getContext(); -+ -+ Boolean = Type::getInt1Ty(Context); -+ BoolTrue = ConstantInt::getTrue(Context); -+ BoolFalse = ConstantInt::getFalse(Context); -+ BoolUndef = UndefValue::get(Boolean); -+ -+ return false; -+} -+ -+/// \brief Build up the general order of nodes -+void AMDGPUStructurizeCFG::orderNodes() { -+ scc_iterator I = scc_begin(ParentRegion), -+ E = scc_end(ParentRegion); -+ for (Order.clear(); I != E; ++I) { -+ std::vector &Nodes = *I; -+ Order.append(Nodes.begin(), Nodes.end()); -+ } -+} -+ -+/// \brief Build blocks and loop predicates -+void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx, -+ BBPredicates &Pred, bool Invert) { -+ Value *True = Invert ? BoolFalse : BoolTrue; -+ Value *False = Invert ? BoolTrue : BoolFalse; -+ -+ RegionInfo *RI = ParentRegion->getRegionInfo(); -+ BasicBlock *BB = Term->getParent(); -+ -+ // Handle the case where multiple regions start at the same block -+ Region *R = BB != ParentRegion->getEntry() ? 
-+ RI->getRegionFor(BB) : ParentRegion; -+ -+ if (R == ParentRegion) { -+ // It's a top level block in our region -+ Value *Cond = True; -+ if (Term->isConditional()) { -+ BasicBlock *Other = Term->getSuccessor(!Idx); -+ -+ if (Visited.count(Other)) { -+ if (!Pred.count(Other)) -+ Pred[Other] = False; -+ -+ if (!Pred.count(BB)) -+ Pred[BB] = True; -+ return; -+ } -+ Cond = Term->getCondition(); -+ -+ if (Idx != Invert) -+ Cond = BinaryOperator::CreateNot(Cond, "", Term); -+ } -+ -+ Pred[BB] = Cond; -+ -+ } else if (ParentRegion->contains(R)) { -+ // It's a block in a sub region -+ while(R->getParent() != ParentRegion) -+ R = R->getParent(); -+ -+ Pred[R->getEntry()] = True; -+ -+ } else { -+ // It's a branch from outside into our parent region -+ Pred[BB] = True; -+ } -+} -+ -+/// \brief Analyze the successors of each block and build up predicates -+void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { -+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB); -+ BBPredicates &Pred = Predicates[BB]; -+ -+ for (; PI != PE; ++PI) { -+ BranchInst *Term = cast((*PI)->getTerminator()); -+ -+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { -+ BasicBlock *Succ = Term->getSuccessor(i); -+ if (Succ != BB) -+ continue; -+ buildPredicate(Term, i, Pred, false); -+ } -+ } -+} -+ -+/// \brief Analyze the conditions leading to loop to a previous block -+void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) { -+ BranchInst *Term = cast(BB->getTerminator()); -+ -+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { -+ BasicBlock *Succ = Term->getSuccessor(i); -+ -+ // Ignore it if it's not a back edge -+ if (!Visited.count(Succ)) -+ continue; -+ -+ buildPredicate(Term, i, LoopPred, true); -+ -+ LoopEnd = BB; -+ if (Visited[Succ] < LoopIdx) { -+ LoopIdx = Visited[Succ]; -+ LoopStart = Succ; -+ } -+ } -+} -+ -+/// \brief Collect various loop and predicate infos -+void AMDGPUStructurizeCFG::collectInfos() { -+ unsigned Number = 0, LoopIdx = ~0; -+ -+ // Reset predicate -+ Predicates.clear(); -+ -+ // and loop infos -+ LoopStart = LoopEnd = 0; -+ LoopPred.clear(); -+ -+ RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); -+ for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) { -+ -+ // Analyze all the conditions leading to a node -+ analyzeBlock((*OI)->getEntry()); -+ -+ if ((*OI)->isSubRegion()) -+ continue; -+ -+ // Find the first/last loop nodes and loop predicates -+ analyzeLoop((*OI)->getNodeAs(), LoopIdx); -+ } -+} -+ -+/// \brief Does A dominate all the predicates of B ? -+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) { -+ BBPredicates &Preds = Predicates[B]; -+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); -+ PI != PE; ++PI) { -+ -+ if (!DT->dominates(A, PI->first)) -+ return false; -+ } -+ return true; -+} -+ -+/// \brief Remove phi values from all successors and the remove the terminator. -+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { -+ TerminatorInst *Term = BB->getTerminator(); -+ if (!Term) -+ return; -+ -+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); -+ SI != SE; ++SI) { -+ -+ delPhiValues(BB, *SI); -+ } -+ -+ Term->eraseFromParent(); -+} -+ -+/// First: Skip forward to the first region node that either isn't a subregion or not -+/// dominating it's exit, remove all the skipped nodes from the node order. 
-+/// -+/// Second: Handle the first successor directly if the resulting nodes successor -+/// predicates are still dominated by the original entry -+RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) { -+ BasicBlock *Entry = Node->getEntry(); -+ -+ // Skip forward as long as it is just a linear flow -+ while (true) { -+ BasicBlock *Entry = Node->getEntry(); -+ BasicBlock *Exit; -+ -+ if (Node->isSubRegion()) { -+ Exit = Node->getNodeAs()->getExit(); -+ } else { -+ TerminatorInst *Term = Entry->getTerminator(); -+ if (Term->getNumSuccessors() != 1) -+ break; -+ Exit = Term->getSuccessor(0); -+ } -+ -+ // It's a back edge, break here so we can insert a loop node -+ if (!Visited.count(Exit)) -+ return Node; -+ -+ // More than node edges are pointing to exit -+ if (!DT->dominates(Entry, Exit)) -+ return Node; -+ -+ RegionNode *Next = ParentRegion->getNode(Exit); -+ RNVector::iterator I = std::find(Order.begin(), Order.end(), Next); -+ assert(I != Order.end()); -+ -+ Visited.erase(Next->getEntry()); -+ Order.erase(I); -+ Node = Next; -+ } -+ -+ BasicBlock *BB = Node->getEntry(); -+ TerminatorInst *Term = BB->getTerminator(); -+ if (Term->getNumSuccessors() != 2) -+ return Node; -+ -+ // Our node has exactly two succesors, check if we can handle -+ // any of them directly -+ BasicBlock *Succ = Term->getSuccessor(0); -+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) { -+ Succ = Term->getSuccessor(1); -+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) -+ return Node; -+ } else { -+ BasicBlock *Succ2 = Term->getSuccessor(1); -+ if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] && -+ dominatesPredicates(Entry, Succ2)) -+ Succ = Succ2; -+ } -+ -+ RegionNode *Next = ParentRegion->getNode(Succ); -+ RNVector::iterator E = Order.end(); -+ RNVector::iterator I = std::find(Order.begin(), E, Next); -+ assert(I != E); -+ -+ killTerminator(BB); -+ FlowsInserted.push_back(BB); -+ Visited.erase(Succ); -+ Order.erase(I); -+ return ParentRegion->getNode(wireFlowBlock(BB, Next)); -+} -+ -+/// \brief Remove all PHI values coming from "From" into "To" and remember -+/// them in DeletedPhis -+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { -+ PhiMap &Map = DeletedPhis[To]; -+ for (BasicBlock::iterator I = To->begin(), E = To->end(); -+ I != E && isa(*I);) { -+ -+ PHINode &Phi = cast(*I++); -+ while (Phi.getBasicBlockIndex(From) != -1) { -+ Value *Deleted = Phi.removeIncomingValue(From, false); -+ Map[&Phi].push_back(std::make_pair(From, Deleted)); -+ } -+ } -+} -+ -+/// \brief Add the PHI values back once we knew the new predecessor -+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { -+ if (!DeletedPhis.count(To)) -+ return; -+ -+ PhiMap &Map = DeletedPhis[To]; -+ SSAUpdater Updater; -+ -+ for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { -+ -+ PHINode *Phi = I->first; -+ Updater.Initialize(Phi->getType(), ""); -+ BasicBlock *Fallback = To; -+ bool HaveFallback = false; -+ -+ for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end(); -+ VI != VE; ++VI) { -+ -+ Updater.AddAvailableValue(VI->first, VI->second); -+ BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first); -+ if (Dom == VI->first) -+ HaveFallback = true; -+ else if (Dom != Fallback) -+ HaveFallback = false; -+ Fallback = Dom; -+ } -+ if (!HaveFallback) { -+ Value *Undef = UndefValue::get(Phi->getType()); -+ Updater.AddAvailableValue(Fallback, Undef); -+ } -+ -+ 
Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From); -+ } -+ DeletedPhis.erase(To); -+} -+ -+/// \brief Create a new flow node and update dominator tree and region info -+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { -+ LLVMContext &Context = Func->getContext(); -+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : -+ Order.back()->getEntry(); -+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, -+ Func, Insert); -+ DT->addNewBlock(Flow, Prev); -+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); -+ FlowsInserted.push_back(Flow); -+ return Flow; -+} -+ -+/// \brief Can we predict that this node will always be called? -+bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev, -+ BasicBlock *Node) { -+ BBPredicates &Preds = Predicates[Node]; -+ bool Dominated = false; -+ -+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); -+ I != E; ++I) { -+ -+ if (I->second != BoolTrue) -+ return false; -+ -+ if (!Dominated && DT->dominates(I->first, Prev)) -+ Dominated = true; -+ } -+ return Dominated; -+} -+ -+/// \brief Wire up the new control flow by inserting or updating the branch -+/// instructions at node exits -+BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, -+ RegionNode *Node) { -+ BasicBlock *Entry = Node->getEntry(); -+ -+ if (LoopStart == Entry) { -+ LoopStart = Prev; -+ LoopPred[Prev] = BoolTrue; -+ } -+ -+ // Wire it up temporary, skipChained may recurse into us -+ BranchInst::Create(Entry, Prev); -+ DT->changeImmediateDominator(Entry, Prev); -+ addPhiValues(Prev, Entry); -+ -+ Node = skipChained(Node); -+ -+ BasicBlock *Next = getNextFlow(Prev); -+ if (!isPredictableTrue(Prev, Entry)) { -+ // Let Prev point to entry and next block -+ Prev->getTerminator()->eraseFromParent(); -+ BranchInst::Create(Entry, Next, BoolUndef, Prev); -+ } else { -+ DT->changeImmediateDominator(Next, Entry); -+ } -+ -+ // Let node exit(s) point to next block -+ if (Node->isSubRegion()) { -+ Region *SubRegion = Node->getNodeAs(); -+ BasicBlock *Exit = SubRegion->getExit(); -+ -+ // Find all the edges from the sub region to the exit -+ BBVector ToDo; -+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { -+ if (SubRegion->contains(*I)) -+ ToDo.push_back(*I); -+ } -+ -+ // Modify the edges to point to the new flow block -+ for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) { -+ delPhiValues(*I, Exit); -+ TerminatorInst *Term = (*I)->getTerminator(); -+ Term->replaceUsesOfWith(Exit, Next); -+ } -+ -+ // Update the region info -+ SubRegion->replaceExit(Next); -+ -+ } else { -+ BasicBlock *BB = Node->getNodeAs(); -+ killTerminator(BB); -+ BranchInst::Create(Next, BB); -+ -+ if (BB == LoopEnd) -+ LoopEnd = 0; -+ } -+ -+ return Next; -+} -+ -+/// Destroy node order and visited map, build up flow order instead. -+/// After this function control flow looks like it should be, but -+/// branches only have undefined conditions. 
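
A toy model of the two-phase scheme described above: createFlow() first wires
branches with an undefined condition, and insertConditions() later replaces
the placeholder with the real predicate. Names and shapes here are
illustrative only:

#include <cstdio>
#include <map>
#include <vector>

struct Branch { int trueSucc, falseSucc; bool condKnown; bool cond; };

int main() {
  // phase 1: structure only; condKnown == false stands in for BoolUndef
  std::vector<Branch> flow = { {1, 2, false, false} };
  // predicates gathered per successor block, as in the Predicates map
  std::map<int, bool> predicates = { {1, true} };
  // phase 2: patch in the real conditions
  for (Branch &b : flow)
    if (!b.condKnown) { b.cond = predicates[b.trueSucc]; b.condKnown = true; }
  printf("branch taken -> block %d\n",
         flow[0].cond ? flow[0].trueSucc : flow[0].falseSucc);
  return 0;
}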
-+void AMDGPUStructurizeCFG::createFlow() { -+ DeletedPhis.clear(); -+ -+ BasicBlock *Prev = Order.pop_back_val()->getEntry(); -+ assert(Prev == ParentRegion->getEntry() && "Incorrect node order!"); -+ Visited.erase(Prev); -+ -+ if (LoopStart == Prev) { -+ // Loop starts at entry, split entry so that we can predicate it -+ BasicBlock::iterator Insert = Prev->getFirstInsertionPt(); -+ BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName); -+ DT->addNewBlock(Split, Prev); -+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); -+ Predicates[Split] = Predicates[Prev]; -+ Order.push_back(ParentRegion->getBBNode(Split)); -+ LoopPred[Prev] = BoolTrue; -+ -+ } else if (LoopStart == Order.back()->getEntry()) { -+ // Loop starts behind entry, split entry so that we can jump to it -+ Instruction *Term = Prev->getTerminator(); -+ BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName); -+ DT->addNewBlock(Split, Prev); -+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); -+ Prev = Split; -+ } -+ -+ killTerminator(Prev); -+ FlowsInserted.clear(); -+ FlowsInserted.push_back(Prev); -+ -+ while (!Order.empty()) { -+ RegionNode *Node = Order.pop_back_val(); -+ Visited.erase(Node->getEntry()); -+ Prev = wireFlowBlock(Prev, Node); -+ if (LoopStart && !LoopEnd) { -+ // Create an extra loop end node -+ LoopEnd = Prev; -+ Prev = getNextFlow(LoopEnd); -+ BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd); -+ addPhiValues(LoopEnd, LoopStart); -+ } -+ } -+ -+ BasicBlock *Exit = ParentRegion->getExit(); -+ BranchInst::Create(Exit, Prev); -+ addPhiValues(Prev, Exit); -+ if (DT->dominates(ParentRegion->getEntry(), Exit)) -+ DT->changeImmediateDominator(Exit, Prev); -+ -+ if (LoopStart && LoopEnd) { -+ BBVector::iterator FI = std::find(FlowsInserted.begin(), -+ FlowsInserted.end(), -+ LoopStart); -+ for (; *FI != LoopEnd; ++FI) { -+ addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0)); -+ } -+ } -+ -+ assert(Order.empty()); -+ assert(Visited.empty()); -+ assert(DeletedPhis.empty()); -+} -+ -+/// \brief Insert the missing branch conditions -+void AMDGPUStructurizeCFG::insertConditions() { -+ SSAUpdater PhiInserter; -+ -+ for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end(); -+ FI != FE; ++FI) { -+ -+ BranchInst *Term = cast((*FI)->getTerminator()); -+ if (Term->isUnconditional()) -+ continue; -+ -+ PhiInserter.Initialize(Boolean, ""); -+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse); -+ -+ BasicBlock *Succ = Term->getSuccessor(0); -+ BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ]; -+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); -+ PI != PE; ++PI) { -+ -+ PhiInserter.AddAvailableValue(PI->first, PI->second); -+ } -+ -+ Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI)); -+ } -+} -+ -+/// Handle a rare case where the disintegrated nodes instructions -+/// no longer dominate all their uses. 
Not sure if this is really necessary
-+void AMDGPUStructurizeCFG::rebuildSSA() {
-+  SSAUpdater Updater;
-+  for (Region::block_iterator I = ParentRegion->block_begin(),
-+       E = ParentRegion->block_end();
-+       I != E; ++I) {
-+
-+    BasicBlock *BB = *I;
-+    for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
-+         II != IE; ++II) {
-+
-+      bool Initialized = false;
-+      for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
-+
-+        Next = I->getNext();
-+
-+        Instruction *User = cast<Instruction>(I->getUser());
-+        if (User->getParent() == BB) {
-+          continue;
-+
-+        } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
-+          if (UserPN->getIncomingBlock(*I) == BB)
-+            continue;
-+        }
-+
-+        if (DT->dominates(II, User))
-+          continue;
-+
-+        if (!Initialized) {
-+          Value *Undef = UndefValue::get(II->getType());
-+          Updater.Initialize(II->getType(), "");
-+          Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
-+          Updater.AddAvailableValue(BB, II);
-+          Initialized = true;
-+        }
-+        Updater.RewriteUseAfterInsertions(*I);
-+      }
-+    }
-+  }
-+}
-+
-+/// \brief Run the transformation for each region found
-+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
-+  if (R->isTopLevelRegion())
-+    return false;
-+
-+  Func = R->getEntry()->getParent();
-+  ParentRegion = R;
-+
-+  DT = &getAnalysis<DominatorTree>();
-+
-+  orderNodes();
-+  collectInfos();
-+  createFlow();
-+  insertConditions();
-+  rebuildSSA();
-+
-+  Order.clear();
-+  Visited.clear();
-+  Predicates.clear();
-+  DeletedPhis.clear();
-+  FlowsInserted.clear();
-+
-+  return true;
-+}
-+
-+/// \brief Create the pass
-+Pass *llvm::createAMDGPUStructurizeCFGPass() {
-+  return new AMDGPUStructurizeCFG();
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,87 @@
-+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
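
The rebuildSSA() loop above reduces to this SSAUpdater pattern, shown out of
context against the 2013-era API (Def and U are placeholders for a rearranged
definition and a use it no longer dominates; not standalone-compilable):

// Def: a definition whose block was moved; U: a use of Def that the
// dominator tree says is no longer dominated.
SSAUpdater Updater;
Updater.Initialize(Def->getType(), "");
Updater.AddAvailableValue(&Func->getEntryBlock(),
                          UndefValue::get(Def->getType())); // fallback value
Updater.AddAvailableValue(Def->getParent(), Def);           // real definition
Updater.RewriteUseAfterInsertions(U);  // inserts PHIs where needed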
-+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPUSubtarget.h" -+ -+using namespace llvm; -+ -+#define GET_SUBTARGETINFO_ENUM -+#define GET_SUBTARGETINFO_TARGET_DESC -+#define GET_SUBTARGETINFO_CTOR -+#include "AMDGPUGenSubtargetInfo.inc" -+ -+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : -+ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) { -+ InstrItins = getInstrItineraryForCPU(CPU); -+ -+ memset(CapsOverride, 0, sizeof(*CapsOverride) -+ * AMDGPUDeviceInfo::MaxNumberCapabilities); -+ // Default card -+ StringRef GPU = CPU; -+ Is64bit = false; -+ DefaultSize[0] = 64; -+ DefaultSize[1] = 1; -+ DefaultSize[2] = 1; -+ ParseSubtargetFeatures(GPU, FS); -+ DevName = GPU; -+ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit); -+} -+ -+AMDGPUSubtarget::~AMDGPUSubtarget() { -+ delete Device; -+} -+ -+bool -+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const { -+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities && -+ "Caps index is out of bounds!"); -+ return CapsOverride[caps]; -+} -+bool -+AMDGPUSubtarget::is64bit() const { -+ return Is64bit; -+} -+bool -+AMDGPUSubtarget::isTargetELF() const { -+ return false; -+} -+size_t -+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const { -+ if (dim > 3) { -+ return 1; -+ } else { -+ return DefaultSize[dim]; -+ } -+} -+ -+std::string -+AMDGPUSubtarget::getDataLayout() const { -+ if (!Device) { -+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" -+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" -+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" -+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" -+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"); -+ } -+ return Device->getDataLayout(); -+} -+ -+std::string -+AMDGPUSubtarget::getDeviceName() const { -+ return DevName; -+} -+const AMDGPUDevice * -+AMDGPUSubtarget::device() const { -+ return Device; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h llvm-r600/lib/Target/R600/AMDGPUSubtarget.h ---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.h 2013-01-25 19:43:57.433383055 +0100 -@@ -0,0 +1,65 @@ -+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//==-----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief AMDGPU specific subclass of TargetSubtarget. 
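
ParseSubtargetFeatures() above is TableGen-generated; this standalone toy
shows the effect of a feature string. The feature names are taken from the
AMDILBase.td hunk further down, and the parsing itself is a simplification:

#include <cstdio>
#include <string>

struct Features { bool fp64 = false; bool dumpCode = false; };

static Features parse(const std::string &fs) {
  Features f;
  size_t pos = 0;
  while (pos <= fs.size()) {
    size_t end = fs.find(',', pos);
    if (end == std::string::npos) end = fs.size();
    std::string tok = fs.substr(pos, end - pos);
    if (tok.size() > 1) {
      bool on = (tok[0] == '+');
      std::string name = tok.substr(1);
      if (name == "fp64") f.fp64 = on;              // FeatureFP64
      else if (name == "DumpCode") f.dumpCode = on; // FeatureDumpCode
    }
    pos = end + 1;
  }
  return f;
}

int main() {
  Features f = parse("+fp64,+DumpCode");
  printf("fp64=%d DumpCode=%d\n", f.fp64, f.dumpCode);
  return 0;
}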
-+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef AMDGPUSUBTARGET_H -+#define AMDGPUSUBTARGET_H -+#include "AMDILDevice.h" -+#include "llvm/ADT/StringExtras.h" -+#include "llvm/ADT/StringRef.h" -+#include "llvm/Target/TargetSubtargetInfo.h" -+ -+#define GET_SUBTARGETINFO_HEADER -+#include "AMDGPUGenSubtargetInfo.inc" -+ -+#define MAX_CB_SIZE (1 << 16) -+ -+namespace llvm { -+ -+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { -+private: -+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities]; -+ const AMDGPUDevice *Device; -+ size_t DefaultSize[3]; -+ std::string DevName; -+ bool Is64bit; -+ bool Is32on64bit; -+ bool DumpCode; -+ bool R600ALUInst; -+ -+ InstrItineraryData InstrItins; -+ -+public: -+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS); -+ virtual ~AMDGPUSubtarget(); -+ -+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } -+ virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS); -+ -+ bool isOverride(AMDGPUDeviceInfo::Caps) const; -+ bool is64bit() const; -+ -+ // Helper functions to simplify if statements -+ bool isTargetELF() const; -+ const AMDGPUDevice* device() const; -+ std::string getDataLayout() const; -+ std::string getDeviceName() const; -+ virtual size_t getDefaultSize(uint32_t dim) const; -+ bool dumpCode() const { return DumpCode; } -+ bool r600ALUEncoding() const { return R600ALUInst; } -+ -+}; -+ -+} // End namespace llvm -+ -+#endif // AMDGPUSUBTARGET_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp ---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp 2013-01-25 19:43:57.433383055 +0100 -@@ -0,0 +1,148 @@ -+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief The AMDGPU target machine contains all of the hardware specific -+/// information needed to emit code for R600 and SI GPUs. 
-+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPUTargetMachine.h" -+#include "AMDGPU.h" -+#include "R600ISelLowering.h" -+#include "R600InstrInfo.h" -+#include "SIISelLowering.h" -+#include "SIInstrInfo.h" -+#include "llvm/Analysis/Passes.h" -+#include "llvm/Analysis/Verifier.h" -+#include "llvm/CodeGen/MachineFunctionAnalysis.h" -+#include "llvm/CodeGen/MachineModuleInfo.h" -+#include "llvm/CodeGen/Passes.h" -+#include "llvm/MC/MCAsmInfo.h" -+#include "llvm/PassManager.h" -+#include "llvm/Support/TargetRegistry.h" -+#include "llvm/Support/raw_os_ostream.h" -+#include "llvm/Transforms/IPO.h" -+#include "llvm/Transforms/Scalar.h" -+#include -+ -+using namespace llvm; -+ -+extern "C" void LLVMInitializeR600Target() { -+ // Register the target -+ RegisterTargetMachine X(TheAMDGPUTarget); -+} -+ -+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, -+ StringRef CPU, StringRef FS, -+ TargetOptions Options, -+ Reloc::Model RM, CodeModel::Model CM, -+ CodeGenOpt::Level OptLevel -+) -+: -+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), -+ Subtarget(TT, CPU, FS), -+ Layout(Subtarget.getDataLayout()), -+ FrameLowering(TargetFrameLowering::StackGrowsUp, -+ Subtarget.device()->getStackAlignment(), 0), -+ IntrinsicInfo(this), -+ InstrItins(&Subtarget.getInstrItineraryData()) { -+ // TLInfo uses InstrInfo so it must be initialized after. -+ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { -+ InstrInfo = new R600InstrInfo(*this); -+ TLInfo = new R600TargetLowering(*this); -+ } else { -+ InstrInfo = new SIInstrInfo(*this); -+ TLInfo = new SITargetLowering(*this); -+ } -+} -+ -+AMDGPUTargetMachine::~AMDGPUTargetMachine() { -+} -+ -+namespace { -+class AMDGPUPassConfig : public TargetPassConfig { -+public: -+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) -+ : TargetPassConfig(TM, PM) {} -+ -+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const { -+ return getTM(); -+ } -+ -+ virtual bool addPreISel(); -+ virtual bool addInstSelector(); -+ virtual bool addPreRegAlloc(); -+ virtual bool addPostRegAlloc(); -+ virtual bool addPreSched2(); -+ virtual bool addPreEmitPass(); -+}; -+} // End of anonymous namespace -+ -+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) { -+ return new AMDGPUPassConfig(this, PM); -+} -+ -+bool -+AMDGPUPassConfig::addPreISel() { -+ const AMDGPUSubtarget &ST = TM->getSubtarget(); -+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { -+ addPass(createAMDGPUStructurizeCFGPass()); -+ addPass(createSIAnnotateControlFlowPass()); -+ } -+ return false; -+} -+ -+bool AMDGPUPassConfig::addInstSelector() { -+ addPass(createAMDGPUPeepholeOpt(*TM)); -+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); -+ return false; -+} -+ -+bool AMDGPUPassConfig::addPreRegAlloc() { -+ const AMDGPUSubtarget &ST = TM->getSubtarget(); -+ -+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { -+ addPass(createSIAssignInterpRegsPass(*TM)); -+ } -+ addPass(createAMDGPUConvertToISAPass(*TM)); -+ return false; -+} -+ -+bool AMDGPUPassConfig::addPostRegAlloc() { -+ const AMDGPUSubtarget &ST = TM->getSubtarget(); -+ -+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { -+ addPass(createSIInsertWaits(*TM)); -+ } -+ return false; -+} -+ -+bool AMDGPUPassConfig::addPreSched2() { -+ -+ addPass(&IfConverterID); -+ return false; -+} -+ -+bool AMDGPUPassConfig::addPreEmitPass() { -+ const AMDGPUSubtarget &ST = TM->getSubtarget(); 
-+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { -+ addPass(createAMDGPUCFGPreparationPass(*TM)); -+ addPass(createAMDGPUCFGStructurizerPass(*TM)); -+ addPass(createR600ExpandSpecialInstrsPass(*TM)); -+ addPass(createR600LowerConstCopy(*TM)); -+ addPass(&FinalizeMachineBundlesID); -+ } else { -+ addPass(createSILowerLiteralConstantsPass(*TM)); -+ addPass(createSILowerControlFlowPass(*TM)); -+ } -+ -+ return false; -+} -+ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h ---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h 2013-01-25 19:43:57.433383055 +0100 -@@ -0,0 +1,70 @@ -+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief The AMDGPU TargetMachine interface definition for hw codgen targets. -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef AMDGPU_TARGET_MACHINE_H -+#define AMDGPU_TARGET_MACHINE_H -+ -+#include "AMDGPUInstrInfo.h" -+#include "AMDGPUSubtarget.h" -+#include "AMDILFrameLowering.h" -+#include "AMDILIntrinsicInfo.h" -+#include "R600ISelLowering.h" -+#include "llvm/ADT/OwningPtr.h" -+#include "llvm/DataLayout.h" -+ -+namespace llvm { -+ -+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT); -+ -+class AMDGPUTargetMachine : public LLVMTargetMachine { -+ -+ AMDGPUSubtarget Subtarget; -+ const DataLayout Layout; -+ AMDGPUFrameLowering FrameLowering; -+ AMDGPUIntrinsicInfo IntrinsicInfo; -+ const AMDGPUInstrInfo * InstrInfo; -+ AMDGPUTargetLowering * TLInfo; -+ const InstrItineraryData* InstrItins; -+ -+public: -+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS, -+ StringRef CPU, -+ TargetOptions Options, -+ Reloc::Model RM, CodeModel::Model CM, -+ CodeGenOpt::Level OL); -+ ~AMDGPUTargetMachine(); -+ virtual const AMDGPUFrameLowering* getFrameLowering() const { -+ return &FrameLowering; -+ } -+ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const { -+ return &IntrinsicInfo; -+ } -+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;} -+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; } -+ virtual const AMDGPURegisterInfo *getRegisterInfo() const { -+ return &InstrInfo->getRegisterInfo(); -+ } -+ virtual AMDGPUTargetLowering * getTargetLowering() const { -+ return TLInfo; -+ } -+ virtual const InstrItineraryData* getInstrItineraryData() const { -+ return InstrItins; -+ } -+ virtual const DataLayout* getDataLayout() const { return &Layout; } -+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); -+}; -+ -+} // End namespace llvm -+ -+#endif // AMDGPU_TARGET_MACHINE_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.td llvm-r600/lib/Target/R600/AMDGPU.td ---- llvm-3.2.src/lib/Target/R600/AMDGPU.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDGPU.td 2013-01-25 19:43:57.423383055 +0100 -@@ -0,0 +1,40 @@ -+//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. 
See LICENSE.TXT for details. -+// -+//==-----------------------------------------------------------------------===// -+ -+// Include AMDIL TD files -+include "AMDILBase.td" -+ -+ -+def AMDGPUInstrInfo : InstrInfo { -+ let guessInstructionProperties = 1; -+} -+ -+//===----------------------------------------------------------------------===// -+// Declare the target which we are implementing -+//===----------------------------------------------------------------------===// -+def AMDGPUAsmWriter : AsmWriter { -+ string AsmWriterClassName = "InstPrinter"; -+ int Variant = 0; -+ bit isMCAsmWriter = 1; -+} -+ -+def AMDGPU : Target { -+ // Pull in Instruction Info: -+ let InstructionSet = AMDGPUInstrInfo; -+ let AssemblyWriters = [AMDGPUAsmWriter]; -+} -+ -+// Include AMDGPU TD files -+include "R600Schedule.td" -+include "SISchedule.td" -+include "Processors.td" -+include "AMDGPUInstrInfo.td" -+include "AMDGPUIntrinsics.td" -+include "AMDGPURegisterInfo.td" -+include "AMDGPUInstructions.td" -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp ---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp 2013-01-25 19:43:57.433383055 +0100 -@@ -0,0 +1,115 @@ -+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// \file -+//==-----------------------------------------------------------------------===// -+#include "AMDIL7XXDevice.h" -+#include "AMDGPUSubtarget.h" -+#include "AMDILDevice.h" -+ -+using namespace llvm; -+ -+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) { -+ setCaps(); -+ std::string name = mSTM->getDeviceName(); -+ if (name == "rv710") { -+ DeviceFlag = OCL_DEVICE_RV710; -+ } else if (name == "rv730") { -+ DeviceFlag = OCL_DEVICE_RV730; -+ } else { -+ DeviceFlag = OCL_DEVICE_RV770; -+ } -+} -+ -+AMDGPU7XXDevice::~AMDGPU7XXDevice() { -+} -+ -+void AMDGPU7XXDevice::setCaps() { -+ mSWBits.set(AMDGPUDeviceInfo::LocalMem); -+} -+ -+size_t AMDGPU7XXDevice::getMaxLDSSize() const { -+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { -+ return MAX_LDS_SIZE_700; -+ } -+ return 0; -+} -+ -+size_t AMDGPU7XXDevice::getWavefrontSize() const { -+ return AMDGPUDevice::HalfWavefrontSize; -+} -+ -+uint32_t AMDGPU7XXDevice::getGeneration() const { -+ return AMDGPUDeviceInfo::HD4XXX; -+} -+ -+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const { -+ switch (DeviceID) { -+ default: -+ assert(0 && "ID type passed in is unknown!"); -+ break; -+ case GLOBAL_ID: -+ case CONSTANT_ID: -+ case RAW_UAV_ID: -+ case ARENA_UAV_ID: -+ break; -+ case LDS_ID: -+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { -+ return DEFAULT_LDS_ID; -+ } -+ break; -+ case SCRATCH_ID: -+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { -+ return DEFAULT_SCRATCH_ID; -+ } -+ break; -+ case GDS_ID: -+ assert(0 && "GDS UAV ID is not supported on this chip"); -+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { -+ return DEFAULT_GDS_ID; -+ } -+ break; -+ }; -+ -+ return 0; -+} -+ -+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const { -+ return 1; -+} -+ -+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) { -+ setCaps(); -+} -+ -+AMDGPU770Device::~AMDGPU770Device() { -+} -+ -+void AMDGPU770Device::setCaps() { -+ if 
(mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { -+ mSWBits.set(AMDGPUDeviceInfo::FMA); -+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps); -+ } -+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); -+ mHWBits.reset(AMDGPUDeviceInfo::LongOps); -+ mSWBits.set(AMDGPUDeviceInfo::LongOps); -+ mSWBits.set(AMDGPUDeviceInfo::LocalMem); -+} -+ -+size_t AMDGPU770Device::getWavefrontSize() const { -+ return AMDGPUDevice::WavefrontSize; -+} -+ -+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) { -+} -+ -+AMDGPU710Device::~AMDGPU710Device() { -+} -+ -+size_t AMDGPU710Device::getWavefrontSize() const { -+ return AMDGPUDevice::QuarterWavefrontSize; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h llvm-r600/lib/Target/R600/AMDIL7XXDevice.h ---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.h 2013-01-25 19:43:57.436716388 +0100 -@@ -0,0 +1,72 @@ -+//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//==-----------------------------------------------------------------------===// -+/// \file -+/// \brief Interface for the subtarget data classes. -+/// -+/// This file will define the interface that each generation needs to -+/// implement in order to correctly answer queries on the capabilities of the -+/// specific hardware. -+//===----------------------------------------------------------------------===// -+#ifndef AMDIL7XXDEVICEIMPL_H -+#define AMDIL7XXDEVICEIMPL_H -+#include "AMDILDevice.h" -+ -+namespace llvm { -+class AMDGPUSubtarget; -+ -+//===----------------------------------------------------------------------===// -+// 7XX generation of devices and their respective sub classes -+//===----------------------------------------------------------------------===// -+ -+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device. -+/// -+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only -+/// support the minimal features that are required to be considered OpenCL 1.0 -+/// compliant and nothing more. -+class AMDGPU7XXDevice : public AMDGPUDevice { -+public: -+ AMDGPU7XXDevice(AMDGPUSubtarget *ST); -+ virtual ~AMDGPU7XXDevice(); -+ virtual size_t getMaxLDSSize() const; -+ virtual size_t getWavefrontSize() const; -+ virtual uint32_t getGeneration() const; -+ virtual uint32_t getResourceID(uint32_t DeviceID) const; -+ virtual uint32_t getMaxNumUAVs() const; -+ -+protected: -+ virtual void setCaps(); -+}; -+ -+/// \brief The AMDGPU770Device class represents the RV770 chip and it's -+/// derivative cards. -+/// -+/// The difference between this device and the base class is this device device -+/// adds support for double precision and has a larger wavefront size. -+class AMDGPU770Device : public AMDGPU7XXDevice { -+public: -+ AMDGPU770Device(AMDGPUSubtarget *ST); -+ virtual ~AMDGPU770Device(); -+ virtual size_t getWavefrontSize() const; -+private: -+ virtual void setCaps(); -+}; -+ -+/// \brief The AMDGPU710Device class derives from the 7XX base class. -+/// -+/// This class is a smaller derivative, so we need to overload some of the -+/// functions in order to correctly specify this information. 
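
The three 7XX variants above differ mainly in reported wavefront width.
Assuming the usual AMDILDevice constants (WavefrontSize 64, half 32,
quarter 16; the constants themselves are not shown in this hunk), the
dispatch reduces to:

#include <cstdio>

enum class Rv7xx { Rv710, Rv730, Rv770 };

unsigned wavefrontSize(Rv7xx dev) {
  switch (dev) {
  case Rv7xx::Rv770: return 64;  // AMDGPU770Device: full wavefront
  case Rv7xx::Rv710: return 16;  // AMDGPU710Device: quarter wavefront
  default:           return 32;  // generic AMDGPU7XXDevice: half wavefront
  }
}

int main() { printf("%u\n", wavefrontSize(Rv7xx::Rv770)); return 0; }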
-+class AMDGPU710Device : public AMDGPU7XXDevice { -+public: -+ AMDGPU710Device(AMDGPUSubtarget *ST); -+ virtual ~AMDGPU710Device(); -+ virtual size_t getWavefrontSize() const; -+}; -+ -+} // namespace llvm -+#endif // AMDILDEVICEIMPL_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILBase.td llvm-r600/lib/Target/R600/AMDILBase.td ---- llvm-3.2.src/lib/Target/R600/AMDILBase.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILBase.td 2013-01-25 19:43:57.436716388 +0100 -@@ -0,0 +1,85 @@ -+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// Target-independent interfaces which we are implementing -+//===----------------------------------------------------------------------===// -+ -+include "llvm/Target/Target.td" -+ -+// Dummy Instruction itineraries for pseudo instructions -+def ALU_NULL : FuncUnit; -+def NullALU : InstrItinClass; -+ -+//===----------------------------------------------------------------------===// -+// AMDIL Subtarget features. -+//===----------------------------------------------------------------------===// -+def FeatureFP64 : SubtargetFeature<"fp64", -+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]", -+ "true", -+ "Enable 64bit double precision operations">; -+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store", -+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]", -+ "true", -+ "Enable byte addressable stores">; -+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect", -+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]", -+ "true", -+ "Enable duplicate barrier detection(HD5XXX or later).">; -+def FeatureImages : SubtargetFeature<"images", -+ "CapsOverride[AMDGPUDeviceInfo::Images]", -+ "true", -+ "Enable image functions">; -+def FeatureMultiUAV : SubtargetFeature<"multi_uav", -+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]", -+ "true", -+ "Generate multiple UAV code(HD5XXX family or later)">; -+def FeatureMacroDB : SubtargetFeature<"macrodb", -+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]", -+ "true", -+ "Use internal macrodb, instead of macrodb in driver">; -+def FeatureNoAlias : SubtargetFeature<"noalias", -+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]", -+ "true", -+ "assert that all kernel argument pointers are not aliased">; -+def FeatureNoInline : SubtargetFeature<"no-inline", -+ "CapsOverride[AMDGPUDeviceInfo::NoInline]", -+ "true", -+ "specify whether to not inline functions">; -+ -+def Feature64BitPtr : SubtargetFeature<"64BitPtr", -+ "Is64bit", -+ "false", -+ "Specify if 64bit addressing should be used.">; -+ -+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", -+ "Is32on64bit", -+ "false", -+ "Specify if 64bit sized pointers with 32bit addressing should be used.">; -+def FeatureDebug : SubtargetFeature<"debug", -+ "CapsOverride[AMDGPUDeviceInfo::Debug]", -+ "true", -+ "Debug mode is enabled, so disable hardware accelerated address spaces.">; -+def FeatureDumpCode : SubtargetFeature <"DumpCode", -+ "DumpCode", -+ "true", -+ "Dump MachineInstrs in the CodeEmitter">; -+ -+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", -+ "R600ALUInst", -+ "false", -+ "Older version of ALU instructions encoding.">; -+ -+ -+//===----------------------------------------------------------------------===// -+// Register File, Calling Conv, Instruction 
Descriptions -+//===----------------------------------------------------------------------===// -+ -+ -+include "AMDILRegisterInfo.td" -+include "AMDILInstrInfo.td" -+ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp ---- llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp 2013-01-25 19:43:57.436716388 +0100 -@@ -0,0 +1,3045 @@ -+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+/// \file -+//==-----------------------------------------------------------------------===// -+ -+#define DEBUGME 0 -+#define DEBUG_TYPE "structcfg" -+ -+#include "AMDGPUInstrInfo.h" -+#include "AMDIL.h" -+#include "llvm/ADT/SCCIterator.h" -+#include "llvm/ADT/SmallVector.h" -+#include "llvm/ADT/Statistic.h" -+#include "llvm/Analysis/DominatorInternals.h" -+#include "llvm/Analysis/Dominators.h" -+#include "llvm/CodeGen/MachinePostDominators.h" -+#include "llvm/CodeGen/MachineDominators.h" -+#include "llvm/CodeGen/MachineFunction.h" -+#include "llvm/CodeGen/MachineFunctionAnalysis.h" -+#include "llvm/CodeGen/MachineFunctionPass.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineJumpTableInfo.h" -+#include "llvm/CodeGen/MachineLoopInfo.h" -+#include "llvm/CodeGen/MachineRegisterInfo.h" -+#include "llvm/Target/TargetInstrInfo.h" -+ -+using namespace llvm; -+ -+// TODO: move-begin. -+ -+//===----------------------------------------------------------------------===// -+// -+// Statistics for CFGStructurizer. -+// -+//===----------------------------------------------------------------------===// -+ -+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " -+ "matched"); -+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " -+ "matched"); -+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " -+ "pattern matched"); -+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " -+ "pattern matched"); -+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " -+ "matched"); -+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); -+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); -+ -+//===----------------------------------------------------------------------===// -+// -+// Miscellaneous utility for CFGStructurizer. 
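The STATISTIC() declarations above are LLVM's counting hooks: each counter is bumped as the corresponding pattern is matched, and the totals are printed when the compiler is run with -stats. For readers outside the LLVM tree, a rough stand-in with plain atomics and invented names behaves like this:

  #include <atomic>
  #include <cstdio>

  // Each pattern match just bumps a named process-wide counter,
  // reported once at the end of the run.
  static std::atomic<unsigned> NumSerialMatched(0);
  static std::atomic<unsigned> NumIfMatched(0);

  static void reportStats() {
    std::printf("%u serial patterns matched\n", NumSerialMatched.load());
    std::printf("%u if patterns matched\n", NumIfMatched.load());
  }

  int main() {
    ++NumSerialMatched;   // corresponds to ++numSerialPatternMatch below
    ++NumIfMatched;
    reportStats();
  }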
-+// -+//===----------------------------------------------------------------------===// -+namespace llvmCFGStruct { -+#define SHOWNEWINSTR(i) \ -+ if (DEBUGME) errs() << "New instr: " << *i << "\n" -+ -+#define SHOWNEWBLK(b, msg) \ -+if (DEBUGME) { \ -+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ -+ errs() << "\n"; \ -+} -+ -+#define SHOWBLK_DETAIL(b, msg) \ -+if (DEBUGME) { \ -+ if (b) { \ -+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ -+ b->print(errs()); \ -+ errs() << "\n"; \ -+ } \ -+} -+ -+#define INVALIDSCCNUM -1 -+#define INVALIDREGNUM 0 -+ -+template -+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { -+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(), -+ iterEnd = LoopInfo.end(); -+ iter != iterEnd; ++iter) { -+ (*iter)->print(OS, 0); -+ } -+} -+ -+template -+void ReverseVector(SmallVector &Src) { -+ size_t sz = Src.size(); -+ for (size_t i = 0; i < sz/2; ++i) { -+ NodeT *t = Src[i]; -+ Src[i] = Src[sz - i - 1]; -+ Src[sz - i - 1] = t; -+ } -+} -+ -+} //end namespace llvmCFGStruct -+ -+//===----------------------------------------------------------------------===// -+// -+// supporting data structure for CFGStructurizer -+// -+//===----------------------------------------------------------------------===// -+ -+namespace llvmCFGStruct { -+template -+struct CFGStructTraits { -+}; -+ -+template -+class BlockInformation { -+public: -+ bool isRetired; -+ int sccNum; -+ //SmallVector succInstr; -+ //Instructions defining the corresponding successor. -+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} -+}; -+ -+template -+class LandInformation { -+public: -+ BlockT *landBlk; -+ std::set breakInitRegs; //Registers that need to "reg = 0", before -+ //WHILELOOP(thisloop) init before entering -+ //thisloop. -+ std::set contInitRegs; //Registers that need to "reg = 0", after -+ //WHILELOOP(thisloop) init after entering -+ //thisloop. -+ std::set endbranchInitRegs; //Init before entering this loop, at loop -+ //land block, branch cond on this reg. -+ std::set breakOnRegs; //registers that need to "if (reg) break -+ //endif" after ENDLOOP(thisloop) break -+ //outerLoopOf(thisLoop). -+ std::set contOnRegs; //registers that need to "if (reg) continue -+ //endif" after ENDLOOP(thisloop) continue on -+ //outerLoopOf(thisLoop). -+ LandInformation() : landBlk(NULL) {} -+}; -+ -+} //end of namespace llvmCFGStruct -+ -+//===----------------------------------------------------------------------===// -+// -+// CFGStructurizer -+// -+//===----------------------------------------------------------------------===// -+ -+namespace llvmCFGStruct { -+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. 
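The class template declared next never names MachineBasicBlock directly; it pulls every concrete type (blocks, instructions, dominator trees) out of the PassT parameter and a CFGStructTraits specialization. A toy model of that compile-time indirection, with all names invented for illustration:

  #include <iostream>

  // Primary template; one specialization is provided per pass type.
  template <class PassT> struct CFGTraitsFor;

  struct MyPass {};  // stand-in for the real machine-function pass

  template <> struct CFGTraitsFor<MyPass> {
    typedef int BlockT;  // toy block type; the real code uses GraphTraits nodes
    static const char *name() { return "MyPass"; }
  };

  // The algorithm template asks the traits class for the types it needs.
  template <class PassT> class Structurizer {
    typedef typename CFGTraitsFor<PassT>::BlockT BlockT;
  public:
    void run(BlockT Entry) {
      std::cout << "structurizing for " << CFGTraitsFor<PassT>::name()
                << ", entry block " << Entry << "\n";
    }
  };

  int main() {
    Structurizer<MyPass> S;
    S.run(0);
  }

The payoff of the indirection, noted in the "port it to BasicBlock" TODO above, is that the same algorithm could in principle be instantiated over IR basic blocks as well as machine basic blocks.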
-+template -+class CFGStructurizer { -+public: -+ typedef enum { -+ Not_SinglePath = 0, -+ SinglePath_InPath = 1, -+ SinglePath_NotInPath = 2 -+ } PathToKind; -+ -+public: -+ typedef typename PassT::InstructionType InstrT; -+ typedef typename PassT::FunctionType FuncT; -+ typedef typename PassT::DominatortreeType DomTreeT; -+ typedef typename PassT::PostDominatortreeType PostDomTreeT; -+ typedef typename PassT::DomTreeNodeType DomTreeNodeT; -+ typedef typename PassT::LoopinfoType LoopInfoT; -+ -+ typedef GraphTraits FuncGTraits; -+ //typedef FuncGTraits::nodes_iterator BlockIterator; -+ typedef typename FuncT::iterator BlockIterator; -+ -+ typedef typename FuncGTraits::NodeType BlockT; -+ typedef GraphTraits BlockGTraits; -+ typedef GraphTraits > InvBlockGTraits; -+ //typedef BlockGTraits::succ_iterator InstructionIterator; -+ typedef typename BlockT::iterator InstrIterator; -+ -+ typedef CFGStructTraits CFGTraits; -+ typedef BlockInformation BlockInfo; -+ typedef std::map BlockInfoMap; -+ -+ typedef int RegiT; -+ typedef typename PassT::LoopType LoopT; -+ typedef LandInformation LoopLandInfo; -+ typedef std::map LoopLandInfoMap; -+ //landing info for loop break -+ typedef SmallVector BlockTSmallerVector; -+ -+public: -+ CFGStructurizer(); -+ ~CFGStructurizer(); -+ -+ /// Perform the CFG structurization -+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); -+ -+ /// Perform the CFG preparation -+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); -+ -+private: -+ void reversePredicateSetter(typename BlockT::iterator); -+ void orderBlocks(); -+ void printOrderedBlocks(llvm::raw_ostream &OS); -+ int patternMatch(BlockT *CurBlock); -+ int patternMatchGroup(BlockT *CurBlock); -+ -+ int serialPatternMatch(BlockT *CurBlock); -+ int ifPatternMatch(BlockT *CurBlock); -+ int switchPatternMatch(BlockT *CurBlock); -+ int loopendPatternMatch(BlockT *CurBlock); -+ int loopPatternMatch(BlockT *CurBlock); -+ -+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); -+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); -+ //int loopWithoutBreak(BlockT *); -+ -+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, -+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); -+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, -+ BlockT *ContBlock, LoopT *contLoop); -+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); -+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, -+ BlockT *FalseBlock); -+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, -+ BlockT *FalseBlock); -+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, -+ BlockT *FalseBlock, BlockT **LandBlockPtr); -+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, -+ BlockT *FalseBlock, BlockT *LandBlock, -+ bool Detail = false); -+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, -+ bool AllowSideEntry = true); -+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, -+ bool AllowSideEntry = true); -+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); -+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); -+ -+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, -+ BlockT *TrueBlock, BlockT *FalseBlock, -+ BlockT *LandBlock); -+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); -+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, -+ BlockT *ExitLandBlock, RegiT SetReg); -+ void 
settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, -+ RegiT SetReg); -+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, -+ std::set &ExitBlockSet, -+ BlockT *ExitLandBlk); -+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep, -+ BlockTSmallerVector &ExitingBlocks, -+ BlockTSmallerVector &ExitBlocks); -+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); -+ void removeUnconditionalBranch(BlockT *SrcBlock); -+ void removeRedundantConditionalBranch(BlockT *SrcBlock); -+ void addDummyExitBlock(SmallVector &RetBlocks); -+ -+ void removeSuccessor(BlockT *SrcBlock); -+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); -+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); -+ -+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, -+ InstrIterator InsertPos); -+ -+ void recordSccnum(BlockT *SrcBlock, int SCCNum); -+ int getSCCNum(BlockT *srcBlk); -+ -+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); -+ bool isRetiredBlock(BlockT *SrcBlock); -+ bool isActiveLoophead(BlockT *CurBlock); -+ bool needMigrateBlock(BlockT *Block); -+ -+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, -+ BlockTSmallerVector &exitBlocks, -+ std::set &ExitBlockSet); -+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); -+ BlockT *getLoopLandBlock(LoopT *LoopRep); -+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); -+ -+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); -+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); -+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); -+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); -+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); -+ -+ bool hasBackEdge(BlockT *curBlock); -+ unsigned getLoopDepth (LoopT *LoopRep); -+ int countActiveBlock( -+ typename SmallVector::const_iterator IterStart, -+ typename SmallVector::const_iterator IterEnd); -+ BlockT *findNearestCommonPostDom(std::set&); -+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); -+ -+private: -+ DomTreeT *domTree; -+ PostDomTreeT *postDomTree; -+ LoopInfoT *loopInfo; -+ PassT *passRep; -+ FuncT *funcRep; -+ -+ BlockInfoMap blockInfoMap; -+ LoopLandInfoMap loopLandInfoMap; -+ SmallVector orderedBlks; -+ const AMDGPURegisterInfo *TRI; -+ -+}; //template class CFGStructurizer -+ -+template CFGStructurizer::CFGStructurizer() -+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { -+} -+ -+template CFGStructurizer::~CFGStructurizer() { -+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), -+ E = blockInfoMap.end(); I != E; ++I) { -+ delete I->second; -+ } -+} -+ -+template -+bool CFGStructurizer::prepare(FuncT &func, PassT &pass, -+ const AMDGPURegisterInfo * tri) { -+ passRep = &pass; -+ funcRep = &func; -+ TRI = tri; -+ -+ bool changed = false; -+ -+ //FIXME: if not reducible flow graph, make it so ??? 
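prepare() normalizes the CFG before any matching starts; one of its steps, addDummyExitBlock(), funnels multiple return blocks into a single exit so the CFG has one sink. A small self-contained model of that step on an integer-indexed CFG (the Block struct here is an assumption of this sketch, not the patch's type):

  #include <cstdio>
  #include <vector>

  struct Block {
    int id;                         // ids double as indices in this sketch
    std::vector<int> succs;
    bool returns;
  };

  // If two or more blocks return, create one fresh exit block and retarget
  // every return to it, leaving a single-sink CFG.
  static void addDummyExit(std::vector<Block> &cfg) {
    std::vector<int> rets;
    for (const Block &b : cfg)
      if (b.returns)
        rets.push_back(b.id);
    if (rets.size() < 2)
      return;                       // zero or one return: nothing to normalize
    Block exit = {(int)cfg.size(), {}, true};
    for (int r : rets) {
      cfg[r].returns = false;       // the old returns become plain branches
      cfg[r].succs.push_back(exit.id);
    }
    cfg.push_back(exit);
  }

  int main() {
    std::vector<Block> cfg = {{0, {1, 2}, false}, {1, {}, true}, {2, {}, true}};
    addDummyExit(cfg);
    std::printf("blocks after normalization: %zu\n", cfg.size()); // prints 4
  }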
-+ -+ if (DEBUGME) { -+ errs() << "AMDGPUCFGStructurizer::prepare\n"; -+ } -+ -+ loopInfo = CFGTraits::getLoopInfo(pass); -+ if (DEBUGME) { -+ errs() << "LoopInfo:\n"; -+ PrintLoopinfo(*loopInfo, errs()); -+ } -+ -+ orderBlocks(); -+ if (DEBUGME) { -+ errs() << "Ordered blocks:\n"; -+ printOrderedBlocks(errs()); -+ } -+ -+ SmallVector retBlks; -+ -+ for (typename LoopInfoT::iterator iter = loopInfo->begin(), -+ iterEnd = loopInfo->end(); -+ iter != iterEnd; ++iter) { -+ LoopT* loopRep = (*iter); -+ BlockTSmallerVector exitingBlks; -+ loopRep->getExitingBlocks(exitingBlks); -+ -+ if (exitingBlks.size() == 0) { -+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); -+ if (dummyExitBlk != NULL) -+ retBlks.push_back(dummyExitBlk); -+ } -+ } -+ -+ // Remove unconditional branch instr. -+ // Add dummy exit block iff there are multiple returns. -+ -+ for (typename SmallVector::const_iterator -+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); -+ iterBlk != iterEndBlk; -+ ++iterBlk) { -+ BlockT *curBlk = *iterBlk; -+ removeUnconditionalBranch(curBlk); -+ removeRedundantConditionalBranch(curBlk); -+ if (CFGTraits::isReturnBlock(curBlk)) { -+ retBlks.push_back(curBlk); -+ } -+ assert(curBlk->succ_size() <= 2); -+ } //for -+ -+ if (retBlks.size() >= 2) { -+ addDummyExitBlock(retBlks); -+ changed = true; -+ } -+ -+ return changed; -+} //CFGStructurizer::prepare -+ -+template -+bool CFGStructurizer::run(FuncT &func, PassT &pass, -+ const AMDGPURegisterInfo * tri) { -+ passRep = &pass; -+ funcRep = &func; -+ TRI = tri; -+ -+ //Assume reducible CFG... -+ if (DEBUGME) { -+ errs() << "AMDGPUCFGStructurizer::run\n"; -+ func.viewCFG(); -+ } -+ -+ domTree = CFGTraits::getDominatorTree(pass); -+ if (DEBUGME) { -+ domTree->print(errs(), (const llvm::Module*)0); -+ } -+ -+ postDomTree = CFGTraits::getPostDominatorTree(pass); -+ if (DEBUGME) { -+ postDomTree->print(errs()); -+ } -+ -+ loopInfo = CFGTraits::getLoopInfo(pass); -+ if (DEBUGME) { -+ errs() << "LoopInfo:\n"; -+ PrintLoopinfo(*loopInfo, errs()); -+ } -+ -+ orderBlocks(); -+#ifdef STRESSTEST -+ //Use the worse block ordering to test the algorithm. -+ ReverseVector(orderedBlks); -+#endif -+ -+ if (DEBUGME) { -+ errs() << "Ordered blocks:\n"; -+ printOrderedBlocks(errs()); -+ } -+ int numIter = 0; -+ bool finish = false; -+ BlockT *curBlk; -+ bool makeProgress = false; -+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(), -+ orderedBlks.end()); -+ -+ do { -+ ++numIter; -+ if (DEBUGME) { -+ errs() << "numIter = " << numIter -+ << ", numRemaintedBlk = " << numRemainedBlk << "\n"; -+ } -+ -+ typename SmallVector::const_iterator -+ iterBlk = orderedBlks.begin(); -+ typename SmallVector::const_iterator -+ iterBlkEnd = orderedBlks.end(); -+ -+ typename SmallVector::const_iterator -+ sccBeginIter = iterBlk; -+ BlockT *sccBeginBlk = NULL; -+ int sccNumBlk = 0; // The number of active blocks, init to a -+ // maximum possible number. -+ int sccNumIter; // Number of iteration in this SCC. -+ -+ while (iterBlk != iterBlkEnd) { -+ curBlk = *iterBlk; -+ -+ if (sccBeginBlk == NULL) { -+ sccBeginIter = iterBlk; -+ sccBeginBlk = curBlk; -+ sccNumIter = 0; -+ sccNumBlk = numRemainedBlk; // Init to maximum possible number. 
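The driver loop in run() above is a fixpoint iteration: each pass over an SCC must strictly shrink the number of active blocks, and the whole reduction stops (flagging an irreducible CFG) once an iteration makes no progress. Stripped of the SCC bookkeeping, the control shape is roughly the following sketch:

  #include <cstdio>

  // Pretend reduction step: each full pass merges some blocks away.
  static int reduceOnce(int blocks) {
    return blocks > 1 ? blocks - 2 : blocks;
  }

  int main() {
    int remaining = 9;
    bool progress = true;
    while (remaining > 1 && progress) {
      int next = reduceOnce(remaining);
      progress = next < remaining;  // plays the role of makeProgress above
      remaining = next;
    }
    if (remaining == 1)
      std::printf("reduced to one block\n");
    else
      std::printf("irreducible: stuck at %d blocks\n", remaining);
  }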
-+ if (DEBUGME) { -+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk); -+ errs() << "\n"; -+ } -+ } -+ -+ if (!isRetiredBlock(curBlk)) { -+ patternMatch(curBlk); -+ } -+ -+ ++iterBlk; -+ -+ bool contNextScc = true; -+ if (iterBlk == iterBlkEnd -+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { -+ // Just finish one scc. -+ ++sccNumIter; -+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); -+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { -+ if (DEBUGME) { -+ errs() << "Can't reduce SCC " << getSCCNum(curBlk) -+ << ", sccNumIter = " << sccNumIter; -+ errs() << "doesn't make any progress\n"; -+ } -+ contNextScc = true; -+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { -+ sccNumBlk = sccRemainedNumBlk; -+ iterBlk = sccBeginIter; -+ contNextScc = false; -+ if (DEBUGME) { -+ errs() << "repeat processing SCC" << getSCCNum(curBlk) -+ << "sccNumIter = " << sccNumIter << "\n"; -+ func.viewCFG(); -+ } -+ } else { -+ // Finish the current scc. -+ contNextScc = true; -+ } -+ } else { -+ // Continue on next component in the current scc. -+ contNextScc = false; -+ } -+ -+ if (contNextScc) { -+ sccBeginBlk = NULL; -+ } -+ } //while, "one iteration" over the function. -+ -+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func); -+ if (entryBlk->succ_size() == 0) { -+ finish = true; -+ if (DEBUGME) { -+ errs() << "Reduce to one block\n"; -+ } -+ } else { -+ int newnumRemainedBlk -+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); -+ // consider cloned blocks ?? -+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { -+ makeProgress = true; -+ numRemainedBlk = newnumRemainedBlk; -+ } else { -+ makeProgress = false; -+ if (DEBUGME) { -+ errs() << "No progress\n"; -+ } -+ } -+ } -+ } while (!finish && makeProgress); -+ -+ // Misc wrap up to maintain the consistency of the Function representation. -+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); -+ -+ // Detach retired Block, release memory. -+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), -+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { -+ if ((*iterMap).second && (*iterMap).second->isRetired) { -+ assert(((*iterMap).first)->getNumber() != -1); -+ if (DEBUGME) { -+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; -+ } -+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function. -+ } -+ delete (*iterMap).second; -+ } -+ blockInfoMap.clear(); -+ -+ // clear loopLandInfoMap -+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), -+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { -+ delete (*iterMap).second; -+ } -+ loopLandInfoMap.clear(); -+ -+ if (DEBUGME) { -+ func.viewCFG(); -+ } -+ -+ if (!finish) { -+ assert(!"IRREDUCIBL_CF"); -+ } -+ -+ return true; -+} //CFGStructurizer::run -+ -+/// Print the ordered Blocks. 
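orderBlocks(), defined just below, derives the processing order from a DFS over the CFG's SCCs; conceptually this is the classic reverse-post-order computation. The textbook version on a toy adjacency list, for readers who want the idea without the scc_iterator machinery:

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  static void dfs(int u, const std::vector<std::vector<int>> &adj,
                  std::vector<bool> &seen, std::vector<int> &post) {
    seen[u] = true;
    for (int v : adj[u])
      if (!seen[v])
        dfs(v, adj, seen, post);
    post.push_back(u);              // post-order: children first
  }

  int main() {
    std::vector<std::vector<int>> adj = {{1, 2}, {3}, {3}, {}};
    std::vector<bool> seen(adj.size(), false);
    std::vector<int> post;
    dfs(0, adj, seen, post);
    std::reverse(post.begin(), post.end());  // reverse post-order
    for (int b : post)
      std::printf("BB%d ", b);               // prints: BB0 BB2 BB1 BB3
    std::printf("\n");
  }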
-+/// -+template -+void CFGStructurizer::printOrderedBlocks(llvm::raw_ostream &os) { -+ size_t i = 0; -+ for (typename SmallVector::const_iterator -+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); -+ iterBlk != iterBlkEnd; -+ ++iterBlk, ++i) { -+ os << "BB" << (*iterBlk)->getNumber(); -+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; -+ if (i != 0 && i % 10 == 0) { -+ os << "\n"; -+ } else { -+ os << " "; -+ } -+ } -+} //printOrderedBlocks -+ -+/// Compute the reversed DFS post order of Blocks -+/// -+template void CFGStructurizer::orderBlocks() { -+ int sccNum = 0; -+ BlockT *bb; -+ for (scc_iterator sccIter = scc_begin(funcRep), -+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { -+ std::vector &sccNext = *sccIter; -+ for (typename std::vector::const_iterator -+ blockIter = sccNext.begin(), blockEnd = sccNext.end(); -+ blockIter != blockEnd; ++blockIter) { -+ bb = *blockIter; -+ orderedBlks.push_back(bb); -+ recordSccnum(bb, sccNum); -+ } -+ } -+ -+ //walk through all the block in func to check for unreachable -+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), -+ blockEnd1 = FuncGTraits::nodes_end(funcRep); -+ blockIter1 != blockEnd1; ++blockIter1) { -+ BlockT *bb = &(*blockIter1); -+ sccNum = getSCCNum(bb); -+ if (sccNum == INVALIDSCCNUM) { -+ errs() << "unreachable block BB" << bb->getNumber() << "\n"; -+ } -+ } -+} //orderBlocks -+ -+template int CFGStructurizer::patternMatch(BlockT *curBlk) { -+ int numMatch = 0; -+ int curMatch; -+ -+ if (DEBUGME) { -+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; -+ } -+ -+ while ((curMatch = patternMatchGroup(curBlk)) > 0) { -+ numMatch += curMatch; -+ } -+ -+ if (DEBUGME) { -+ errs() << "End patternMatch BB" << curBlk->getNumber() -+ << ", numMatch = " << numMatch << "\n"; -+ } -+ -+ return numMatch; -+} //patternMatch -+ -+template -+int CFGStructurizer::patternMatchGroup(BlockT *curBlk) { -+ int numMatch = 0; -+ numMatch += serialPatternMatch(curBlk); -+ numMatch += ifPatternMatch(curBlk); -+ numMatch += loopendPatternMatch(curBlk); -+ numMatch += loopPatternMatch(curBlk); -+ return numMatch; -+}//patternMatchGroup -+ -+template -+int CFGStructurizer::serialPatternMatch(BlockT *curBlk) { -+ if (curBlk->succ_size() != 1) { -+ return 0; -+ } -+ -+ BlockT *childBlk = *curBlk->succ_begin(); -+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { -+ return 0; -+ } -+ -+ mergeSerialBlock(curBlk, childBlk); -+ ++numSerialPatternMatch; -+ return 1; -+} //serialPatternMatch -+ -+template -+int CFGStructurizer::ifPatternMatch(BlockT *curBlk) { -+ //two edges -+ if (curBlk->succ_size() != 2) { -+ return 0; -+ } -+ -+ if (hasBackEdge(curBlk)) { -+ return 0; -+ } -+ -+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); -+ if (branchInstr == NULL) { -+ return 0; -+ } -+ -+ assert(CFGTraits::isCondBranch(branchInstr)); -+ -+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); -+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); -+ BlockT *landBlk; -+ int cloned = 0; -+ -+ // TODO: Simplify -+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 -+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { -+ landBlk = *trueBlk->succ_begin(); -+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { -+ landBlk = NULL; -+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { -+ landBlk = falseBlk; -+ falseBlk = NULL; -+ } else if (falseBlk->succ_size() == 1 -+ && 
*falseBlk->succ_begin() == trueBlk) { -+ landBlk = trueBlk; -+ trueBlk = NULL; -+ } else if (falseBlk->succ_size() == 1 -+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) { -+ landBlk = *falseBlk->succ_begin(); -+ } else if (trueBlk->succ_size() == 1 -+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) { -+ landBlk = *trueBlk->succ_begin(); -+ } else { -+ return handleJumpintoIf(curBlk, trueBlk, falseBlk); -+ } -+ -+ // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the -+ // new BB created for landBlk==NULL may introduce new challenge to the -+ // reduction process. -+ if (landBlk != NULL && -+ ((trueBlk && trueBlk->pred_size() > 1) -+ || (falseBlk && falseBlk->pred_size() > 1))) { -+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); -+ } -+ -+ if (trueBlk && trueBlk->pred_size() > 1) { -+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); -+ ++cloned; -+ } -+ -+ if (falseBlk && falseBlk->pred_size() > 1) { -+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); -+ ++cloned; -+ } -+ -+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); -+ -+ ++numIfPatternMatch; -+ -+ numClonedBlock += cloned; -+ -+ return 1 + cloned; -+} //ifPatternMatch -+ -+template -+int CFGStructurizer::switchPatternMatch(BlockT *curBlk) { -+ return 0; -+} //switchPatternMatch -+ -+template -+int CFGStructurizer::loopendPatternMatch(BlockT *curBlk) { -+ LoopT *loopRep = loopInfo->getLoopFor(curBlk); -+ typename std::vector nestedLoops; -+ while (loopRep) { -+ nestedLoops.push_back(loopRep); -+ loopRep = loopRep->getParentLoop(); -+ } -+ -+ if (nestedLoops.size() == 0) { -+ return 0; -+ } -+ -+ // Process nested loop outside->inside, so "continue" to a outside loop won't -+ // be mistaken as "break" of the current loop. -+ int num = 0; -+ for (typename std::vector::reverse_iterator -+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); -+ iter != iterEnd; ++iter) { -+ loopRep = *iter; -+ -+ if (getLoopLandBlock(loopRep) != NULL) { -+ continue; -+ } -+ -+ BlockT *loopHeader = loopRep->getHeader(); -+ -+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader); -+ -+ if (numBreak == -1) { -+ break; -+ } -+ -+ int numCont = loopcontPatternMatch(loopRep, loopHeader); -+ num += numBreak + numCont; -+ } -+ -+ return num; -+} //loopendPatternMatch -+ -+template -+int CFGStructurizer::loopPatternMatch(BlockT *curBlk) { -+ if (curBlk->succ_size() != 0) { -+ return 0; -+ } -+ -+ int numLoop = 0; -+ LoopT *loopRep = loopInfo->getLoopFor(curBlk); -+ while (loopRep && loopRep->getHeader() == curBlk) { -+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep); -+ if (loopLand) { -+ BlockT *landBlk = loopLand->landBlk; -+ assert(landBlk); -+ if (!isRetiredBlock(landBlk)) { -+ mergeLooplandBlock(curBlk, loopLand); -+ ++numLoop; -+ } -+ } -+ loopRep = loopRep->getParentLoop(); -+ } -+ -+ numLoopPatternMatch += numLoop; -+ -+ return numLoop; -+} //loopPatternMatch -+ -+template -+int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, -+ BlockT *loopHeader) { -+ BlockTSmallerVector exitingBlks; -+ loopRep->getExitingBlocks(exitingBlks); -+ -+ if (DEBUGME) { -+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; -+ } -+ -+ if (exitingBlks.size() == 0) { -+ setLoopLandBlock(loopRep); -+ return 0; -+ } -+ -+ // Compute the corresponding exitBlks and exit block set. 
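The step that follows maps each exiting block (inside the loop, with an edge leaving it) to its exit block (the successor outside the loop), the same contract exitingBlock2ExitBlock() implements further down. In miniature, with an invented integer-CFG representation:

  #include <cstdio>
  #include <set>
  #include <vector>

  struct Blk {
    int id;
    std::vector<int> succs;
  };

  // An exiting block lives inside the loop; its unique successor outside the
  // loop body is the corresponding exit block.
  static int exitOf(const Blk &exiting, const std::set<int> &loopBlocks) {
    for (int s : exiting.succs)
      if (!loopBlocks.count(s))
        return s;
    return -1;                      // no edge leaves the loop
  }

  int main() {
    std::set<int> loop = {0, 1};    // a two-block loop, header BB0
    Blk exiting = {1, {0, 2}};      // back edge to BB0, exit edge to BB2
    std::printf("exit block: BB%d\n", exitOf(exiting, loop)); // BB2
  }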
-+ BlockTSmallerVector exitBlks; -+ std::set exitBlkSet; -+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), -+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { -+ BlockT *exitingBlk = *iter; -+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); -+ exitBlks.push_back(exitBlk); -+ exitBlkSet.insert(exitBlk); //non-duplicate insert -+ } -+ -+ assert(exitBlkSet.size() > 0); -+ assert(exitBlks.size() == exitingBlks.size()); -+ -+ if (DEBUGME) { -+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; -+ } -+ -+ // Find exitLandBlk. -+ BlockT *exitLandBlk = NULL; -+ int numCloned = 0; -+ int numSerial = 0; -+ -+ if (exitBlkSet.size() == 1) { -+ exitLandBlk = *exitBlkSet.begin(); -+ } else { -+ exitLandBlk = findNearestCommonPostDom(exitBlkSet); -+ -+ if (exitLandBlk == NULL) { -+ return -1; -+ } -+ -+ bool allInPath = true; -+ bool allNotInPath = true; -+ for (typename std::set::const_iterator -+ iter = exitBlkSet.begin(), -+ iterEnd = exitBlkSet.end(); -+ iter != iterEnd; ++iter) { -+ BlockT *exitBlk = *iter; -+ -+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); -+ if (DEBUGME) { -+ errs() << "BB" << exitBlk->getNumber() -+ << " to BB" << exitLandBlk->getNumber() << " PathToKind=" -+ << pathKind << "\n"; -+ } -+ -+ allInPath = allInPath && (pathKind == SinglePath_InPath); -+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); -+ -+ if (!allInPath && !allNotInPath) { -+ if (DEBUGME) { -+ errs() << "singlePath check fail\n"; -+ } -+ return -1; -+ } -+ } // check all exit blocks -+ -+ if (allNotInPath) { -+ -+ // TODO: Simplify, maybe separate function? -+ LoopT *parentLoopRep = loopRep->getParentLoop(); -+ BlockT *parentLoopHeader = NULL; -+ if (parentLoopRep) -+ parentLoopHeader = parentLoopRep->getHeader(); -+ -+ if (exitLandBlk == parentLoopHeader && -+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep, -+ loopRep, -+ exitBlkSet, -+ exitLandBlk)) != NULL) { -+ if (DEBUGME) { -+ errs() << "relocateLoopcontBlock success\n"; -+ } -+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, -+ exitingBlks, -+ exitBlks)) != NULL) { -+ if (DEBUGME) { -+ errs() << "insertEndbranchBlock success\n"; -+ } -+ } else { -+ if (DEBUGME) { -+ errs() << "loop exit fail\n"; -+ } -+ return -1; -+ } -+ } -+ -+ // Handle side entry to exit path. 
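singlePathTo(), used above to classify each exit path, just follows the unique-successor chain from a source block and reports whether the destination lies on it. The same walk on a toy graph; the step bound is an addition of this sketch, guarding against cycles, and is not in the original:

  #include <cstddef>
  #include <cstdio>
  #include <vector>

  struct Node {
    std::vector<int> succs;
  };

  // Follow the unique-successor chain from src; report whether dst lies on it.
  static bool singlePathTo(int src, int dst, const std::vector<Node> &g) {
    for (std::size_t steps = 0; steps <= g.size(); ++steps) {
      if (src == dst)
        return true;
      if (g[src].succs.size() != 1)
        return false;               // the path forks or ends: not single
      src = g[src].succs[0];
    }
    return false;
  }

  int main() {
    std::vector<Node> g = {{{1}}, {{2}}, {{}}};   // BB0 -> BB1 -> BB2
    std::printf("single path 0 -> 2: %s\n",
                singlePathTo(0, 2, g) ? "yes" : "no");
  }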
-+ exitBlks.clear(); -+ exitBlkSet.clear(); -+ for (typename BlockTSmallerVector::iterator iterExiting = -+ exitingBlks.begin(), -+ iterExitingEnd = exitingBlks.end(); -+ iterExiting != iterExitingEnd; ++iterExiting) { -+ BlockT *exitingBlk = *iterExiting; -+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); -+ BlockT *newExitBlk = exitBlk; -+ -+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { -+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); -+ ++numCloned; -+ } -+ -+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); -+ -+ exitBlks.push_back(newExitBlk); -+ exitBlkSet.insert(newExitBlk); -+ } -+ -+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), -+ iterExitEnd = exitBlks.end(); -+ iterExit != iterExitEnd; ++iterExit) { -+ BlockT *exitBlk = *iterExit; -+ numSerial += serialPatternMatch(exitBlk); -+ } -+ -+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), -+ iterExitEnd = exitBlks.end(); -+ iterExit != iterExitEnd; ++iterExit) { -+ BlockT *exitBlk = *iterExit; -+ if (exitBlk->pred_size() > 1) { -+ if (exitBlk != exitLandBlk) { -+ return -1; -+ } -+ } else { -+ if (exitBlk != exitLandBlk && -+ (exitBlk->succ_size() != 1 || -+ *exitBlk->succ_begin() != exitLandBlk)) { -+ return -1; -+ } -+ } -+ } -+ } // else -+ -+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); -+ -+ // Fold break into the breaking block. Leverage across level breaks. -+ assert(exitingBlks.size() == exitBlks.size()); -+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), -+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); -+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) { -+ BlockT *exitBlk = *iterExit; -+ BlockT *exitingBlk = *iterExiting; -+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); -+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); -+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); -+ } -+ -+ int numBreak = static_cast(exitingBlks.size()); -+ numLoopbreakPatternMatch += numBreak; -+ numClonedBlock += numCloned; -+ return numBreak + numSerial + numCloned; -+} //loopbreakPatternMatch -+ -+template -+int CFGStructurizer::loopcontPatternMatch(LoopT *loopRep, -+ BlockT *loopHeader) { -+ int numCont = 0; -+ SmallVector contBlk; -+ for (typename InvBlockGTraits::ChildIteratorType iter = -+ InvBlockGTraits::child_begin(loopHeader), -+ iterEnd = InvBlockGTraits::child_end(loopHeader); -+ iter != iterEnd; ++iter) { -+ BlockT *curBlk = *iter; -+ if (loopRep->contains(curBlk)) { -+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), -+ loopHeader, loopRep); -+ contBlk.push_back(curBlk); -+ ++numCont; -+ } -+ } -+ -+ for (typename SmallVector::iterator -+ iter = contBlk.begin(), iterEnd = contBlk.end(); -+ iter != iterEnd; ++iter) { -+ (*iter)->removeSuccessor(loopHeader); -+ } -+ -+ numLoopcontPatternMatch += numCont; -+ -+ return numCont; -+} //loopcontPatternMatch -+ -+ -+template -+bool CFGStructurizer::isSameloopDetachedContbreak(BlockT *src1Blk, -+ BlockT *src2Blk) { -+ // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the -+ // same loop with LoopLandInfo without explicitly keeping track of -+ // loopContBlks and loopBreakBlks, this is a method to get the information. 
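loopcontPatternMatch() above treats every in-loop predecessor of the loop header as a continue edge and rewrites it. The classification itself is a one-line membership test; on an invented edge-list CFG:

  #include <cstdio>
  #include <set>
  #include <utility>
  #include <vector>

  int main() {
    std::set<int> loop = {0, 1, 2};   // loop body, header is BB0
    // Edges into the header: BB3 is the preheader, BB1 and BB2 are latches.
    std::vector<std::pair<int, int>> edges = {{3, 0}, {1, 0}, {2, 0}};
    for (const auto &e : edges)
      if (e.second == 0 && loop.count(e.first))
        std::printf("BB%d -> header is a continue edge\n", e.first);
    // prints BB1 and BB2; the entry edge from BB3 is left alone
  }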
-+ // -+ if (src1Blk->succ_size() == 0) { -+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk); -+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ if (theEntry != NULL) { -+ if (DEBUGME) { -+ errs() << "isLoopContBreakBlock yes src1 = BB" -+ << src1Blk->getNumber() -+ << " src2 = BB" << src2Blk->getNumber() << "\n"; -+ } -+ return true; -+ } -+ } -+ } -+ return false; -+} //isSameloopDetachedContbreak -+ -+template -+int CFGStructurizer::handleJumpintoIf(BlockT *headBlk, -+ BlockT *trueBlk, -+ BlockT *falseBlk) { -+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); -+ if (num == 0) { -+ if (DEBUGME) { -+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; -+ } -+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); -+ } -+ return num; -+} -+ -+template -+int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk, -+ BlockT *trueBlk, -+ BlockT *falseBlk) { -+ int num = 0; -+ BlockT *downBlk; -+ -+ //trueBlk could be the common post dominator -+ downBlk = trueBlk; -+ -+ if (DEBUGME) { -+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() -+ << " true = BB" << trueBlk->getNumber() -+ << ", numSucc=" << trueBlk->succ_size() -+ << " false = BB" << falseBlk->getNumber() << "\n"; -+ } -+ -+ while (downBlk) { -+ if (DEBUGME) { -+ errs() << "check down = BB" << downBlk->getNumber(); -+ } -+ -+ if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { -+ if (DEBUGME) { -+ errs() << " working\n"; -+ } -+ -+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); -+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); -+ -+ numClonedBlock += num; -+ num += serialPatternMatch(*headBlk->succ_begin()); -+ num += serialPatternMatch(*(++headBlk->succ_begin())); -+ num += ifPatternMatch(headBlk); -+ assert(num > 0); -+ -+ break; -+ } -+ if (DEBUGME) { -+ errs() << " not working\n"; -+ } -+ downBlk = (downBlk->succ_size() == 1) ? 
(*downBlk->succ_begin()) : NULL; -+ } // walk down the postDomTree -+ -+ return num; -+} //handleJumpintoIf -+ -+template -+void CFGStructurizer::showImproveSimpleJumpintoIf(BlockT *headBlk, -+ BlockT *trueBlk, -+ BlockT *falseBlk, -+ BlockT *landBlk, -+ bool detail) { -+ errs() << "head = BB" << headBlk->getNumber() -+ << " size = " << headBlk->size(); -+ if (detail) { -+ errs() << "\n"; -+ headBlk->print(errs()); -+ errs() << "\n"; -+ } -+ -+ if (trueBlk) { -+ errs() << ", true = BB" << trueBlk->getNumber() << " size = " -+ << trueBlk->size() << " numPred = " << trueBlk->pred_size(); -+ if (detail) { -+ errs() << "\n"; -+ trueBlk->print(errs()); -+ errs() << "\n"; -+ } -+ } -+ if (falseBlk) { -+ errs() << ", false = BB" << falseBlk->getNumber() << " size = " -+ << falseBlk->size() << " numPred = " << falseBlk->pred_size(); -+ if (detail) { -+ errs() << "\n"; -+ falseBlk->print(errs()); -+ errs() << "\n"; -+ } -+ } -+ if (landBlk) { -+ errs() << ", land = BB" << landBlk->getNumber() << " size = " -+ << landBlk->size() << " numPred = " << landBlk->pred_size(); -+ if (detail) { -+ errs() << "\n"; -+ landBlk->print(errs()); -+ errs() << "\n"; -+ } -+ } -+ -+ errs() << "\n"; -+} //showImproveSimpleJumpintoIf -+ -+template -+int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, -+ BlockT *trueBlk, -+ BlockT *falseBlk, -+ BlockT **plandBlk) { -+ bool migrateTrue = false; -+ bool migrateFalse = false; -+ -+ BlockT *landBlk = *plandBlk; -+ -+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1) -+ && (falseBlk == NULL || falseBlk->succ_size() <= 1)); -+ -+ if (trueBlk == falseBlk) { -+ return 0; -+ } -+ -+ migrateTrue = needMigrateBlock(trueBlk); -+ migrateFalse = needMigrateBlock(falseBlk); -+ -+ if (!migrateTrue && !migrateFalse) { -+ return 0; -+ } -+ -+ // If we need to migrate either trueBlk and falseBlk, migrate the rest that -+ // have more than one predecessors. without doing this, its predecessor -+ // rather than headBlk will have undefined value in initReg. -+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { -+ migrateTrue = true; -+ } -+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { -+ migrateFalse = true; -+ } -+ -+ if (DEBUGME) { -+ errs() << "before improveSimpleJumpintoIf: "; -+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); -+ } -+ -+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk -+ // -+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else -+ // {initReg = 0; org falseBlk branch } -+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} -+ // => org landBlk -+ // if landBlk->pred_size() > 2, put the about if-else inside -+ // if (initReg !=2) {...} -+ // -+ // add initReg = initVal to headBlk -+ -+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); -+ unsigned initReg = -+ funcRep->getRegInfo().createVirtualRegister(I32RC); -+ if (!migrateTrue || !migrateFalse) { -+ int initVal = migrateTrue ? 
0 : 1; -+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); -+ } -+ -+ int numNewBlk = 0; -+ -+ if (landBlk == NULL) { -+ landBlk = funcRep->CreateMachineBasicBlock(); -+ funcRep->push_back(landBlk); //insert to function -+ -+ if (trueBlk) { -+ trueBlk->addSuccessor(landBlk); -+ } else { -+ headBlk->addSuccessor(landBlk); -+ } -+ -+ if (falseBlk) { -+ falseBlk->addSuccessor(landBlk); -+ } else { -+ headBlk->addSuccessor(landBlk); -+ } -+ -+ numNewBlk ++; -+ } -+ -+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2); -+ -+ //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" -+ typename BlockT::iterator insertPos = -+ CFGTraits::getInstrPos -+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); -+ -+ if (landBlkHasOtherPred) { -+ unsigned immReg = -+ funcRep->getRegInfo().createVirtualRegister(I32RC); -+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); -+ unsigned cmpResReg = -+ funcRep->getRegInfo().createVirtualRegister(I32RC); -+ -+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, -+ initReg, immReg); -+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, -+ AMDGPU::IF_PREDICATE_SET, passRep, -+ cmpResReg, DebugLoc()); -+ } -+ -+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET, -+ passRep, initReg, DebugLoc()); -+ -+ if (migrateTrue) { -+ migrateInstruction(trueBlk, landBlk, insertPos); -+ // need to uncondionally insert the assignment to ensure a path from its -+ // predecessor rather than headBlk has valid value in initReg if -+ // (initVal != 1). -+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); -+ } -+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); -+ -+ if (migrateFalse) { -+ migrateInstruction(falseBlk, landBlk, insertPos); -+ // need to uncondionally insert the assignment to ensure a path from its -+ // predecessor rather than headBlk has valid value in initReg if -+ // (initVal != 0) -+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); -+ } -+ -+ if (landBlkHasOtherPred) { -+ // add endif -+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); -+ -+ // put initReg = 2 to other predecessors of landBlk -+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), -+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd; -+ ++predIter) { -+ BlockT *curBlk = *predIter; -+ if (curBlk != trueBlk && curBlk != falseBlk) { -+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); -+ } -+ } //for -+ } -+ if (DEBUGME) { -+ errs() << "result from improveSimpleJumpintoIf: "; -+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); -+ } -+ -+ // update landBlk -+ *plandBlk = landBlk; -+ -+ return numNewBlk; -+} //improveSimpleJumpintoIf -+ -+template -+void CFGStructurizer::handleLoopbreak(BlockT *exitingBlk, -+ LoopT *exitingLoop, -+ BlockT *exitBlk, -+ LoopT *exitLoop, -+ BlockT *landBlk) { -+ if (DEBUGME) { -+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) -+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; -+ } -+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); -+ -+ RegiT initReg = INVALIDREGNUM; -+ if (exitingLoop != exitLoop) { -+ initReg = static_cast -+ (funcRep->getRegInfo().createVirtualRegister(I32RC)); -+ assert(initReg != INVALIDREGNUM); -+ addLoopBreakInitReg(exitLoop, initReg); -+ while (exitingLoop != exitLoop && exitingLoop) { -+ addLoopBreakOnReg(exitingLoop, 
initReg); -+ exitingLoop = exitingLoop->getParentLoop(); -+ } -+ assert(exitingLoop == exitLoop); -+ } -+ -+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); -+ -+} //handleLoopbreak -+ -+template -+void CFGStructurizer::handleLoopcontBlock(BlockT *contingBlk, -+ LoopT *contingLoop, -+ BlockT *contBlk, -+ LoopT *contLoop) { -+ if (DEBUGME) { -+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() -+ << " header = BB" << contBlk->getNumber() << "\n"; -+ -+ errs() << "Trying to continue loop-depth = " -+ << getLoopDepth(contLoop) -+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; -+ } -+ -+ RegiT initReg = INVALIDREGNUM; -+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); -+ if (contingLoop != contLoop) { -+ initReg = static_cast -+ (funcRep->getRegInfo().createVirtualRegister(I32RC)); -+ assert(initReg != INVALIDREGNUM); -+ addLoopContInitReg(contLoop, initReg); -+ while (contingLoop && contingLoop->getParentLoop() != contLoop) { -+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg -+ contingLoop = contingLoop->getParentLoop(); -+ } -+ assert(contingLoop && contingLoop->getParentLoop() == contLoop); -+ addLoopContOnReg(contingLoop, initReg); -+ } -+ -+ settleLoopcontBlock(contingBlk, contBlk, initReg); -+} //handleLoopcontBlock -+ -+template -+void CFGStructurizer::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { -+ if (DEBUGME) { -+ errs() << "serialPattern BB" << dstBlk->getNumber() -+ << " <= BB" << srcBlk->getNumber() << "\n"; -+ } -+ dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end()); -+ -+ dstBlk->removeSuccessor(srcBlk); -+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk); -+ -+ removeSuccessor(srcBlk); -+ retireBlock(dstBlk, srcBlk); -+} //mergeSerialBlock -+ -+template -+void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, -+ BlockT *curBlk, -+ BlockT *trueBlk, -+ BlockT *falseBlk, -+ BlockT *landBlk) { -+ if (DEBUGME) { -+ errs() << "ifPattern BB" << curBlk->getNumber(); -+ errs() << "{ "; -+ if (trueBlk) { -+ errs() << "BB" << trueBlk->getNumber(); -+ } -+ errs() << " } else "; -+ errs() << "{ "; -+ if (falseBlk) { -+ errs() << "BB" << falseBlk->getNumber(); -+ } -+ errs() << " }\n "; -+ errs() << "landBlock: "; -+ if (landBlk == NULL) { -+ errs() << "NULL"; -+ } else { -+ errs() << "BB" << landBlk->getNumber(); -+ } -+ errs() << "\n"; -+ } -+ -+ int oldOpcode = branchInstr->getOpcode(); -+ DebugLoc branchDL = branchInstr->getDebugLoc(); -+ -+// transform to -+// if cond -+// trueBlk -+// else -+// falseBlk -+// endif -+// landBlk -+ -+ typename BlockT::iterator branchInstrPos = -+ CFGTraits::getInstrPos(curBlk, branchInstr); -+ CFGTraits::insertCondBranchBefore(branchInstrPos, -+ CFGTraits::getBranchNzeroOpcode(oldOpcode), -+ passRep, -+ branchDL); -+ -+ if (trueBlk) { -+ curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end()); -+ curBlk->removeSuccessor(trueBlk); -+ if (landBlk && trueBlk->succ_size()!=0) { -+ trueBlk->removeSuccessor(landBlk); -+ } -+ retireBlock(curBlk, trueBlk); -+ } -+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); -+ -+ if (falseBlk) { -+ curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(), -+ falseBlk->end()); -+ curBlk->removeSuccessor(falseBlk); -+ if (landBlk && falseBlk->succ_size() != 0) { -+ falseBlk->removeSuccessor(landBlk); -+ } -+ retireBlock(curBlk, falseBlk); -+ } -+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); -+ -+ branchInstr->eraseFromParent(); -+ -+ if 
(landBlk && trueBlk && falseBlk) { -+ curBlk->addSuccessor(landBlk); -+ } -+ -+} //mergeIfthenelseBlock -+ -+template -+void CFGStructurizer::mergeLooplandBlock(BlockT *dstBlk, -+ LoopLandInfo *loopLand) { -+ BlockT *landBlk = loopLand->landBlk; -+ -+ if (DEBUGME) { -+ errs() << "loopPattern header = BB" << dstBlk->getNumber() -+ << " land = BB" << landBlk->getNumber() << "\n"; -+ } -+ -+ // Loop contInitRegs are init at the beginning of the loop. -+ for (typename std::set::const_iterator iter = -+ loopLand->contInitRegs.begin(), -+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { -+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); -+ } -+ -+ /* we last inserterd the DebugLoc in the -+ * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. -+ * search for the DebugLoc in the that statement. -+ * if not found, we have to insert the empty/default DebugLoc */ -+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); -+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); -+ -+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); -+ // Loop breakInitRegs are init before entering the loop. -+ for (typename std::set::const_iterator iter = -+ loopLand->breakInitRegs.begin(), -+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) { -+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); -+ } -+ // Loop endbranchInitRegs are init before entering the loop. -+ for (typename std::set::const_iterator iter = -+ loopLand->endbranchInitRegs.begin(), -+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { -+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); -+ } -+ -+ /* we last inserterd the DebugLoc in the continue statement in the current dstBlk -+ * search for the DebugLoc in the continue statement. -+ * if not found, we have to insert the empty/default DebugLoc */ -+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); -+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); -+ -+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); -+ // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this -+ // loop. -+ for (typename std::set::const_iterator iter = -+ loopLand->breakOnRegs.begin(), -+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { -+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep, -+ *iter); -+ } -+ -+ // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this -+ // loop. -+ for (std::set::const_iterator iter = loopLand->contOnRegs.begin(), -+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { -+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, -+ passRep, *iter); -+ } -+ -+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); -+ -+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), -+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { -+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. 
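mergeLooplandBlock() above materializes the loop with WHILELOOP/ENDLOOP markers and a set of flag registers: break registers are zeroed before the loop, set by the breaking path, and tested after ENDLOOP to propagate the break outward. In plain scalar C++ the emitted shape behaves like this sketch:

  #include <cstdio>

  int main() {
    int breakReg = 0;               // breakInitRegs: zeroed before WHILELOOP
    for (int i = 0; ; ++i) {        // WHILELOOP
      if (i == 3) {
        breakReg = 1;               // the breaking path sets the register
        break;
      }
    }                               // ENDLOOP
    if (breakReg)                   // breakOnRegs: re-test after the loop
      std::printf("propagate break to the outer loop\n");
  }

The same register trick, with contInitRegs/contOnRegs, turns a continue that targets an outer loop into a break of the inner loop followed by a guarded continue outside it.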
-+ } -+ -+ removeSuccessor(landBlk); -+ retireBlock(dstBlk, landBlk); -+} //mergeLooplandBlock -+ -+template -+void CFGStructurizer::reversePredicateSetter(typename BlockT::iterator I) { -+ while (I--) { -+ if (I->getOpcode() == AMDGPU::PRED_X) { -+ switch (static_cast(I)->getOperand(2).getImm()) { -+ case OPCODE_IS_ZERO_INT: -+ static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); -+ return; -+ case OPCODE_IS_NOT_ZERO_INT: -+ static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); -+ return; -+ case OPCODE_IS_ZERO: -+ static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); -+ return; -+ case OPCODE_IS_NOT_ZERO: -+ static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO); -+ return; -+ default: -+ assert(0 && "PRED_X Opcode invalid!"); -+ } -+ } -+ } -+} -+ -+template -+void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, -+ BlockT *exitBlk, -+ BlockT *exitLandBlk, -+ RegiT setReg) { -+ if (DEBUGME) { -+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() -+ << " exit = BB" << exitBlk->getNumber() -+ << " land = BB" << exitLandBlk->getNumber() << "\n"; -+ } -+ -+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); -+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); -+ -+ DebugLoc DL = branchInstr->getDebugLoc(); -+ -+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); -+ -+ // transform exitingBlk to -+ // if ( ) { -+ // exitBlk (if exitBlk != exitLandBlk) -+ // setReg = 1 -+ // break -+ // }endif -+ // successor = {orgSuccessor(exitingBlk) - exitBlk} -+ -+ typename BlockT::iterator branchInstrPos = -+ CFGTraits::getInstrPos(exitingBlk, branchInstr); -+ -+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { -+ //break_logical -+ -+ if (trueBranch != exitBlk) { -+ reversePredicateSetter(branchInstrPos); -+ } -+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); -+ } else { -+ if (trueBranch != exitBlk) { -+ reversePredicateSetter(branchInstr); -+ } -+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); -+ if (exitBlk != exitLandBlk) { -+ //splice is insert-before ... -+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), -+ exitBlk->end()); -+ } -+ if (setReg != INVALIDREGNUM) { -+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); -+ } -+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); -+ } //if_logical -+ -+ //now branchInst can be erase safely -+ branchInstr->eraseFromParent(); -+ -+ //now take care of successors, retire blocks -+ exitingBlk->removeSuccessor(exitBlk); -+ if (exitBlk != exitLandBlk) { -+ //splice is insert-before ... 
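reversePredicateSetter() above flips the comparison modifier on the most recent PRED_X instruction, so a conditional branch can be inverted without restructuring the block; the opcode mapping is a simple involution. A sketch with invented enum values standing in for the real opcode immediates:

  #include <cstdio>

  // Stand-ins for the OPCODE_IS_* immediates swapped by the code above.
  enum Pred { IS_ZERO, IS_NOT_ZERO };

  static Pred invert(Pred p) {
    return p == IS_ZERO ? IS_NOT_ZERO : IS_ZERO;
  }

  int main() {
    Pred p = IS_ZERO;
    std::printf("%d -> %d -> %d (inverting twice restores the original)\n",
                p, invert(p), invert(invert(p)));
  }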
-+ exitBlk->removeSuccessor(exitLandBlk); -+ retireBlock(exitingBlk, exitBlk); -+ } -+ -+} //mergeLoopbreakBlock -+ -+template -+void CFGStructurizer::settleLoopcontBlock(BlockT *contingBlk, -+ BlockT *contBlk, -+ RegiT setReg) { -+ if (DEBUGME) { -+ errs() << "settleLoopcontBlock conting = BB" -+ << contingBlk->getNumber() -+ << ", cont = BB" << contBlk->getNumber() << "\n"; -+ } -+ -+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); -+ if (branchInstr) { -+ assert(CFGTraits::isCondBranch(branchInstr)); -+ typename BlockT::iterator branchInstrPos = -+ CFGTraits::getInstrPos(contingBlk, branchInstr); -+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); -+ int oldOpcode = branchInstr->getOpcode(); -+ DebugLoc DL = branchInstr->getDebugLoc(); -+ -+ // transform contingBlk to -+ // if () { -+ // move instr after branchInstr -+ // continue -+ // or -+ // setReg = 1 -+ // break -+ // }endif -+ // successor = {orgSuccessor(contingBlk) - loopHeader} -+ -+ bool useContinueLogical = -+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); -+ -+ if (useContinueLogical == false) { -+ int branchOpcode = -+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) -+ : CFGTraits::getBranchZeroOpcode(oldOpcode); -+ -+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); -+ -+ if (setReg != INVALIDREGNUM) { -+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); -+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr. -+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); -+ } else { -+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr. -+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); -+ } -+ -+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); -+ } else { -+ int branchOpcode = -+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) -+ : CFGTraits::getContinueZeroOpcode(oldOpcode); -+ -+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); -+ } -+ -+ branchInstr->eraseFromParent(); -+ } else { -+ // if we've arrived here then we've already erased the branch instruction -+ // travel back up the basic block to see the last reference of our debug location -+ // we've just inserted that reference here so it should be representative -+ if (setReg != INVALIDREGNUM) { -+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); -+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr. -+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); -+ } else { -+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr. -+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); -+ } -+ } //else -+ -+} //settleLoopcontBlock -+ -+// BBs in exitBlkSet are determined as in break-path for loopRep, -+// before we can put code for BBs as inside loop-body for loopRep -+// check whether those BBs are determined as cont-BB for parentLoopRep -+// earlier. 
-+// If so, generate a new BB newBlk -+// (1) set newBlk common successor of BBs in exitBlkSet -+// (2) change the continue-instr in BBs in exitBlkSet to break-instr -+// (3) generate continue-instr in newBlk -+// -+template -+typename CFGStructurizer::BlockT * -+CFGStructurizer::relocateLoopcontBlock(LoopT *parentLoopRep, -+ LoopT *loopRep, -+ std::set &exitBlkSet, -+ BlockT *exitLandBlk) { -+ std::set endBlkSet; -+ -+ -+ -+ for (typename std::set::const_iterator iter = exitBlkSet.begin(), -+ iterEnd = exitBlkSet.end(); -+ iter != iterEnd; ++iter) { -+ BlockT *exitBlk = *iter; -+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); -+ -+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) -+ return NULL; -+ -+ endBlkSet.insert(endBlk); -+ } -+ -+ BlockT *newBlk = funcRep->CreateMachineBasicBlock(); -+ funcRep->push_back(newBlk); //insert to function -+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); -+ SHOWNEWBLK(newBlk, "New continue block: "); -+ -+ for (typename std::set::const_iterator iter = endBlkSet.begin(), -+ iterEnd = endBlkSet.end(); -+ iter != iterEnd; ++iter) { -+ BlockT *endBlk = *iter; -+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); -+ if (contInstr) { -+ contInstr->eraseFromParent(); -+ } -+ endBlk->addSuccessor(newBlk); -+ if (DEBUGME) { -+ errs() << "Add new continue Block to BB" -+ << endBlk->getNumber() << " successors\n"; -+ } -+ } -+ -+ return newBlk; -+} //relocateLoopcontBlock -+ -+ -+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as -+// LoopLandBlock. This BB branch on the loop endBranchInit register to the -+// pathes corresponding to the loop exiting branches. -+ -+template -+typename CFGStructurizer::BlockT * -+CFGStructurizer::addLoopEndbranchBlock(LoopT *loopRep, -+ BlockTSmallerVector &exitingBlks, -+ BlockTSmallerVector &exitBlks) { -+ const AMDGPUInstrInfo *tii = -+ static_cast(passRep->getTargetInstrInfo()); -+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); -+ -+ RegiT endBranchReg = static_cast -+ (funcRep->getRegInfo().createVirtualRegister(I32RC)); -+ assert(endBranchReg >= 0); -+ -+ // reg = 0 before entering the loop -+ addLoopEndbranchInitReg(loopRep, endBranchReg); -+ -+ uint32_t numBlks = static_cast(exitingBlks.size()); -+ assert(numBlks >=2 && numBlks == exitBlks.size()); -+ -+ BlockT *preExitingBlk = exitingBlks[0]; -+ BlockT *preExitBlk = exitBlks[0]; -+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); -+ funcRep->push_back(preBranchBlk); //insert to function -+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); -+ -+ BlockT *newLandBlk = preBranchBlk; -+ -+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, -+ newLandBlk); -+ preExitingBlk->removeSuccessor(preExitBlk); -+ preExitingBlk->addSuccessor(newLandBlk); -+ -+ //it is redundant to add reg = 0 to exitingBlks[0] -+ -+ // For 1..n th exiting path (the last iteration handles two pathes) create the -+ // branch to the previous path and the current path. -+ for (uint32_t i = 1; i < numBlks; ++i) { -+ BlockT *curExitingBlk = exitingBlks[i]; -+ BlockT *curExitBlk = exitBlks[i]; -+ BlockT *curBranchBlk; -+ -+ if (i == numBlks - 1) { -+ curBranchBlk = curExitBlk; -+ } else { -+ curBranchBlk = funcRep->CreateMachineBasicBlock(); -+ funcRep->push_back(curBranchBlk); //insert to function -+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); -+ } -+ -+ // Add reg = i to exitingBlks[i]. 
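addLoopEndbranchBlock() gives a multi-exit loop a single landing pad: each exiting path writes its index into endBranchReg, and the chain of equality branches built after the loop dispatches back to the original exit paths. The runtime behaviour it constructs, reduced to plain control flow:

  #include <cstdio>

  int main() {
    // Each exiting block writes its own index before leaving the loop.
    int endBranchReg = 1;           // pretend exit path 1 was taken
    // The chain of branch blocks built after the loop dispatches on it.
    if (endBranchReg == 0)
      std::printf("resume at exit path 0\n");
    else if (endBranchReg == 1)
      std::printf("resume at exit path 1\n");
    else
      std::printf("resume at exit path 2\n");
  }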
-+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, -+ endBranchReg, i); -+ -+ // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge -+ // (exitingBlks[i], newLandBlk). -+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, -+ newLandBlk); -+ curExitingBlk->removeSuccessor(curExitBlk); -+ curExitingBlk->addSuccessor(newLandBlk); -+ -+ // add to preBranchBlk the branch instruction: -+ // if (endBranchReg == preVal) -+ // preExitBlk -+ // else -+ // curBranchBlk -+ // -+ // preValReg = i - 1 -+ -+ DebugLoc DL; -+ RegiT preValReg = static_cast -+ (funcRep->getRegInfo().createVirtualRegister(I32RC)); -+ -+ preBranchBlk->insert(preBranchBlk->begin(), -+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, -+ i - 1)); -+ -+ // condResReg = (endBranchReg == preValReg) -+ RegiT condResReg = static_cast -+ (funcRep->getRegInfo().createVirtualRegister(I32RC)); -+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) -+ .addReg(endBranchReg).addReg(preValReg); -+ -+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) -+ .addMBB(preExitBlk).addReg(condResReg); -+ -+ preBranchBlk->addSuccessor(preExitBlk); -+ preBranchBlk->addSuccessor(curBranchBlk); -+ -+ // Update preExitingBlk, preExitBlk, preBranchBlk. -+ preExitingBlk = curExitingBlk; -+ preExitBlk = curExitBlk; -+ preBranchBlk = curBranchBlk; -+ -+ } //end for 1 .. n blocks -+ -+ return newLandBlk; -+} //addLoopEndbranchBlock -+ -+template -+typename CFGStructurizer::PathToKind -+CFGStructurizer::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, -+ bool allowSideEntry) { -+ assert(dstBlk); -+ -+ if (srcBlk == dstBlk) { -+ return SinglePath_InPath; -+ } -+ -+ while (srcBlk && srcBlk->succ_size() == 1) { -+ srcBlk = *srcBlk->succ_begin(); -+ if (srcBlk == dstBlk) { -+ return SinglePath_InPath; -+ } -+ -+ if (!allowSideEntry && srcBlk->pred_size() > 1) { -+ return Not_SinglePath; -+ } -+ } -+ -+ if (srcBlk && srcBlk->succ_size()==0) { -+ return SinglePath_NotInPath; -+ } -+ -+ return Not_SinglePath; -+} //singlePathTo -+ -+// If there is a single path from srcBlk to dstBlk, return the last block before -+// dstBlk If there is a single path from srcBlk->end without dstBlk, return the -+// last block in the path Otherwise, return NULL -+template -+typename CFGStructurizer::BlockT * -+CFGStructurizer::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, -+ bool allowSideEntry) { -+ assert(dstBlk); -+ -+ if (srcBlk == dstBlk) { -+ return srcBlk; -+ } -+ -+ if (srcBlk->succ_size() == 0) { -+ return srcBlk; -+ } -+ -+ while (srcBlk && srcBlk->succ_size() == 1) { -+ BlockT *preBlk = srcBlk; -+ -+ srcBlk = *srcBlk->succ_begin(); -+ if (srcBlk == NULL) { -+ return preBlk; -+ } -+ -+ if (!allowSideEntry && srcBlk->pred_size() > 1) { -+ return NULL; -+ } -+ } -+ -+ if (srcBlk && srcBlk->succ_size()==0) { -+ return srcBlk; -+ } -+ -+ return NULL; -+ -+} //singlePathEnd -+ -+template -+int CFGStructurizer::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, -+ BlockT *dstBlk) { -+ int cloned = 0; -+ assert(preBlk->isSuccessor(srcBlk)); -+ while (srcBlk && srcBlk != dstBlk) { -+ assert(srcBlk->succ_size() == 1); -+ if (srcBlk->pred_size() > 1) { -+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); -+ ++cloned; -+ } -+ -+ preBlk = srcBlk; -+ srcBlk = *srcBlk->succ_begin(); -+ } -+ -+ return cloned; -+} //cloneOnSideEntryTo -+ -+template -+typename CFGStructurizer::BlockT * -+CFGStructurizer::cloneBlockForPredecessor(BlockT *curBlk, -+ BlockT *predBlk) { -+ assert(predBlk->isSuccessor(curBlk) && -+ "succBlk is not a 
predecessor of curBlk");
-+
-+  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
-+  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
-+  //srcBlk, oldBlk, newBlk
-+
-+  predBlk->removeSuccessor(curBlk);
-+  predBlk->addSuccessor(cloneBlk);
-+
-+  // add all successors to cloneBlk
-+  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
-+
-+  numClonedInstr += curBlk->size();
-+
-+  if (DEBUGME) {
-+    errs() << "Cloned block: " << "BB"
-+           << curBlk->getNumber() << " size " << curBlk->size() << "\n";
-+  }
-+
-+  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
-+
-+  return cloneBlk;
-+} //cloneBlockForPredecessor
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
-+                                               BlockT *exitingBlk) {
-+  BlockT *exitBlk = NULL;
-+
-+  for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
-+       iterSuccEnd = exitingBlk->succ_end();
-+       iterSucc != iterSuccEnd; ++iterSucc) {
-+    BlockT *curBlk = *iterSucc;
-+    if (!loopRep->contains(curBlk)) {
-+      assert(exitBlk == NULL);
-+      exitBlk = curBlk;
-+    }
-+  }
-+
-+  assert(exitBlk != NULL);
-+
-+  return exitBlk;
-+} //exitingBlock2ExitBlock
-+
-+template<class PassT>
-+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
-+                                                BlockT *dstBlk,
-+                                                InstrIterator insertPos) {
-+  InstrIterator spliceEnd;
-+  //look for the input branch instr, not the AMDGPU branch instr
-+  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
-+  if (branchInstr == NULL) {
-+    if (DEBUGME) {
-+      errs() << "migrateInstruction doesn't see a branch instr\n" ;
-+    }
-+    spliceEnd = srcBlk->end();
-+  } else {
-+    if (DEBUGME) {
-+      errs() << "migrateInstruction sees a branch instr\n" ;
-+      branchInstr->dump();
-+    }
-+    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
-+  }
-+  if (DEBUGME) {
-+    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
-+           << "srcSize = " << srcBlk->size() << "\n";
-+  }
-+
-+  //splice inserts before insertPos
-+  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
-+
-+  if (DEBUGME) {
-+    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
-+           << "srcSize = " << srcBlk->size() << "\n";
-+  }
-+} //migrateInstruction
-+
-+// normalizeInfiniteLoopExit changes
-+//   B1:
-+//     uncond_br LoopHeader
-+//
-+// to
-+//   B1:
-+//     cond_br 1 LoopHeader dummyExit
-+// and returns the newly added dummy exit block
-+//
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
-+  BlockT *loopHeader;
-+  BlockT *loopLatch;
-+  loopHeader = LoopRep->getHeader();
-+  loopLatch = LoopRep->getLoopLatch();
-+  BlockT *dummyExitBlk = NULL;
-+  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-+  if (loopHeader != NULL && loopLatch != NULL) {
-+    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
-+    if (branchInstr != NULL && CFGTraits::isUncondBranch(branchInstr)) {
-+      dummyExitBlk = funcRep->CreateMachineBasicBlock();
-+      funcRep->push_back(dummyExitBlk);  //insert to function
-+      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
-+
-+      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
-+
-+      typename BlockT::iterator insertPos =
-+        CFGTraits::getInstrPos(loopLatch, branchInstr);
-+      unsigned immReg =
-+        funcRep->getRegInfo().createVirtualRegister(I32RC);
-+      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
-+      InstrT *newInstr =
-+        CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
-+      
MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false); -+ -+ SHOWNEWINSTR(newInstr); -+ -+ branchInstr->eraseFromParent(); -+ loopLatch->addSuccessor(dummyExitBlk); -+ } -+ } -+ -+ return dummyExitBlk; -+} //normalizeInfiniteLoopExit -+ -+template -+void CFGStructurizer::removeUnconditionalBranch(BlockT *srcBlk) { -+ InstrT *branchInstr; -+ -+ // I saw two unconditional branch in one basic block in example -+ // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. -+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) -+ && CFGTraits::isUncondBranch(branchInstr)) { -+ if (DEBUGME) { -+ errs() << "Removing unconditional branch instruction" ; -+ branchInstr->dump(); -+ } -+ branchInstr->eraseFromParent(); -+ } -+} //removeUnconditionalBranch -+ -+template -+void CFGStructurizer::removeRedundantConditionalBranch(BlockT *srcBlk) { -+ if (srcBlk->succ_size() == 2) { -+ BlockT *blk1 = *srcBlk->succ_begin(); -+ BlockT *blk2 = *(++srcBlk->succ_begin()); -+ -+ if (blk1 == blk2) { -+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); -+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); -+ if (DEBUGME) { -+ errs() << "Removing unneeded conditional branch instruction" ; -+ branchInstr->dump(); -+ } -+ branchInstr->eraseFromParent(); -+ SHOWNEWBLK(blk1, "Removing redundant successor"); -+ srcBlk->removeSuccessor(blk1); -+ } -+ } -+} //removeRedundantConditionalBranch -+ -+template -+void CFGStructurizer::addDummyExitBlock(SmallVector &retBlks) { -+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); -+ funcRep->push_back(dummyExitBlk); //insert to function -+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); -+ -+ for (typename SmallVector::iterator iter = -+ retBlks.begin(), -+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) { -+ BlockT *curBlk = *iter; -+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); -+ if (curInstr) { -+ curInstr->eraseFromParent(); -+ } -+ curBlk->addSuccessor(dummyExitBlk); -+ if (DEBUGME) { -+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() -+ << " successors\n"; -+ } -+ } //for -+ -+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); -+} //addDummyExitBlock -+ -+template -+void CFGStructurizer::removeSuccessor(BlockT *srcBlk) { -+ while (srcBlk->succ_size()) { -+ srcBlk->removeSuccessor(*srcBlk->succ_begin()); -+ } -+} -+ -+template -+void CFGStructurizer::recordSccnum(BlockT *srcBlk, int sccNum) { -+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; -+ -+ if (srcBlkInfo == NULL) { -+ srcBlkInfo = new BlockInfo(); -+ } -+ -+ srcBlkInfo->sccNum = sccNum; -+} -+ -+template -+int CFGStructurizer::getSCCNum(BlockT *srcBlk) { -+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; -+ return srcBlkInfo ? 
srcBlkInfo->sccNum : INVALIDSCCNUM; -+} -+ -+template -+void CFGStructurizer::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { -+ if (DEBUGME) { -+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; -+ } -+ -+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; -+ -+ if (srcBlkInfo == NULL) { -+ srcBlkInfo = new BlockInfo(); -+ } -+ -+ srcBlkInfo->isRetired = true; -+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 -+ && "can't retire block yet"); -+} -+ -+template -+bool CFGStructurizer::isRetiredBlock(BlockT *srcBlk) { -+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; -+ return (srcBlkInfo && srcBlkInfo->isRetired); -+} -+ -+template -+bool CFGStructurizer::isActiveLoophead(BlockT *curBlk) { -+ LoopT *loopRep = loopInfo->getLoopFor(curBlk); -+ while (loopRep && loopRep->getHeader() == curBlk) { -+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep); -+ -+ if(loopLand == NULL) -+ return true; -+ -+ BlockT *landBlk = loopLand->landBlk; -+ assert(landBlk); -+ if (!isRetiredBlock(landBlk)) { -+ return true; -+ } -+ -+ loopRep = loopRep->getParentLoop(); -+ } -+ -+ return false; -+} //isActiveLoophead -+ -+template -+bool CFGStructurizer::needMigrateBlock(BlockT *blk) { -+ const unsigned blockSizeThreshold = 30; -+ const unsigned cloneInstrThreshold = 100; -+ -+ bool multiplePreds = blk && (blk->pred_size() > 1); -+ -+ if(!multiplePreds) -+ return false; -+ -+ unsigned blkSize = blk->size(); -+ return ((blkSize > blockSizeThreshold) -+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); -+} //needMigrateBlock -+ -+template -+typename CFGStructurizer::BlockT * -+CFGStructurizer::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, -+ BlockTSmallerVector &exitBlks, -+ std::set &exitBlkSet) { -+ SmallVector inpathBlks; //in exit path blocks -+ -+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), -+ predIterEnd = landBlk->pred_end(); -+ predIter != predIterEnd; ++predIter) { -+ BlockT *curBlk = *predIter; -+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { -+ inpathBlks.push_back(curBlk); -+ } -+ } //for -+ -+ //if landBlk has predecessors that are not in the given loop, -+ //create a new block -+ BlockT *newLandBlk = landBlk; -+ if (inpathBlks.size() != landBlk->pred_size()) { -+ newLandBlk = funcRep->CreateMachineBasicBlock(); -+ funcRep->push_back(newLandBlk); //insert to function -+ newLandBlk->addSuccessor(landBlk); -+ for (typename SmallVector::iterator iter = -+ inpathBlks.begin(), -+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { -+ BlockT *curBlk = *iter; -+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); -+ //srcBlk, oldBlk, newBlk -+ curBlk->removeSuccessor(landBlk); -+ curBlk->addSuccessor(newLandBlk); -+ } -+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { -+ if (exitBlks[i] == landBlk) { -+ exitBlks[i] = newLandBlk; -+ } -+ } -+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); -+ } -+ -+ setLoopLandBlock(loopRep, newLandBlk); -+ -+ return newLandBlk; -+} // recordLoopbreakLand -+ -+template -+void CFGStructurizer::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ if (theEntry == NULL) { -+ theEntry = new LoopLandInfo(); -+ } -+ assert(theEntry->landBlk == NULL); -+ -+ if (blk == NULL) { -+ blk = funcRep->CreateMachineBasicBlock(); -+ funcRep->push_back(blk); //insert to function -+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); -+ } -+ -+ theEntry->landBlk = blk; -+ -+ if (DEBUGME) { -+ errs() << "setLoopLandBlock loop-header = 
BB" -+ << loopRep->getHeader()->getNumber() -+ << " landing-block = BB" << blk->getNumber() << "\n"; -+ } -+} // setLoopLandBlock -+ -+template -+void CFGStructurizer::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ if (theEntry == NULL) { -+ theEntry = new LoopLandInfo(); -+ } -+ -+ theEntry->breakOnRegs.insert(regNum); -+ -+ if (DEBUGME) { -+ errs() << "addLoopBreakOnReg loop-header = BB" -+ << loopRep->getHeader()->getNumber() -+ << " regNum = " << regNum << "\n"; -+ } -+} // addLoopBreakOnReg -+ -+template -+void CFGStructurizer::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ if (theEntry == NULL) { -+ theEntry = new LoopLandInfo(); -+ } -+ theEntry->contOnRegs.insert(regNum); -+ -+ if (DEBUGME) { -+ errs() << "addLoopContOnReg loop-header = BB" -+ << loopRep->getHeader()->getNumber() -+ << " regNum = " << regNum << "\n"; -+ } -+} // addLoopContOnReg -+ -+template -+void CFGStructurizer::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ if (theEntry == NULL) { -+ theEntry = new LoopLandInfo(); -+ } -+ theEntry->breakInitRegs.insert(regNum); -+ -+ if (DEBUGME) { -+ errs() << "addLoopBreakInitReg loop-header = BB" -+ << loopRep->getHeader()->getNumber() -+ << " regNum = " << regNum << "\n"; -+ } -+} // addLoopBreakInitReg -+ -+template -+void CFGStructurizer::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ if (theEntry == NULL) { -+ theEntry = new LoopLandInfo(); -+ } -+ theEntry->contInitRegs.insert(regNum); -+ -+ if (DEBUGME) { -+ errs() << "addLoopContInitReg loop-header = BB" -+ << loopRep->getHeader()->getNumber() -+ << " regNum = " << regNum << "\n"; -+ } -+} // addLoopContInitReg -+ -+template -+void CFGStructurizer::addLoopEndbranchInitReg(LoopT *loopRep, -+ RegiT regNum) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ if (theEntry == NULL) { -+ theEntry = new LoopLandInfo(); -+ } -+ theEntry->endbranchInitRegs.insert(regNum); -+ -+ if (DEBUGME) { -+ errs() << "addLoopEndbranchInitReg loop-header = BB" -+ << loopRep->getHeader()->getNumber() -+ << " regNum = " << regNum << "\n"; -+ } -+} // addLoopEndbranchInitReg -+ -+template -+typename CFGStructurizer::LoopLandInfo * -+CFGStructurizer::getLoopLandInfo(LoopT *loopRep) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ return theEntry; -+} // getLoopLandInfo -+ -+template -+typename CFGStructurizer::BlockT * -+CFGStructurizer::getLoopLandBlock(LoopT *loopRep) { -+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; -+ -+ return theEntry ? theEntry->landBlk : NULL; -+} // getLoopLandBlock -+ -+ -+template -+bool CFGStructurizer::hasBackEdge(BlockT *curBlk) { -+ LoopT *loopRep = loopInfo->getLoopFor(curBlk); -+ if (loopRep == NULL) -+ return false; -+ -+ BlockT *loopHeader = loopRep->getHeader(); -+ -+ return curBlk->isSuccessor(loopHeader); -+ -+} //hasBackEdge -+ -+template -+unsigned CFGStructurizer::getLoopDepth(LoopT *loopRep) { -+ return loopRep ? 
loopRep->getLoopDepth() : 0;
-+} //getLoopDepth
-+
-+template<class PassT>
-+int CFGStructurizer<PassT>::countActiveBlock
-+(typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
-+  int count = 0;
-+  while (iterStart != iterEnd) {
-+    if (!isRetiredBlock(*iterStart)) {
-+      ++count;
-+    }
-+    ++iterStart;
-+  }
-+
-+  return count;
-+} //countActiveBlock
-+
-+// This is a workaround: findNearestCommonDominator is not available for
-+// post-dominators; a proper fix should go into Dominators.h.
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT*
-+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
-+
-+  if (postDomTree->dominates(blk1, blk2)) {
-+    return blk1;
-+  }
-+  if (postDomTree->dominates(blk2, blk1)) {
-+    return blk2;
-+  }
-+
-+  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
-+  DomTreeNodeT *node2 = postDomTree->getNode(blk2);
-+
-+  // Handle newly cloned node.
-+  if (node1 == NULL && blk1->succ_size() == 1) {
-+    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
-+  }
-+  if (node2 == NULL && blk2->succ_size() == 1) {
-+    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
-+  }
-+
-+  if (node1 == NULL || node2 == NULL) {
-+    return NULL;
-+  }
-+
-+  node1 = node1->getIDom();
-+  while (node1) {
-+    if (postDomTree->dominates(node1, node2)) {
-+      return node1->getBlock();
-+    }
-+    node1 = node1->getIDom();
-+  }
-+
-+  return NULL;
-+}
-+
-+template<class PassT>
-+typename CFGStructurizer<PassT>::BlockT *
-+CFGStructurizer<PassT>::findNearestCommonPostDom
-+(typename std::set<BlockT *> &blks) {
-+  BlockT *commonDom;
-+  typename std::set<BlockT *>::const_iterator iter = blks.begin();
-+  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
-+  for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
-+    BlockT *curBlk = *iter;
-+    if (curBlk != commonDom) {
-+      commonDom = findNearestCommonPostDom(curBlk, commonDom);
-+    }
-+  }
-+
-+  if (DEBUGME) {
-+    errs() << "Common post dominator for exit blocks is ";
-+    if (commonDom) {
-+      errs() << "BB" << commonDom->getNumber() << "\n";
-+    } else {
-+      errs() << "NULL\n";
-+    }
-+  }
-+
-+  return commonDom;
-+} //findNearestCommonPostDom
-+
-+} //end namespace llvm
-+
-+//todo: move-end
-+
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGStructurizer for AMDGPU
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+using namespace llvmCFGStruct;
-+
-+namespace llvm {
-+class AMDGPUCFGStructurizer : public MachineFunctionPass {
-+public:
-+  typedef MachineInstr             InstructionType;
-+  typedef MachineFunction          FunctionType;
-+  typedef MachineBasicBlock        BlockType;
-+  typedef MachineLoopInfo          LoopinfoType;
-+  typedef MachineDominatorTree     DominatortreeType;
-+  typedef MachinePostDominatorTree PostDominatortreeType;
-+  typedef MachineDomTreeNode       DomTreeNodeType;
-+  typedef MachineLoop              LoopType;
-+
-+protected:
-+  TargetMachine &TM;
-+  const TargetInstrInfo *TII;
-+  const AMDGPURegisterInfo *TRI;
-+
-+public:
-+  AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
-+  const TargetInstrInfo *getTargetInstrInfo() const;
-+
-+private:
-+
-+};
-+
-+} //end of namespace llvm
-+AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
-+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
-+  TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
-+}
-+
-+const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
-+  return TII;
-+}
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGPrepare
-+//
-+//===----------------------------------------------------------------------===// -+ -+ -+using namespace llvmCFGStruct; -+ -+namespace llvm { -+class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer { -+public: -+ static char ID; -+ -+public: -+ AMDGPUCFGPrepare(TargetMachine &tm); -+ -+ virtual const char *getPassName() const; -+ virtual void getAnalysisUsage(AnalysisUsage &AU) const; -+ -+ bool runOnMachineFunction(MachineFunction &F); -+ -+private: -+ -+}; -+ -+char AMDGPUCFGPrepare::ID = 0; -+} //end of namespace llvm -+ -+AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) -+ : AMDGPUCFGStructurizer(ID, tm ) { -+} -+const char *AMDGPUCFGPrepare::getPassName() const { -+ return "AMD IL Control Flow Graph Preparation Pass"; -+} -+ -+void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { -+ AU.addPreserved(); -+ AU.addRequired(); -+ AU.addRequired(); -+ AU.addRequired(); -+ AU.addRequired(); -+} -+ -+//===----------------------------------------------------------------------===// -+// -+// CFGPerform -+// -+//===----------------------------------------------------------------------===// -+ -+ -+using namespace llvmCFGStruct; -+ -+namespace llvm { -+class AMDGPUCFGPerform : public AMDGPUCFGStructurizer { -+public: -+ static char ID; -+ -+public: -+ AMDGPUCFGPerform(TargetMachine &tm); -+ virtual const char *getPassName() const; -+ virtual void getAnalysisUsage(AnalysisUsage &AU) const; -+ bool runOnMachineFunction(MachineFunction &F); -+ -+private: -+ -+}; -+ -+char AMDGPUCFGPerform::ID = 0; -+} //end of namespace llvm -+ -+ AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) -+: AMDGPUCFGStructurizer(ID, tm) { -+} -+ -+const char *AMDGPUCFGPerform::getPassName() const { -+ return "AMD IL Control Flow Graph structurizer Pass"; -+} -+ -+void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { -+ AU.addPreserved(); -+ AU.addRequired(); -+ AU.addRequired(); -+ AU.addRequired(); -+ AU.addRequired(); -+} -+ -+//===----------------------------------------------------------------------===// -+// -+// CFGStructTraits -+// -+//===----------------------------------------------------------------------===// -+ -+namespace llvmCFGStruct { -+// this class is tailor to the AMDGPU backend -+template<> -+struct CFGStructTraits { -+ typedef int RegiT; -+ -+ static int getBranchNzeroOpcode(int oldOpcode) { -+ switch(oldOpcode) { -+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; -+ case AMDGPU::BRANCH_COND_i32: -+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; -+ default: -+ assert(0 && "internal error"); -+ } -+ return -1; -+ } -+ -+ static int getBranchZeroOpcode(int oldOpcode) { -+ switch(oldOpcode) { -+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; -+ case AMDGPU::BRANCH_COND_i32: -+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; -+ default: -+ assert(0 && "internal error"); -+ } -+ return -1; -+ } -+ -+ static int getContinueNzeroOpcode(int oldOpcode) { -+ switch(oldOpcode) { -+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; -+ default: -+ assert(0 && "internal error"); -+ }; -+ return -1; -+ } -+ -+ static int getContinueZeroOpcode(int oldOpcode) { -+ switch(oldOpcode) { -+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; -+ default: -+ assert(0 && "internal error"); -+ } -+ return -1; -+ } -+ -+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { -+ return instr->getOperand(0).getMBB(); -+ } -+ -+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { -+ 
instr->getOperand(0).setMBB(blk); -+ } -+ -+ static MachineBasicBlock * -+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { -+ assert(blk->succ_size() == 2); -+ MachineBasicBlock *trueBranch = getTrueBranch(instr); -+ MachineBasicBlock::succ_iterator iter = blk->succ_begin(); -+ MachineBasicBlock::succ_iterator iterNext = iter; -+ ++iterNext; -+ -+ return (*iter == trueBranch) ? *iterNext : *iter; -+ } -+ -+ static bool isCondBranch(MachineInstr *instr) { -+ switch (instr->getOpcode()) { -+ case AMDGPU::JUMP: -+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; -+ case AMDGPU::BRANCH_COND_i32: -+ case AMDGPU::BRANCH_COND_f32: -+ break; -+ default: -+ return false; -+ } -+ return true; -+ } -+ -+ static bool isUncondBranch(MachineInstr *instr) { -+ switch (instr->getOpcode()) { -+ case AMDGPU::JUMP: -+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; -+ case AMDGPU::BRANCH: -+ return true; -+ default: -+ return false; -+ } -+ return true; -+ } -+ -+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { -+ //get DebugLoc from the first MachineBasicBlock instruction with debug info -+ DebugLoc DL; -+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { -+ MachineInstr *instr = &(*iter); -+ if (instr->getDebugLoc().isUnknown() == false) { -+ DL = instr->getDebugLoc(); -+ } -+ } -+ return DL; -+ } -+ -+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { -+ MachineBasicBlock::reverse_iterator iter = blk->rbegin(); -+ MachineInstr *instr = &*iter; -+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { -+ return instr; -+ } -+ return NULL; -+ } -+ -+ // The correct naming for this is getPossibleLoopendBlockBranchInstr. -+ // -+ // BB with backward-edge could have move instructions after the branch -+ // instruction. Such move instruction "belong to" the loop backward-edge. 
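-+  // For instance (illustrative, not from the original source), a loop-latch
-+  // block may end up laid out as:
-+  //     ...
-+  //     JUMP %bb.header
-+  //     MOV  R1, R2    ; belongs to the backward-edge
-+  // which is why the scan below walks backwards past trailing moves.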
-+ // -+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { -+ const AMDGPUInstrInfo * TII = static_cast( -+ blk->getParent()->getTarget().getInstrInfo()); -+ -+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), -+ iterEnd = blk->rend(); iter != iterEnd; ++iter) { -+ // FIXME: Simplify -+ MachineInstr *instr = &*iter; -+ if (instr) { -+ if (isCondBranch(instr) || isUncondBranch(instr)) { -+ return instr; -+ } else if (!TII->isMov(instr->getOpcode())) { -+ break; -+ } -+ } -+ } -+ return NULL; -+ } -+ -+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { -+ MachineBasicBlock::reverse_iterator iter = blk->rbegin(); -+ if (iter != blk->rend()) { -+ MachineInstr *instr = &(*iter); -+ if (instr->getOpcode() == AMDGPU::RETURN) { -+ return instr; -+ } -+ } -+ return NULL; -+ } -+ -+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { -+ MachineBasicBlock::reverse_iterator iter = blk->rbegin(); -+ if (iter != blk->rend()) { -+ MachineInstr *instr = &(*iter); -+ if (instr->getOpcode() == AMDGPU::CONTINUE) { -+ return instr; -+ } -+ } -+ return NULL; -+ } -+ -+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { -+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { -+ MachineInstr *instr = &(*iter); -+ if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) { -+ return instr; -+ } -+ } -+ return NULL; -+ } -+ -+ static bool isReturnBlock(MachineBasicBlock *blk) { -+ MachineInstr *instr = getReturnInstr(blk); -+ bool isReturn = (blk->succ_size() == 0); -+ if (instr) { -+ assert(isReturn); -+ } else if (isReturn) { -+ if (DEBUGME) { -+ errs() << "BB" << blk->getNumber() -+ <<" is return block without RETURN instr\n"; -+ } -+ } -+ -+ return isReturn; -+ } -+ -+ static MachineBasicBlock::iterator -+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { -+ assert(instr->getParent() == blk && "instruction doesn't belong to block"); -+ MachineBasicBlock::iterator iter = blk->begin(); -+ MachineBasicBlock::iterator iterEnd = blk->end(); -+ while (&(*iter) != instr && iter != iterEnd) { -+ ++iter; -+ } -+ -+ assert(iter != iterEnd); -+ return iter; -+ }//getInstrPos -+ -+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, -+ AMDGPUCFGStructurizer *passRep) { -+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); -+ } //insertInstrBefore -+ -+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, -+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) { -+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); -+ MachineInstr *newInstr = -+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); -+ -+ MachineBasicBlock::iterator res; -+ if (blk->begin() != blk->end()) { -+ blk->insert(blk->begin(), newInstr); -+ } else { -+ blk->push_back(newInstr); -+ } -+ -+ SHOWNEWINSTR(newInstr); -+ -+ return newInstr; -+ } //insertInstrBefore -+ -+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, -+ AMDGPUCFGStructurizer *passRep) { -+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); -+ } //insertInstrEnd -+ -+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, -+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) { -+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); -+ MachineInstr *newInstr = blk->getParent() -+ ->CreateMachineInstr(tii->get(newOpcode), DL); -+ -+ blk->push_back(newInstr); -+ //assume the instruction doesn't take any reg operand ... 
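-+    // (callers that do need operands are expected to attach them afterwards
-+    // through MachineInstrBuilder, as insertCondBranchEnd does with addReg)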
-+ -+ SHOWNEWINSTR(newInstr); -+ } //insertInstrEnd -+ -+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, -+ int newOpcode, -+ AMDGPUCFGStructurizer *passRep) { -+ MachineInstr *oldInstr = &(*instrPos); -+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); -+ MachineBasicBlock *blk = oldInstr->getParent(); -+ MachineInstr *newInstr = -+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), -+ DebugLoc()); -+ -+ blk->insert(instrPos, newInstr); -+ //assume the instruction doesn't take any reg operand ... -+ -+ SHOWNEWINSTR(newInstr); -+ return newInstr; -+ } //insertInstrBefore -+ -+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, -+ int newOpcode, -+ AMDGPUCFGStructurizer *passRep, -+ DebugLoc DL) { -+ MachineInstr *oldInstr = &(*instrPos); -+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); -+ MachineBasicBlock *blk = oldInstr->getParent(); -+ MachineInstr *newInstr = -+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), -+ DL); -+ -+ blk->insert(instrPos, newInstr); -+ MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(), -+ false); -+ -+ SHOWNEWINSTR(newInstr); -+ //erase later oldInstr->eraseFromParent(); -+ } //insertCondBranchBefore -+ -+ static void insertCondBranchBefore(MachineBasicBlock *blk, -+ MachineBasicBlock::iterator insertPos, -+ int newOpcode, -+ AMDGPUCFGStructurizer *passRep, -+ RegiT regNum, -+ DebugLoc DL) { -+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); -+ -+ MachineInstr *newInstr = -+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); -+ -+ //insert before -+ blk->insert(insertPos, newInstr); -+ MachineInstrBuilder(newInstr).addReg(regNum, false); -+ -+ SHOWNEWINSTR(newInstr); -+ } //insertCondBranchBefore -+ -+ static void insertCondBranchEnd(MachineBasicBlock *blk, -+ int newOpcode, -+ AMDGPUCFGStructurizer *passRep, -+ RegiT regNum) { -+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); -+ MachineInstr *newInstr = -+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); -+ -+ blk->push_back(newInstr); -+ MachineInstrBuilder(newInstr).addReg(regNum, false); -+ -+ SHOWNEWINSTR(newInstr); -+ } //insertCondBranchEnd -+ -+ -+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, -+ AMDGPUCFGStructurizer *passRep, -+ RegiT regNum, int regVal) { -+ MachineInstr *oldInstr = &(*instrPos); -+ const AMDGPUInstrInfo *tii = -+ static_cast(passRep->getTargetInstrInfo()); -+ MachineBasicBlock *blk = oldInstr->getParent(); -+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, -+ regVal); -+ blk->insert(instrPos, newInstr); -+ -+ SHOWNEWINSTR(newInstr); -+ } //insertAssignInstrBefore -+ -+ static void insertAssignInstrBefore(MachineBasicBlock *blk, -+ AMDGPUCFGStructurizer *passRep, -+ RegiT regNum, int regVal) { -+ const AMDGPUInstrInfo *tii = -+ static_cast(passRep->getTargetInstrInfo()); -+ -+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, -+ regVal); -+ if (blk->begin() != blk->end()) { -+ blk->insert(blk->begin(), newInstr); -+ } else { -+ blk->push_back(newInstr); -+ } -+ -+ SHOWNEWINSTR(newInstr); -+ -+ } //insertInstrBefore -+ -+ static void insertCompareInstrBefore(MachineBasicBlock *blk, -+ MachineBasicBlock::iterator instrPos, -+ AMDGPUCFGStructurizer *passRep, -+ RegiT dstReg, RegiT src1Reg, -+ RegiT src2Reg) { -+ const AMDGPUInstrInfo *tii = -+ static_cast(passRep->getTargetInstrInfo()); -+ MachineInstr *newInstr = -+ 
blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); -+ -+ MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target -+ MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value -+ MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value -+ -+ blk->insert(instrPos, newInstr); -+ SHOWNEWINSTR(newInstr); -+ -+ } //insertCompareInstrBefore -+ -+ static void cloneSuccessorList(MachineBasicBlock *dstBlk, -+ MachineBasicBlock *srcBlk) { -+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), -+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { -+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of -+ } -+ } //cloneSuccessorList -+ -+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { -+ MachineFunction *func = srcBlk->getParent(); -+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); -+ func->push_back(newBlk); //insert to function -+ for (MachineBasicBlock::iterator iter = srcBlk->begin(), -+ iterEnd = srcBlk->end(); -+ iter != iterEnd; ++iter) { -+ MachineInstr *instr = func->CloneMachineInstr(iter); -+ newBlk->push_back(instr); -+ } -+ return newBlk; -+ } -+ -+ //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because -+ //the AMDGPU instruction is not recognized as terminator fix this and retire -+ //this routine -+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, -+ MachineBasicBlock *oldBlk, -+ MachineBasicBlock *newBlk) { -+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); -+ if (branchInstr && isCondBranch(branchInstr) && -+ getTrueBranch(branchInstr) == oldBlk) { -+ setTrueBranch(branchInstr, newBlk); -+ } -+ } -+ -+ static void wrapup(MachineBasicBlock *entryBlk) { -+ assert((!entryBlk->getParent()->getJumpTableInfo() -+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) -+ && "found a jump table"); -+ -+ //collect continue right before endloop -+ SmallVector contInstr; -+ MachineBasicBlock::iterator pre = entryBlk->begin(); -+ MachineBasicBlock::iterator iterEnd = entryBlk->end(); -+ MachineBasicBlock::iterator iter = pre; -+ while (iter != iterEnd) { -+ if (pre->getOpcode() == AMDGPU::CONTINUE -+ && iter->getOpcode() == AMDGPU::ENDLOOP) { -+ contInstr.push_back(pre); -+ } -+ pre = iter; -+ ++iter; -+ } //end while -+ -+ //delete continue right before endloop -+ for (unsigned i = 0; i < contInstr.size(); ++i) { -+ contInstr[i]->eraseFromParent(); -+ } -+ -+ // TODO to fix up jump table so later phase won't be confused. if -+ // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but -+ // there isn't such an interface yet. 
alternatively, replace all the other -+ // blocks in the jump table with the entryBlk //} -+ -+ } //wrapup -+ -+ static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { -+ return &pass.getAnalysis(); -+ } -+ -+ static MachinePostDominatorTree* -+ getPostDominatorTree(AMDGPUCFGStructurizer &pass) { -+ return &pass.getAnalysis(); -+ } -+ -+ static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { -+ return &pass.getAnalysis(); -+ } -+}; // template class CFGStructTraits -+} //end of namespace llvm -+ -+// createAMDGPUCFGPreparationPass- Returns a pass -+FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm -+ ) { -+ return new AMDGPUCFGPrepare(tm ); -+} -+ -+bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { -+ return llvmCFGStruct::CFGStructurizer().prepare(func, -+ *this, -+ TRI); -+} -+ -+// createAMDGPUCFGStructurizerPass- Returns a pass -+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm -+ ) { -+ return new AMDGPUCFGPerform(tm ); -+} -+ -+bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { -+ return llvmCFGStruct::CFGStructurizer().run(func, -+ *this, -+ TRI); -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDevice.cpp llvm-r600/lib/Target/R600/AMDILDevice.cpp ---- llvm-3.2.src/lib/Target/R600/AMDILDevice.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILDevice.cpp 2013-01-25 19:43:57.440049721 +0100 -@@ -0,0 +1,124 @@ -+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+/// \file -+//==-----------------------------------------------------------------------===// -+#include "AMDILDevice.h" -+#include "AMDGPUSubtarget.h" -+ -+using namespace llvm; -+// Default implementation for all of the classes. 
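-+//
-+// A capability lives in at most one of two parallel BitVectors: mHWBits for
-+// features the device executes natively, mSWBits for features that are
-+// emulated; getExecutionMode() below asserts that no bit is set in both.
-+// Illustrative use (Device is a hypothetical AMDGPUDevice pointer):
-+//   if (Device->usesHardware(AMDGPUDeviceInfo::ConstantMem))
-+//     ... emit a native constant-buffer access ...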
-+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) { -+ mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); -+ mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); -+ setCaps(); -+ DeviceFlag = OCL_DEVICE_ALL; -+} -+ -+AMDGPUDevice::~AMDGPUDevice() { -+ mHWBits.clear(); -+ mSWBits.clear(); -+} -+ -+size_t AMDGPUDevice::getMaxGDSSize() const { -+ return 0; -+} -+ -+uint32_t -+AMDGPUDevice::getDeviceFlag() const { -+ return DeviceFlag; -+} -+ -+size_t AMDGPUDevice::getMaxNumCBs() const { -+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { -+ return HW_MAX_NUM_CB; -+ } -+ -+ return 0; -+} -+ -+size_t AMDGPUDevice::getMaxCBSize() const { -+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { -+ return MAX_CB_SIZE; -+ } -+ -+ return 0; -+} -+ -+size_t AMDGPUDevice::getMaxScratchSize() const { -+ return 65536; -+} -+ -+uint32_t AMDGPUDevice::getStackAlignment() const { -+ return 16; -+} -+ -+void AMDGPUDevice::setCaps() { -+ mSWBits.set(AMDGPUDeviceInfo::HalfOps); -+ mSWBits.set(AMDGPUDeviceInfo::ByteOps); -+ mSWBits.set(AMDGPUDeviceInfo::ShortOps); -+ mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); -+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) { -+ mSWBits.set(AMDGPUDeviceInfo::NoInline); -+ } -+ if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) { -+ mSWBits.set(AMDGPUDeviceInfo::MacroDB); -+ } -+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { -+ mSWBits.set(AMDGPUDeviceInfo::ConstantMem); -+ } else { -+ mHWBits.set(AMDGPUDeviceInfo::ConstantMem); -+ } -+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { -+ mSWBits.set(AMDGPUDeviceInfo::PrivateMem); -+ } else { -+ mHWBits.set(AMDGPUDeviceInfo::PrivateMem); -+ } -+ if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) { -+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); -+ } -+ mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps); -+ mSWBits.set(AMDGPUDeviceInfo::LongOps); -+} -+ -+AMDGPUDeviceInfo::ExecutionMode -+AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const { -+ if (mHWBits[Caps]) { -+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps"); -+ return AMDGPUDeviceInfo::Hardware; -+ } -+ -+ if (mSWBits[Caps]) { -+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps"); -+ return AMDGPUDeviceInfo::Software; -+ } -+ -+ return AMDGPUDeviceInfo::Unsupported; -+ -+} -+ -+bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const { -+ return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported; -+} -+ -+bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const { -+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware; -+} -+ -+bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const { -+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software; -+} -+ -+std::string -+AMDGPUDevice::getDataLayout() const { -+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" -+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" -+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" -+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" -+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048" -+ "-n8:16:32:64"); -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDevice.h llvm-r600/lib/Target/R600/AMDILDevice.h ---- llvm-3.2.src/lib/Target/R600/AMDILDevice.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILDevice.h 2013-01-25 19:43:57.440049721 +0100 -@@ -0,0 +1,117 @@ -+//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of 
Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//==-----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Interface for the subtarget data classes. -+// -+/// This file will define the interface that each generation needs to -+/// implement in order to correctly answer queries on the capabilities of the -+/// specific hardware. -+//===----------------------------------------------------------------------===// -+#ifndef AMDILDEVICEIMPL_H -+#define AMDILDEVICEIMPL_H -+#include "AMDIL.h" -+#include "llvm/ADT/BitVector.h" -+ -+namespace llvm { -+ class AMDGPUSubtarget; -+ class MCStreamer; -+//===----------------------------------------------------------------------===// -+// Interface for data that is specific to a single device -+//===----------------------------------------------------------------------===// -+class AMDGPUDevice { -+public: -+ AMDGPUDevice(AMDGPUSubtarget *ST); -+ virtual ~AMDGPUDevice(); -+ -+ // Enum values for the various memory types. -+ enum { -+ RAW_UAV_ID = 0, -+ ARENA_UAV_ID = 1, -+ LDS_ID = 2, -+ GDS_ID = 3, -+ SCRATCH_ID = 4, -+ CONSTANT_ID = 5, -+ GLOBAL_ID = 6, -+ MAX_IDS = 7 -+ } IO_TYPE_IDS; -+ -+ /// \returns The max LDS size that the hardware supports. Size is in -+ /// bytes. -+ virtual size_t getMaxLDSSize() const = 0; -+ -+ /// \returns The max GDS size that the hardware supports if the GDS is -+ /// supported by the hardware. Size is in bytes. -+ virtual size_t getMaxGDSSize() const; -+ -+ /// \returns The max number of hardware constant address spaces that -+ /// are supported by this device. -+ virtual size_t getMaxNumCBs() const; -+ -+ /// \returns The max number of bytes a single hardware constant buffer -+ /// can support. Size is in bytes. -+ virtual size_t getMaxCBSize() const; -+ -+ /// \returns The max number of bytes allowed by the hardware scratch -+ /// buffer. Size is in bytes. -+ virtual size_t getMaxScratchSize() const; -+ -+ /// \brief Get the flag that corresponds to the device. -+ virtual uint32_t getDeviceFlag() const; -+ -+ /// \returns The number of work-items that exist in a single hardware -+ /// wavefront. -+ virtual size_t getWavefrontSize() const = 0; -+ -+ /// \brief Get the generational name of this specific device. -+ virtual uint32_t getGeneration() const = 0; -+ -+ /// \brief Get the stack alignment of this specific device. -+ virtual uint32_t getStackAlignment() const; -+ -+ /// \brief Get the resource ID for this specific device. -+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0; -+ -+ /// \brief Get the max number of UAV's for this device. -+ virtual uint32_t getMaxNumUAVs() const = 0; -+ -+ -+ // API utilizing more detailed capabilities of each family of -+ // cards. If a capability is supported, then either usesHardware or -+ // usesSoftware returned true. If usesHardware returned true, then -+ // usesSoftware must return false for the same capability. Hardware -+ // execution means that the feature is done natively by the hardware -+ // and is not emulated by the softare. Software execution means -+ // that the feature could be done in the hardware, but there is -+ // software that emulates it with possibly using the hardware for -+ // support since the hardware does not fully comply with OpenCL -+ // specs. 
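-+  //
-+  // Illustrative client (hypothetical helper names, not part of this
-+  // interface): selecting between a native 64-bit div/mod and an emulation
-+  // sequence could look like:
-+  //   if (Dev.usesHardware(AMDGPUDeviceInfo::HW64BitDivMod))
-+  //     emitNativeDivMod(...);   // hardware path
-+  //   else if (Dev.usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod))
-+  //     expandDivMod64(...);     // emulated path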
-+ -+ bool isSupported(AMDGPUDeviceInfo::Caps Mode) const; -+ bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const; -+ bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const; -+ virtual std::string getDataLayout() const; -+ static const unsigned int MAX_LDS_SIZE_700 = 16384; -+ static const unsigned int MAX_LDS_SIZE_800 = 32768; -+ static const unsigned int WavefrontSize = 64; -+ static const unsigned int HalfWavefrontSize = 32; -+ static const unsigned int QuarterWavefrontSize = 16; -+protected: -+ virtual void setCaps(); -+ llvm::BitVector mHWBits; -+ llvm::BitVector mSWBits; -+ AMDGPUSubtarget *mSTM; -+ uint32_t DeviceFlag; -+private: -+ AMDGPUDeviceInfo::ExecutionMode -+ getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const; -+}; -+ -+} // namespace llvm -+#endif // AMDILDEVICEIMPL_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.cpp llvm-r600/lib/Target/R600/AMDILDeviceInfo.cpp ---- llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILDeviceInfo.cpp 2013-01-25 19:43:57.440049721 +0100 -@@ -0,0 +1,94 @@ -+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//==-----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Function that creates DeviceInfo from a device name and other information. -+// -+//==-----------------------------------------------------------------------===// -+#include "AMDILDevices.h" -+#include "AMDGPUSubtarget.h" -+ -+using namespace llvm; -+namespace llvm { -+namespace AMDGPUDeviceInfo { -+ -+AMDGPUDevice* getDeviceFromName(const std::string &deviceName, -+ AMDGPUSubtarget *ptr, -+ bool is64bit, bool is64on32bit) { -+ if (deviceName.c_str()[2] == '7') { -+ switch (deviceName.c_str()[3]) { -+ case '1': -+ return new AMDGPU710Device(ptr); -+ case '7': -+ return new AMDGPU770Device(ptr); -+ default: -+ return new AMDGPU7XXDevice(ptr); -+ } -+ } else if (deviceName == "cypress") { -+#if DEBUG -+ assert(!is64bit && "This device does not support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPUCypressDevice(ptr); -+ } else if (deviceName == "juniper") { -+#if DEBUG -+ assert(!is64bit && "This device does not support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPUEvergreenDevice(ptr); -+ } else if (deviceName == "redwood") { -+#if DEBUG -+ assert(!is64bit && "This device does not support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPURedwoodDevice(ptr); -+ } else if (deviceName == "cedar") { -+#if DEBUG -+ assert(!is64bit && "This device does not support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPUCedarDevice(ptr); -+ } else if (deviceName == "barts" || deviceName == "turks") { -+#if DEBUG -+ assert(!is64bit && "This device does not support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPUNIDevice(ptr); -+ } else if (deviceName == "cayman") { -+#if DEBUG -+ assert(!is64bit && "This device does not 
support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPUCaymanDevice(ptr); -+ } else if (deviceName == "caicos") { -+#if DEBUG -+ assert(!is64bit && "This device does not support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPUNIDevice(ptr); -+ } else if (deviceName == "SI") { -+ return new AMDGPUSIDevice(ptr); -+ } else { -+#if DEBUG -+ assert(!is64bit && "This device does not support 64bit pointers!"); -+ assert(!is64on32bit && "This device does not support 64bit" -+ " on 32bit pointers!"); -+#endif -+ return new AMDGPU7XXDevice(ptr); -+ } -+} -+} // End namespace AMDGPUDeviceInfo -+} // End namespace llvm -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.h llvm-r600/lib/Target/R600/AMDILDeviceInfo.h ---- llvm-3.2.src/lib/Target/R600/AMDILDeviceInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILDeviceInfo.h 2013-01-25 19:43:57.440049721 +0100 -@@ -0,0 +1,88 @@ -+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+/// \file -+//==-----------------------------------------------------------------------===// -+#ifndef AMDILDEVICEINFO_H -+#define AMDILDEVICEINFO_H -+ -+ -+#include -+ -+namespace llvm { -+ class AMDGPUDevice; -+ class AMDGPUSubtarget; -+ namespace AMDGPUDeviceInfo { -+ /// Each Capabilities can be executed using a hardware instruction, -+ /// emulated with a sequence of software instructions, or not -+ /// supported at all. -+ enum ExecutionMode { -+ Unsupported = 0, ///< Unsupported feature on the card(Default value) -+ /// This is the execution mode that is set if the feature is emulated in -+ /// software. -+ Software, -+ /// This execution mode is set if the feature exists natively in hardware -+ Hardware -+ }; -+ -+ enum Caps { -+ HalfOps = 0x1, ///< Half float is supported or not. -+ DoubleOps = 0x2, ///< Double is supported or not. -+ ByteOps = 0x3, ///< Byte(char) is support or not. -+ ShortOps = 0x4, ///< Short is supported or not. -+ LongOps = 0x5, ///< Long is supported or not. -+ Images = 0x6, ///< Images are supported or not. -+ ByteStores = 0x7, ///< ByteStores available(!HD4XXX). -+ ConstantMem = 0x8, ///< Constant/CB memory. -+ LocalMem = 0x9, ///< Local/LDS memory. -+ PrivateMem = 0xA, ///< Scratch/Private/Stack memory. -+ RegionMem = 0xB, ///< OCL GDS Memory Extension. -+ FMA = 0xC, ///< Use HW FMA or SW FMA. -+ ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023. -+ MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7. -+ Reserved0 = 0xF, ///< ReservedFlag -+ NoAlias = 0x10, ///< Cached loads. -+ Signed24BitOps = 0x11, ///< Peephole Optimization. -+ /// Debug mode implies that no hardware features or optimizations -+ /// are performned and that all memory access go through a single -+ /// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX). -+ Debug = 0x12, -+ CachedMem = 0x13, ///< Cached mem is available or not. -+ BarrierDetect = 0x14, ///< Detect duplicate barriers. -+ Reserved1 = 0x15, ///< Reserved flag -+ ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available. -+ ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work. -+ TmrReg = 0x18, ///< Flag to specify if Tmr register is supported. 
-+ NoInline = 0x19, ///< Flag to specify that no inlining should occur. -+ MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb. -+ HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod. -+ ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported. -+ PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's. -+ /// If more capabilities are required, then -+ /// this number needs to be increased. -+ /// All capabilities must come before this -+ /// number. -+ MaxNumberCapabilities = 0x20 -+ }; -+ /// These have to be in order with the older generations -+ /// having the lower number enumerations. -+ enum Generation { -+ HD4XXX = 0, ///< 7XX based devices. -+ HD5XXX, ///< Evergreen based devices. -+ HD6XXX, ///< NI/Evergreen+ based devices. -+ HD7XXX, ///< Southern Islands based devices. -+ HDTEST, ///< Experimental feature testing device. -+ HDNUMGEN -+ }; -+ -+ -+ AMDGPUDevice* -+ getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr, -+ bool is64bit = false, bool is64on32bit = false); -+ } // namespace AMDILDeviceInfo -+} // namespace llvm -+#endif // AMDILDEVICEINFO_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILDevices.h llvm-r600/lib/Target/R600/AMDILDevices.h ---- llvm-3.2.src/lib/Target/R600/AMDILDevices.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILDevices.h 2013-01-25 19:43:57.440049721 +0100 -@@ -0,0 +1,19 @@ -+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+/// \file -+//==-----------------------------------------------------------------------===// -+#ifndef AMDIL_DEVICES_H -+#define AMDIL_DEVICES_H -+// Include all of the device specific header files -+#include "AMDIL7XXDevice.h" -+#include "AMDILDevice.h" -+#include "AMDILEvergreenDevice.h" -+#include "AMDILNIDevice.h" -+#include "AMDILSIDevice.h" -+ -+#endif // AMDIL_DEVICES_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.cpp llvm-r600/lib/Target/R600/AMDILEvergreenDevice.cpp ---- llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILEvergreenDevice.cpp 2013-01-25 19:43:57.440049721 +0100 -@@ -0,0 +1,169 @@ -+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+// -+/// \file -+//==-----------------------------------------------------------------------===// -+#include "AMDILEvergreenDevice.h" -+ -+using namespace llvm; -+ -+AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST) -+: AMDGPUDevice(ST) { -+ setCaps(); -+ std::string name = ST->getDeviceName(); -+ if (name == "cedar") { -+ DeviceFlag = OCL_DEVICE_CEDAR; -+ } else if (name == "redwood") { -+ DeviceFlag = OCL_DEVICE_REDWOOD; -+ } else if (name == "cypress") { -+ DeviceFlag = OCL_DEVICE_CYPRESS; -+ } else { -+ DeviceFlag = OCL_DEVICE_JUNIPER; -+ } -+} -+ -+AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() { -+} -+ -+size_t AMDGPUEvergreenDevice::getMaxLDSSize() const { -+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { -+ return MAX_LDS_SIZE_800; -+ } else { -+ return 0; -+ } -+} -+size_t AMDGPUEvergreenDevice::getMaxGDSSize() const { -+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { -+ return MAX_LDS_SIZE_800; -+ } else { -+ return 0; -+ } -+} -+uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const { -+ return 12; -+} -+ -+uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const { -+ switch(id) { -+ default: -+ assert(0 && "ID type passed in is unknown!"); -+ break; -+ case CONSTANT_ID: -+ case RAW_UAV_ID: -+ return GLOBAL_RETURN_RAW_UAV_ID; -+ case GLOBAL_ID: -+ case ARENA_UAV_ID: -+ return DEFAULT_ARENA_UAV_ID; -+ case LDS_ID: -+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { -+ return DEFAULT_LDS_ID; -+ } else { -+ return DEFAULT_ARENA_UAV_ID; -+ } -+ case GDS_ID: -+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { -+ return DEFAULT_GDS_ID; -+ } else { -+ return DEFAULT_ARENA_UAV_ID; -+ } -+ case SCRATCH_ID: -+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { -+ return DEFAULT_SCRATCH_ID; -+ } else { -+ return DEFAULT_ARENA_UAV_ID; -+ } -+ }; -+ return 0; -+} -+ -+size_t AMDGPUEvergreenDevice::getWavefrontSize() const { -+ return AMDGPUDevice::WavefrontSize; -+} -+ -+uint32_t AMDGPUEvergreenDevice::getGeneration() const { -+ return AMDGPUDeviceInfo::HD5XXX; -+} -+ -+void AMDGPUEvergreenDevice::setCaps() { -+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); -+ mHWBits.set(AMDGPUDeviceInfo::ArenaUAV); -+ mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); -+ mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod); -+ mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps); -+ if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) { -+ mHWBits.set(AMDGPUDeviceInfo::ByteStores); -+ } -+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { -+ mSWBits.set(AMDGPUDeviceInfo::LocalMem); -+ mSWBits.set(AMDGPUDeviceInfo::RegionMem); -+ } else { -+ mHWBits.set(AMDGPUDeviceInfo::LocalMem); -+ mHWBits.set(AMDGPUDeviceInfo::RegionMem); -+ } -+ mHWBits.set(AMDGPUDeviceInfo::Images); -+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) { -+ mHWBits.set(AMDGPUDeviceInfo::NoAlias); -+ } -+ mHWBits.set(AMDGPUDeviceInfo::CachedMem); -+ if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) { -+ mHWBits.set(AMDGPUDeviceInfo::MultiUAV); -+ } -+ mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps); -+ mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps); -+ mHWBits.set(AMDGPUDeviceInfo::ArenaVectors); -+ mHWBits.set(AMDGPUDeviceInfo::LongOps); -+ mSWBits.reset(AMDGPUDeviceInfo::LongOps); -+ mHWBits.set(AMDGPUDeviceInfo::TmrReg); -+} -+ -+AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST) -+ : AMDGPUEvergreenDevice(ST) { -+ setCaps(); -+} -+ -+AMDGPUCypressDevice::~AMDGPUCypressDevice() { -+} -+ -+void AMDGPUCypressDevice::setCaps() { -+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { -+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps); 
-+    mHWBits.set(AMDGPUDeviceInfo::FMA);
-+  }
-+}
-+
-+
-+AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
-+  : AMDGPUEvergreenDevice(ST) {
-+  setCaps();
-+}
-+
-+AMDGPUCedarDevice::~AMDGPUCedarDevice() {
-+}
-+
-+void AMDGPUCedarDevice::setCaps() {
-+  mSWBits.set(AMDGPUDeviceInfo::FMA);
-+}
-+
-+size_t AMDGPUCedarDevice::getWavefrontSize() const {
-+  return AMDGPUDevice::QuarterWavefrontSize;
-+}
-+
-+AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
-+  : AMDGPUEvergreenDevice(ST) {
-+  setCaps();
-+}
-+
-+AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {
-+}
-+
-+void AMDGPURedwoodDevice::setCaps() {
-+  mSWBits.set(AMDGPUDeviceInfo::FMA);
-+}
-+
-+size_t AMDGPURedwoodDevice::getWavefrontSize() const {
-+  return AMDGPUDevice::HalfWavefrontSize;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.h llvm-r600/lib/Target/R600/AMDILEvergreenDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDILEvergreenDevice.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILEvergreenDevice.h	2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,93 @@
-+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+///
-+/// This file will define the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDILEVERGREENDEVICE_H
-+#define AMDILEVERGREENDEVICE_H
-+#include "AMDILDevice.h"
-+#include "AMDGPUSubtarget.h"
-+
-+namespace llvm {
-+  class AMDGPUSubtarget;
-+//===----------------------------------------------------------------------===//
-+// Evergreen generation of devices and their respective sub classes
-+//===----------------------------------------------------------------------===//
-+
-+
-+/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
-+/// series of cards.
-+///
-+/// This class contains information required to differentiate
-+/// the Evergreen device from the generic AMDGPUDevice. This device represents
-+/// the capabilities of the 'Juniper' cards, also known as the HD57XX.
-+class AMDGPUEvergreenDevice : public AMDGPUDevice {
-+public:
-+  AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
-+  virtual ~AMDGPUEvergreenDevice();
-+  virtual size_t getMaxLDSSize() const;
-+  virtual size_t getMaxGDSSize() const;
-+  virtual size_t getWavefrontSize() const;
-+  virtual uint32_t getGeneration() const;
-+  virtual uint32_t getMaxNumUAVs() const;
-+  virtual uint32_t getResourceID(uint32_t) const;
-+protected:
-+  virtual void setCaps();
-+};
-+
-+/// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has
-+/// support for double precision operations. This device is used to represent
-+/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
-+/// and HD59XX cards.
-+class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
-+public:
-+  AMDGPUCypressDevice(AMDGPUSubtarget *ST);
-+  virtual ~AMDGPUCypressDevice();
-+private:
-+  virtual void setCaps();
-+};
-+
-+
-+/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
-+/// devices.
-+/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
-+/// devices.
-+///
-+/// This class differs from the base AMDGPUEvergreenDevice in that the
-+/// device is a ~quarter of the 'Juniper'. These are commercially known as the
-+/// HD54XX and HD53XX series of cards.
-+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
-+public:
-+  AMDGPUCedarDevice(AMDGPUSubtarget *ST);
-+  virtual ~AMDGPUCedarDevice();
-+  virtual size_t getWavefrontSize() const;
-+private:
-+  virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based
-+/// devices.
-+///
-+/// This class differs from the base class, in that these devices are
-+/// considered about half of a 'Juniper' device. These are commercially known as
-+/// the HD55XX and HD56XX series of cards.
-+class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
-+public:
-+  AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
-+  virtual ~AMDGPURedwoodDevice();
-+  virtual size_t getWavefrontSize() const;
-+private:
-+  virtual void setCaps();
-+};
-+
-+} // namespace llvm
-+#endif // AMDILEVERGREENDEVICE_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.cpp llvm-r600/lib/Target/R600/AMDILFrameLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILFrameLowering.cpp 2013-01-25 19:43:57.440049721 +0100
-@@ -0,0 +1,47 @@
-+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface to describe a layout of a stack frame on an AMDGPU target
-+/// machine.
-+//
-+//===----------------------------------------------------------------------===//
-+#include "AMDILFrameLowering.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+
-+using namespace llvm;
-+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
-+    int LAO, unsigned TransAl)
-+  : TargetFrameLowering(D, StackAl, LAO, TransAl) {
-+}
-+
-+AMDGPUFrameLowering::~AMDGPUFrameLowering() {
-+}
-+
-+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
-+                                         int FI) const {
-+  const MachineFrameInfo *MFI = MF.getFrameInfo();
-+  return MFI->getObjectOffset(FI);
-+}
-+
-+const TargetFrameLowering::SpillSlot *
-+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-+  NumEntries = 0;
-+  return 0;
-+}
-+void
-+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
-+}
-+void
-+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
-+}
-+bool
-+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
-+  return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.h llvm-r600/lib/Target/R600/AMDILFrameLowering.h
---- llvm-3.2.src/lib/Target/R600/AMDILFrameLowering.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILFrameLowering.h 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,40 @@
-+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface to describe a layout of a stack frame on an AMDIL target
-+/// machine.
-+//
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDILFRAME_LOWERING_H
-+#define AMDILFRAME_LOWERING_H
-+
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/Target/TargetFrameLowering.h"
-+
-+namespace llvm {
-+
-+/// \brief Information about the stack frame layout on the AMDGPU targets.
-+///
-+/// It holds the direction of the stack growth, the known stack alignment on
-+/// entry to each function, and the offset to the locals area.
-+/// See TargetFrameInfo for more comments.
-+class AMDGPUFrameLowering : public TargetFrameLowering {
-+public:
-+  AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
-+                      unsigned TransAl = 1);
-+  virtual ~AMDGPUFrameLowering();
-+  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-+  virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
-+  virtual void emitPrologue(MachineFunction &MF) const;
-+  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-+  virtual bool hasFP(const MachineFunction &MF) const;
-+};
-+} // namespace llvm
-+#endif // AMDILFRAME_LOWERING_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL.h llvm-r600/lib/Target/R600/AMDIL.h
---- llvm-3.2.src/lib/Target/R600/AMDIL.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL.h 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,122 @@
-+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// This file contains the entry points for global functions defined in the LLVM
-+/// AMDGPU back-end.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDIL_H
-+#define AMDIL_H
-+
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/Target/TargetMachine.h"
-+
-+#define ARENA_SEGMENT_RESERVED_UAVS 12
-+#define DEFAULT_ARENA_UAV_ID 8
-+#define DEFAULT_RAW_UAV_ID 7
-+#define GLOBAL_RETURN_RAW_UAV_ID 11
-+#define HW_MAX_NUM_CB 8
-+#define MAX_NUM_UNIQUE_UAVS 8
-+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
-+#define OPENCL_MAX_READ_IMAGES 128
-+#define OPENCL_MAX_WRITE_IMAGES 8
-+#define OPENCL_MAX_SAMPLERS 16
-+
-+// The next two values can never be zero, as zero is the ID that is
-+// used to assert against.
-+#define DEFAULT_LDS_ID     1
-+#define DEFAULT_GDS_ID     1
-+#define DEFAULT_SCRATCH_ID 1
-+#define DEFAULT_VEC_SLOTS  8
-+
-+#define OCL_DEVICE_RV710        0x0001
-+#define OCL_DEVICE_RV730        0x0002
-+#define OCL_DEVICE_RV770        0x0004
-+#define OCL_DEVICE_CEDAR        0x0008
-+#define OCL_DEVICE_REDWOOD      0x0010
-+#define OCL_DEVICE_JUNIPER      0x0020
-+#define OCL_DEVICE_CYPRESS      0x0040
-+#define OCL_DEVICE_CAICOS       0x0080
-+#define OCL_DEVICE_TURKS        0x0100
-+#define OCL_DEVICE_BARTS        0x0200
-+#define OCL_DEVICE_CAYMAN       0x0400
-+#define OCL_DEVICE_ALL          0x3FFF
-+
-+/// The number of function IDs that are reserved for
-+/// internal compiler usage.
-+const unsigned int RESERVED_FUNCS = 1024;
-+
-+namespace llvm {
-+class AMDGPUInstrPrinter;
-+class FunctionPass;
-+class MCAsmInfo;
-+class raw_ostream;
-+class Target;
-+class TargetMachine;
-+
-+// Instruction selection passes.
-+FunctionPass*
-+  createAMDGPUISelDag(TargetMachine &TM);
-+FunctionPass*
-+  createAMDGPUPeepholeOpt(TargetMachine &TM);
-+
-+// Pre emit passes.
-+FunctionPass*
-+  createAMDGPUCFGPreparationPass(TargetMachine &TM);
-+FunctionPass*
-+  createAMDGPUCFGStructurizerPass(TargetMachine &TM);
-+
-+extern Target TheAMDGPUTarget;
-+} // end namespace llvm;
-+
-+// Include device information enumerations
-+#include "AMDILDeviceInfo.h"
-+
-+namespace llvm {
-+/// OpenCL uses address spaces to differentiate between
-+/// various memory regions on the hardware. On the CPU
-+/// all of the address spaces point to the same memory,
-+/// however on the GPU, each address space points to
-+/// a separate piece of memory that is unique from other
-+/// memory locations.
-+namespace AMDGPUAS {
-+enum AddressSpaces {
-+  PRIVATE_ADDRESS  = 0, ///< Address space for private memory.
-+  GLOBAL_ADDRESS   = 1, ///< Address space for global memory (RAT0, VTX0).
-+  CONSTANT_ADDRESS = 2, ///< Address space for constant memory
-+  LOCAL_ADDRESS    = 3, ///< Address space for local memory.
-+  REGION_ADDRESS   = 4, ///< Address space for region memory.
-+  ADDRESS_NONE     = 5, ///< Address space for unknown memory.
-+  PARAM_D_ADDRESS  = 6, ///< Address space for direct addressable parameter memory (CONST0)
-+  PARAM_I_ADDRESS  = 7, ///< Address space for indirect addressable parameter memory (VTX1)
-+  USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
-+  CONSTANT_BUFFER_0 = 9,
-+  CONSTANT_BUFFER_1 = 10,
-+  CONSTANT_BUFFER_2 = 11,
-+  CONSTANT_BUFFER_3 = 12,
-+  CONSTANT_BUFFER_4 = 13,
-+  CONSTANT_BUFFER_5 = 14,
-+  CONSTANT_BUFFER_6 = 15,
-+  CONSTANT_BUFFER_7 = 16,
-+  CONSTANT_BUFFER_8 = 17,
-+  CONSTANT_BUFFER_9 = 18,
-+  CONSTANT_BUFFER_10 = 19,
-+  CONSTANT_BUFFER_11 = 20,
-+  CONSTANT_BUFFER_12 = 21,
-+  CONSTANT_BUFFER_13 = 22,
-+  CONSTANT_BUFFER_14 = 23,
-+  CONSTANT_BUFFER_15 = 24,
-+  LAST_ADDRESS = 25
-+};
-+
-+} // namespace AMDGPUAS
-+
-+} // end namespace llvm
-+#endif // AMDIL_H
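Each AMDGPUAS enumerator above tags a pointer type in the IR, so a memory operation can be classified by reading the address space back off the pointer. The selector's checkType() helper later in this patch does exactly that; a standalone sketch of the check, assuming LLVM 3.2-era header paths:

    #include "llvm/DerivedTypes.h"  // PointerType in the 3.2 tree
    #include "llvm/Value.h"

    // True when Ptr points into the given AMDGPUAS address space.
    static bool isInAddressSpace(const llvm::Value *Ptr, unsigned AS) {
      if (!Ptr)
        return false;
      const llvm::PointerType *PT =
          llvm::dyn_cast<llvm::PointerType>(Ptr->getType());
      return PT && PT->getAddressSpace() == AS;
    }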
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILInstrInfo.td llvm-r600/lib/Target/R600/AMDILInstrInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDILInstrInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILInstrInfo.td 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,208 @@
-+//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+// This file describes the AMDIL instructions in TableGen format.
-+//
-+//===----------------------------------------------------------------------===//
-+// AMDIL Instruction Predicate Definitions
-+// Predicate that is set to true if the hardware supports double precision
-+// divide
-+def HasHWDDiv : Predicate<"Subtarget.device()"
-+                          "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
-+                          "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
-+
-+// Predicate that is set to true if the hardware supports double, but not double
-+// precision divide in hardware
-+def HasSWDDiv : Predicate<"Subtarget.device()"
-+                          "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
-+                          "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
-+
-+// Predicate that is set to true if the hardware supports 24bit signed
-+// math ops. Otherwise a software expansion to 32bit math ops is used instead.
-+def HasHWSign24Bit : Predicate<"Subtarget.device()"
-+                               "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
-+
-+// Predicate that is set to true if 64bit operations are supported or not
-+def HasHW64Bit : Predicate<"Subtarget.device()"
-+                           "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
-+def HasSW64Bit : Predicate<"Subtarget.device()"
-+                           "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
-+
-+// Predicate that is set to true if the timer register is supported
-+def HasTmrRegister : Predicate<"Subtarget.device()"
-+                               "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
-+// Predicate that is true if we are at least evergreen series
-+def HasDeviceIDInst : Predicate<"Subtarget.device()"
-+                                "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
-+
-+// Predicate that is true if we have region address space.
-+def hasRegionAS : Predicate<"Subtarget.device()"
-+                            "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
-+
-+// Predicate that is false if we don't have region address space.
-+def noRegionAS : Predicate<"!Subtarget.device()"
-+                           "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
-+
-+
-+// Predicate that is set to true if 64bit Mul is supported in the IL or not
-+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
-+                           ">= CAL_VERSION_SC_139"
-+                           "&& Subtarget.device()"
-+                           "->getGeneration() >="
-+                           "AMDGPUDeviceInfo::HD5XXX">;
-+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
-+                           "< CAL_VERSION_SC_139">;
-+// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
-+def HasHW64DivMod : Predicate<"Subtarget.device()"
-+                              "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
-+def HasSW64DivMod : Predicate<"Subtarget.device()"
-+                              "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
-+
-+// Predicate that is set to true if 64bit pointers are used.
-+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
-+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
-+//===--------------------------------------------------------------------===//
-+// Custom Operands
-+//===--------------------------------------------------------------------===//
-+def brtarget : Operand<OtherVT>;
-+
-+//===--------------------------------------------------------------------===//
-+// Custom Selection DAG Type Profiles
-+//===--------------------------------------------------------------------===//
-+//===----------------------------------------------------------------------===//
-+// Generic Profile Types
-+//===----------------------------------------------------------------------===//
-+
-+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
-+    SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-+    ]>;
-+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
-+    SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
-+    ]>;
-+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
-+    SDTCisEltOfVec<1, 0>
-+    ]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Flow Control Profile Types
-+//===----------------------------------------------------------------------===//
-+// Branch instruction where second and third are basic blocks
-+def SDTIL_BRCond : SDTypeProfile<0, 2, [
-+    SDTCisVT<0, OtherVT>
-+    ]>;
-+
-+//===--------------------------------------------------------------------===//
-+// Custom Selection DAG Nodes
-+//===--------------------------------------------------------------------===//
-+//===----------------------------------------------------------------------===//
-+// Flow Control DAG Nodes
-+//===----------------------------------------------------------------------===//
-+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Call/Return DAG Nodes
-+//===----------------------------------------------------------------------===//
-+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
-+    [SDNPHasChain, SDNPOptInGlue]>;
-+
-+//===--------------------------------------------------------------------===//
-+// Instructions
-+//===--------------------------------------------------------------------===//
-+// Floating point math functions
-+def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-+def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
-+
-+//===----------------------------------------------------------------------===//
-+// Integer functions
-+//===----------------------------------------------------------------------===//
-+def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
-+    [SDNPCommutative, SDNPAssociative]>;
-+
-+//===--------------------------------------------------------------------===//
-+// Custom Pattern DAG Nodes
-+//===--------------------------------------------------------------------===//
-+def global_store : PatFrag<(ops node:$val, node:$ptr),
-+    (store node:$val, node:$ptr), [{
-+  return isGlobalStore(dyn_cast<StoreSDNode>(N));
-+}]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Load pattern fragments
-+//===----------------------------------------------------------------------===//
-+// Global address space loads
-+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-+  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-+}]>;
-+// Constant address space loads
-+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-+  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-+}]>;
-+
-+//===----------------------------------------------------------------------===//
-+// Complex addressing mode patterns
-+//===----------------------------------------------------------------------===//
-+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
-+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
-+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
-+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
-+
-+//===----------------------------------------------------------------------===//
-+// Instruction format classes
-+//===----------------------------------------------------------------------===//
-+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-+: Instruction {
-+
-+  let Namespace = "AMDGPU";
-+  dag OutOperandList = outs;
-+  dag InOperandList = ins;
-+  let Pattern = pattern;
-+  let AsmString = !strconcat(asmstr, "\n");
-+  let isPseudo = 1;
-+  let Itinerary = NullALU;
-+  bit hasIEEEFlag = 0;
-+  bit hasZeroOpFlag = 0;
-+  let mayLoad = 0;
-+  let mayStore = 0;
-+  let hasSideEffects = 0;
-+}
-+
-+//===--------------------------------------------------------------------===//
-+// Multiclass Instruction formats
-+//===--------------------------------------------------------------------===//
-+// Multiclass that handles branch instructions
-+multiclass BranchConditional<SDNode Op> {
-+  def _i32 : ILFormat<(outs),
-+      (ins brtarget:$target, GPRI32:$src0),
-+      "; i32 Pseudo branch instruction",
-+      [(Op bb:$target, GPRI32:$src0)]>;
-+  def _f32 : ILFormat<(outs),
-+      (ins brtarget:$target, GPRF32:$src0),
-+      "; f32 Pseudo branch instruction",
-+      [(Op bb:$target, GPRF32:$src0)]>;
-+}
-+
-+// Only scalar types should generate flow control
-+multiclass BranchInstr<string name> {
-+  def _i32 : ILFormat<(outs), (ins GPRI32:$src),
-+      !strconcat(name, " $src"), []>;
-+  def _f32 : ILFormat<(outs), (ins GPRF32:$src),
-+      !strconcat(name, " $src"), []>;
-+}
-+// Only scalar types should generate flow control
-+multiclass BranchInstr2<string name> {
-+  def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
-+      !strconcat(name, " $src0, $src1"), []>;
-+  def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
-+      !strconcat(name, " $src0, $src1"), []>;
-+}
-+
-+//===--------------------------------------------------------------------===//
-+// Intrinsics support
-+//===--------------------------------------------------------------------===//
-+include "AMDILIntrinsics.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.cpp llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.cpp 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,79 @@
-+//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU Implementation of the IntrinsicInfo class.
-+//
-+//===-----------------------------------------------------------------------===//
-+
-+#include "AMDILIntrinsicInfo.h"
-+#include "AMDIL.h"
-+#include "AMDGPUSubtarget.h"
-+#include "llvm/DerivedTypes.h"
-+#include "llvm/Intrinsics.h"
-+#include "llvm/Module.h"
-+
-+using namespace llvm;
-+
-+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-+
-+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
-+  : TargetIntrinsicInfo() {
-+}
-+
-+std::string
-+AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
-+    unsigned int numTys) const {
-+  static const char* const names[] = {
-+#define GET_INTRINSIC_NAME_TABLE
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_INTRINSIC_NAME_TABLE
-+  };
-+
-+  if (IntrID < Intrinsic::num_intrinsics) {
-+    return 0;
-+  }
-+  assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
-+      && "Invalid intrinsic ID");
-+
-+  std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
-+  return Result;
-+}
-+
-+unsigned int
-+AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
-+#define GET_FUNCTION_RECOGNIZER
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_FUNCTION_RECOGNIZER
-+  AMDGPUIntrinsic::ID IntrinsicID
-+    = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
-+  IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
-+
-+  if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
-+    return IntrinsicID;
-+  }
-+  return 0;
-+}
-+
-+bool
-+AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
-+  // Overload Table
-+#define GET_INTRINSIC_OVERLOAD_TABLE
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_INTRINSIC_OVERLOAD_TABLE
-+}
-+
-+Function*
-+AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
-+    Type **Tys,
-+    unsigned numTys) const {
-+  assert(!"Not implemented");
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.h llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDILIntrinsicInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILIntrinsicInfo.h 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,49 @@
-+//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
-+//
-+//===-----------------------------------------------------------------------===//
-+#ifndef AMDIL_INTRINSICS_H
-+#define AMDIL_INTRINSICS_H
-+
-+#include "llvm/Intrinsics.h"
-+#include "llvm/Target/TargetIntrinsicInfo.h"
-+
-+namespace llvm {
-+class TargetMachine;
-+
-+namespace AMDGPUIntrinsic {
-+enum ID {
-+  last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
-+#define GET_INTRINSIC_ENUM_VALUES
-+#include "AMDGPUGenIntrinsics.inc"
-+#undef GET_INTRINSIC_ENUM_VALUES
-+  , num_AMDGPU_intrinsics
-+};
-+
-+} // end namespace AMDGPUIntrinsic
-+
-+class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
-+public:
-+  AMDGPUIntrinsicInfo(TargetMachine *tm);
-+  std::string getName(unsigned int IntrId, Type **Tys = 0,
-+      unsigned int numTys = 0) const;
-+  unsigned int lookupName(const char *Name, unsigned int Len) const;
-+  bool isOverloaded(unsigned int IID) const;
-+  Function *getDeclaration(Module *M, unsigned int ID,
-+      Type **Tys = 0,
-+      unsigned int numTys = 0) const;
-+};
-+
-+} // end namespace llvm
-+
-+#endif // AMDIL_INTRINSICS_H
-+
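The enum above splices the target's intrinsics onto the end of the core table: last_non_AMDGPU_intrinsic pins the first AMDGPU ID to Intrinsic::num_intrinsics, so getName() in AMDILIntrinsicInfo.cpp can index the TableGen-emitted name table by subtracting the core count. The arithmetic in isolation, with a made-up two-entry table standing in for the generated one:

    #include <cassert>
    #include <string>

    static const unsigned NumCoreIntrinsics = 1000;  // stand-in for Intrinsic::num_intrinsics
    static const char *const Names[] = { "llvm.AMDGPU.a", "llvm.AMDGPU.b" };

    std::string nameOfTargetIntrinsic(unsigned IntrID) {
      assert(IntrID >= NumCoreIntrinsics && "not a target intrinsic");
      return Names[IntrID - NumCoreIntrinsics];  // offset past the core table
    }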
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILIntrinsics.td llvm-r600/lib/Target/R600/AMDILIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/AMDILIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILIntrinsics.td 2013-01-25 19:43:57.446716388 +0100
-@@ -0,0 +1,242 @@
-+//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+// This file defines all of the AMDIL-specific intrinsics
-+//
-+//===---------------------------------------------------------------===//
-+//===--------------------------------------------------------------------===//
-+// Intrinsic classes
-+// Generic versions of the above classes but for Target specific intrinsics
-+// instead of SDNode patterns.
-+//===--------------------------------------------------------------------===//
-+let TargetPrefix = "AMDIL", isTarget = 1 in {
-+  class VoidIntLong :
-+    Intrinsic<[llvm_i64_ty], [], []>;
-+  class VoidIntInt :
-+    Intrinsic<[llvm_i32_ty], [], []>;
-+  class VoidIntBool :
-+    Intrinsic<[llvm_i32_ty], [], []>;
-+  class UnaryIntInt :
-+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-+  class UnaryIntFloat :
-+    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-+  class ConvertIntFTOI :
-+    Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
-+  class ConvertIntITOF :
-+    Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
-+  class UnaryIntNoRetInt :
-+    Intrinsic<[], [llvm_anyint_ty], []>;
-+  class UnaryIntNoRetFloat :
-+    Intrinsic<[], [llvm_anyfloat_ty], []>;
-+  class BinaryIntInt :
-+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+  class BinaryIntFloat :
-+    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+  class BinaryIntNoRetInt :
-+    Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
-+  class BinaryIntNoRetFloat :
-+    Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
-+  class TernaryIntInt :
-+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
-+      LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+  class TernaryIntFloat :
-+    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
-+      LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+  class QuaternaryIntInt :
-+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
-+      LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-+  class UnaryAtomicInt :
-+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+  class BinaryAtomicInt :
-+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+  class TernaryAtomicInt :
-+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
-+  class UnaryAtomicIntNoRet :
-+    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+  class BinaryAtomicIntNoRet :
-+    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+  class TernaryAtomicIntNoRet :
-+    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-+}
-+
-+let TargetPrefix = "AMDIL", isTarget = 1 in {
-+  def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
-+
-+  def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
-+          TernaryIntInt;
-+  def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
-+          TernaryIntInt;
-+  def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
-+          UnaryIntInt;
-+  def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
-+          UnaryIntInt;
-+  def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
-+          UnaryIntInt;
-+  def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
-+          UnaryIntInt;
-+  def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
-+          UnaryIntInt;
-+  def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
-+          TernaryIntInt;
-+  def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
-+          TernaryIntInt;
-+  def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
-+          QuaternaryIntInt;
-+  def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
-+          TernaryIntInt;
-+  def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
-+          BinaryIntInt;
-+  def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
-+          TernaryIntInt;
-+  def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
-+          TernaryIntInt;
-+  def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
-+          TernaryIntFloat;
-+  def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
-+          BinaryIntInt;
-+  def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
-+          BinaryIntInt;
-+  def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
-+          BinaryIntInt;
-+  def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
-+          BinaryIntInt;
-+  def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
-+          BinaryIntInt;
-+  def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
-+          BinaryIntInt;
-+  def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
-+          TernaryIntInt;
-+  def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
-+          TernaryIntInt;
-+  def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
-+          BinaryIntInt;
-+  def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
-+          BinaryIntInt;
-+  def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
-+          BinaryIntInt;
-+  def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
-+          BinaryIntInt;
-+  def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
-+          BinaryIntFloat;
-+  def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
-+          BinaryIntInt;
-+  def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
-+          BinaryIntInt;
-+  def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
-+          BinaryIntFloat;
-+  def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
-+          TernaryIntInt;
-+  def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
-+          TernaryIntInt;
-+  def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
-+          TernaryIntInt;
-+  def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
-+          UnaryIntFloat;
-+  def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
-+          TernaryIntFloat;
-+  def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
-+          UnaryIntFloat;
-+  def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
-+          UnaryIntFloat;
-+  def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
-+          UnaryIntFloat;
-+  def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
-+          UnaryIntFloat;
-+  def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
-+          UnaryIntFloat;
-+  def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
-+          UnaryIntFloat;
-+  def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
-+          UnaryIntFloat;
-+  def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
-+          UnaryIntFloat;
-+  def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
-+          UnaryIntFloat;
-+  def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
-+          UnaryIntFloat;
-+  def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
-+          UnaryIntFloat;
-+  def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
-+          UnaryIntFloat;
-+  def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
-+  def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
-+  def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
-+  def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
-+          UnaryIntFloat;
-+  def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
-+          UnaryIntFloat;
-+  def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
-+          UnaryIntFloat;
-+  def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
-+          UnaryIntFloat;
-+  def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
-+          UnaryIntFloat;
-+  def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
-+          UnaryIntFloat;
-+  def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
-+          UnaryIntFloat;
-+  def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
-+          UnaryIntFloat;
-+  def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
-+          TernaryIntFloat;
-+  def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
-+          UnaryIntFloat;
-+  def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
-+          UnaryIntFloat;
-+  def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
-+          UnaryIntFloat;
-+  def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
-+          TernaryIntFloat;
-+  def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
-+      Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
-+          llvm_v4i32_ty, llvm_i32_ty], []>;
-+
-+  def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
-+          Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
-+  def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
-+          Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
-+  def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
-+          Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
-+  def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
-+          ConvertIntITOF;
-+  def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
-+          ConvertIntFTOI;
-+  def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
-+          ConvertIntFTOI;
-+  def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
-+          ConvertIntFTOI;
-+  def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
-+          ConvertIntFTOI;
-+  def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
-+          ConvertIntFTOI;
-+  def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
-+          ConvertIntFTOI;
-+  def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
-+          Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
-+  def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
-+          ConvertIntITOF;
-+  def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
-+          ConvertIntITOF;
-+  def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
-+          ConvertIntITOF;
-+  def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
-+          ConvertIntITOF;
-+  def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
-+        Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
-+          llvm_v2f32_ty, llvm_float_ty], []>;
-+  def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
-+        Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
-+          llvm_v2f32_ty], []>;
-+  def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
-+        Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
-+          llvm_v4f32_ty], []>;
-+  def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
-+        Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
-+          llvm_v4f32_ty], []>;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILISelDAGToDAG.cpp llvm-r600/lib/Target/R600/AMDILISelDAGToDAG.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILISelDAGToDAG.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILISelDAGToDAG.cpp 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,567 @@
-+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Defines an instruction selector for the AMDGPU target.
-+//
-+//===----------------------------------------------------------------------===//
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPUISelLowering.h" // For AMDGPUISD
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDILDevices.h"
-+#include "R600InstrInfo.h"
-+#include "llvm/ADT/ValueMap.h"
-+#include "llvm/CodeGen/PseudoSourceValue.h"
-+#include "llvm/CodeGen/SelectionDAGISel.h"
-+#include "llvm/Support/Compiler.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include <list>
-+#include <queue>
-+
-+using namespace llvm;
-+
-+//===----------------------------------------------------------------------===//
-+// Instruction Selector Implementation
-+//===----------------------------------------------------------------------===//
-+
-+namespace {
-+/// AMDGPU specific code to select AMDGPU machine instructions for
-+/// SelectionDAG operations.
-+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
-+  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
-+  // make the right decision when generating code for different targets.
-+  const AMDGPUSubtarget &Subtarget;
-+public:
-+  AMDGPUDAGToDAGISel(TargetMachine &TM);
-+  virtual ~AMDGPUDAGToDAGISel();
-+
-+  SDNode *Select(SDNode *N);
-+  virtual const char *getPassName() const;
-+
-+private:
-+  inline SDValue getSmallIPtrImm(unsigned Imm);
-+  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
-+
-+  // Complex pattern selectors
-+  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
-+  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
-+  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
-+
-+  static bool checkType(const Value *ptr, unsigned int addrspace);
-+  static const Value *getBasePointerValue(const Value *V);
-+
-+  static bool isGlobalStore(const StoreSDNode *N);
-+  static bool isPrivateStore(const StoreSDNode *N);
-+  static bool isLocalStore(const StoreSDNode *N);
-+  static bool isRegionStore(const StoreSDNode *N);
-+
-+  static bool isCPLoad(const LoadSDNode *N);
-+  static bool isConstantLoad(const LoadSDNode *N, int cbID);
-+  static bool isGlobalLoad(const LoadSDNode *N);
-+  static bool isParamLoad(const LoadSDNode *N);
-+  static bool isPrivateLoad(const LoadSDNode *N);
-+  static bool isLocalLoad(const LoadSDNode *N);
-+  static bool isRegionLoad(const LoadSDNode *N);
-+
-+  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
-+  bool SelectGlobalValueVariableOffset(SDValue Addr,
-+      SDValue &BaseReg, SDValue& Offset);
-+  bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
-+  bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
-+  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-+
-+  // Include the pieces autogenerated from the target description.
-+#include "AMDGPUGenDAGISel.inc"
-+};
-+} // end anonymous namespace
-+
-+/// \brief This pass converts a legalized DAG into an AMDGPU-specific
-+// DAG, ready for instruction scheduling.
-+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
-+    ) {
-+  return new AMDGPUDAGToDAGISel(TM);
-+}
-+
-+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
-+    )
-+  : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
-+}
-+
-+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
-+}
-+
-+SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
-+  return CurDAG->getTargetConstant(Imm, MVT::i32);
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDRParam(
-+    SDValue Addr, SDValue& R1, SDValue& R2) {
-+
-+  if (Addr.getOpcode() == ISD::FrameIndex) {
-+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-+      R2 = CurDAG->getTargetConstant(0, MVT::i32);
-+    } else {
-+      R1 = Addr;
-+      R2 = CurDAG->getTargetConstant(0, MVT::i32);
-+    }
-+  } else if (Addr.getOpcode() == ISD::ADD) {
-+    R1 = Addr.getOperand(0);
-+    R2 = Addr.getOperand(1);
-+  } else {
-+    R1 = Addr;
-+    R2 = CurDAG->getTargetConstant(0, MVT::i32);
-+  }
-+  return true;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
-+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+      Addr.getOpcode() == ISD::TargetGlobalAddress) {
-+    return false;
-+  }
-+  return SelectADDRParam(Addr, R1, R2);
-+}
-+
-+
-+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
-+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+      Addr.getOpcode() == ISD::TargetGlobalAddress) {
-+    return false;
-+  }
-+
-+  if (Addr.getOpcode() == ISD::FrameIndex) {
-+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
-+      R2 = CurDAG->getTargetConstant(0, MVT::i64);
-+    } else {
-+      R1 = Addr;
-+      R2 = CurDAG->getTargetConstant(0, MVT::i64);
-+    }
-+  } else if (Addr.getOpcode() == ISD::ADD) {
-+    R1 = Addr.getOperand(0);
-+    R2 = Addr.getOperand(1);
-+  } else {
-+    R1 = Addr;
-+    R2 = CurDAG->getTargetConstant(0, MVT::i64);
-+  }
-+  return true;
-+}
-+
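SelectADDRParam above is the usual base-plus-offset decomposition for a complex pattern: an ADD splits into its two operands, a frame index becomes (TargetFrameIndex, 0), and anything else falls back to (addr, 0). A rough sketch of the same shape with the SelectionDAG types stripped away, using toy stand-in names:

    #include <cstdint>
    #include <utility>

    struct Addr {                       // toy stand-in for an SDValue address
      bool IsAdd;
      int64_t LHS, RHS;                 // operands when IsAdd is set
      int64_t Value;                    // the raw address otherwise
    };

    // Decompose into (base, offset); the offset defaults to zero.
    static std::pair<int64_t, int64_t> splitAddress(const Addr &A) {
      if (A.IsAdd)
        return std::make_pair(A.LHS, A.RHS);
      return std::make_pair(A.Value, 0);
    }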
-+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
-+  unsigned int Opc = N->getOpcode();
-+  if (N->isMachineOpcode()) {
-+    return NULL;   // Already selected.
-+  }
-+  switch (Opc) {
-+  default: break;
-+  case ISD::FrameIndex: {
-+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
-+      unsigned int FI = FIN->getIndex();
-+      EVT OpVT = N->getValueType(0);
-+      unsigned int NewOpc = AMDGPU::COPY;
-+      SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
-+      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
-+    }
-+    break;
-+  }
-+  case ISD::ConstantFP:
-+  case ISD::Constant: {
-+    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-+    // XXX: Custom immediate lowering not implemented yet.  Instead we use
-+    // pseudo instructions defined in SIInstructions.td
-+    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+      break;
-+    }
-+    const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(TM.getInstrInfo());
-+
-+    uint64_t ImmValue = 0;
-+    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
-+
-+    if (N->getOpcode() == ISD::ConstantFP) {
-+      // XXX: 64-bit Immediates not supported yet
-+      assert(N->getValueType(0) != MVT::f64);
-+
-+      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
-+      APFloat Value = C->getValueAPF();
-+      float FloatValue = Value.convertToFloat();
-+      if (FloatValue == 0.0) {
-+        ImmReg = AMDGPU::ZERO;
-+      } else if (FloatValue == 0.5) {
-+        ImmReg = AMDGPU::HALF;
-+      } else if (FloatValue == 1.0) {
-+        ImmReg = AMDGPU::ONE;
-+      } else {
-+        ImmValue = Value.bitcastToAPInt().getZExtValue();
-+      }
-+    } else {
-+      // XXX: 64-bit Immediates not supported yet
-+      assert(N->getValueType(0) != MVT::i64);
-+
-+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
-+      if (C->getZExtValue() == 0) {
-+        ImmReg = AMDGPU::ZERO;
-+      } else if (C->getZExtValue() == 1) {
-+        ImmReg = AMDGPU::ONE_INT;
-+      } else {
-+        ImmValue = C->getZExtValue();
-+      }
-+    }
-+
-+    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
-+                              Use != SDNode::use_end(); Use = Next) {
-+      Next = llvm::next(Use);
-+      std::vector<SDValue> Ops;
-+      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
-+        Ops.push_back(Use->getOperand(i));
-+      }
-+
-+      if (!Use->isMachineOpcode()) {
-+        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
-+          // We can only use literal constants (e.g. AMDGPU::ZERO,
-+          // AMDGPU::ONE, etc) in machine opcodes.
-+          continue;
-+        }
-+      } else {
-+        if (!TII->isALUInstr(Use->getMachineOpcode())) {
-+          continue;
-+        }
-+
-+        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
-+        assert(ImmIdx != -1);
-+
-+        // subtract one from ImmIdx, because the DST operand is usually index
-+        // 0 for MachineInstrs, but we have no DST in the Ops vector.
-+        ImmIdx--;
-+
-+        // Check that we aren't already using an immediate.
-+        // XXX: It's possible for an instruction to have more than one
-+        // immediate operand, but this is not supported yet.
-+        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
-+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
-+          assert(C);
-+
-+          if (C->getZExtValue() != 0) {
-+            // This instruction is already using an immediate.
-+            continue;
-+          }
-+
-+          // Set the immediate value
-+          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
-+        }
-+      }
-+      // Set the immediate register
-+      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
-+
-+      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
-+    }
-+    break;
-+  }
-+  }
-+  SDNode *Result = SelectCode(N);
-+
-+  // Fold operands of selected node
-+
-+  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-+  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+    const R600InstrInfo *TII =
-+        static_cast<const R600InstrInfo *>(TM.getInstrInfo());
-+    if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
-+      bool IsModified = false;
-+      do {
-+        std::vector<SDValue> Ops;
-+        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
-+            I != E; ++I)
-+          Ops.push_back(*I);
-+        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
-+        if (IsModified) {
-+          Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(),
-+              Result->getVTList(), Ops.data(), Ops.size());
-+        }
-+      } while (IsModified);
-+    }
-+  }
-+
-+  return Result;
-+}
-+
-+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
-+    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
-+  int OperandIdx[] = {
-+    TII->getOperandIdx(Opcode, R600Operands::SRC0),
-+    TII->getOperandIdx(Opcode, R600Operands::SRC1),
-+    TII->getOperandIdx(Opcode, R600Operands::SRC2)
-+  };
-+  int SelIdx[] = {
-+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
-+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
-+    TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
-+  };
-+  for (unsigned i = 0; i < 3; i++) {
-+    if (OperandIdx[i] < 0)
-+      return false;
-+    SDValue Operand = Ops[OperandIdx[i] - 1];
-+    switch (Operand.getOpcode()) {
-+    case AMDGPUISD::CONST_ADDRESS: {
-+      SDValue CstOffset;
-+      if (!Operand.getValueType().isVector() &&
-+          SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
-+        Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-+        Ops[SelIdx[i] - 1] = CstOffset;
-+        return true;
-+      }
-+    }
-+    break;
-+    default:
-+      break;
-+    }
-+  }
-+  return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
-+  if (!ptr) {
-+    return false;
-+  }
-+  Type *ptrType = ptr->getType();
-+  return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
-+}
-+
-+const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
-+  if (!V) {
-+    return NULL;
-+  }
-+  const Value *ret = NULL;
-+  ValueMap<const Value *, bool> ValueBitMap;
-+  std::queue<const Value *, std::list<const Value *> > ValueQueue;
-+  ValueQueue.push(V);
-+  while (!ValueQueue.empty()) {
-+    V = ValueQueue.front();
-+    if (ValueBitMap.find(V) == ValueBitMap.end()) {
-+      ValueBitMap[V] = true;
-+      if (dyn_cast<GlobalValue>(V) && dyn_cast<PointerType>(V->getType())) {
-+        ret = V;
-+        break;
-+      } else if (dyn_cast<Argument>(V)) {
-+        ret = V;
-+        break;
-+      } else if (dyn_cast<ConstantExpr>(V)) {
-+        const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
-+        if (CE) {
-+          ValueQueue.push(CE->getOperand(0));
-+        }
-+      } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
-+        ret = AI;
-+        break;
-+      } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
-+        uint32_t numOps = I->getNumOperands();
-+        for (uint32_t x = 0; x < numOps; ++x) {
-+          ValueQueue.push(I->getOperand(x));
-+        }
-+      } else {
-+        assert(!"Found a Value that we didn't know how to handle!");
-+      }
-+    }
-+    ValueQueue.pop();
-+  }
-+  return ret;
-+}
-+
-+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
-+  return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
-+  return (!checkType(N->getSrcValue(),
-+      AMDGPUAS::LOCAL_ADDRESS)
-+      && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
-+      && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
-+}
-+
-+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
-+  return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
-+  return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
-+  if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
-+    return true;
-+  }
-+  MachineMemOperand *MMO = N->getMemOperand();
-+  const Value *V = MMO->getValue();
-+  const Value *BV = getBasePointerValue(V);
-+  if (MMO
-+      && MMO->getValue()
-+      && ((V && dyn_cast<GlobalValue>(V))
-+          || (BV && dyn_cast<GlobalValue>(
-+                getBasePointerValue(MMO->getValue()))))) {
-+    return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
-+  } else {
-+    return false;
-+  }
-+}
-+
-+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
-+  return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
-+  return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
-+  return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
-+  return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
-+}
-+
-+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
-+  MachineMemOperand *MMO = N->getMemOperand();
-+  if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
-+    if (MMO) {
-+      const Value *V = MMO->getValue();
-+      const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
-+      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
-+        return true;
-+      }
-+    }
-+  }
-+  return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
-+  if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
-+    // Check to make sure we are not a constant pool load or a constant load
-+    // that is marked as a private load
-+    if (isCPLoad(N) || isConstantLoad(N, -1)) {
-+      return false;
-+    }
-+  }
-+  if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
-+      && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
-+      && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
-+      && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
-+      && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
-+      && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
-+    return true;
-+  }
-+  return false;
-+}
-+
-+const char *AMDGPUDAGToDAGISel::getPassName() const {
-+  return "AMDGPU DAG->DAG Pattern Instruction Selection";
-+}
-+
-+#ifdef DEBUGTMP
-+#undef INT64_C
-+#endif
-+#undef DEBUGTMP
-+
-+///==== AMDGPU Functions ====///
-+
-+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
-+    SDValue& IntPtr) {
-+  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
-+    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
-+    return true;
-+  }
-+  return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
-+    SDValue& BaseReg, SDValue &Offset) {
-+  if (!dyn_cast<ConstantSDNode>(Addr)) {
-+    BaseReg = Addr;
-+    Offset = CurDAG->getIntPtrConstant(0, true);
-+    return true;
-+  }
-+  return false;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
-+    SDValue& Offset) {
-+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+      Addr.getOpcode() == ISD::TargetGlobalAddress) {
-+    return false;
-+  }
-+
-+
-+  if (Addr.getOpcode() == ISD::ADD) {
-+    bool Match = false;
-+
-+    // Find the base ptr and the offset
-+    for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
-+      SDValue Arg = Addr.getOperand(i);
-+      ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
-+      // This arg isn't a constant so it must be the base PTR.
-+      if (!OffsetNode) {
-+        Base = Addr.getOperand(i);
-+        continue;
-+      }
-+      // Check if the constant argument fits in 8-bits.  The offset is in bytes
-+      // so we need to convert it to dwords.
-+      if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
-+        Match = true;
-+        Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
-+            MVT::i32);
-+      }
-+    }
-+    return Match;
-+  }
-+
-+  // Default case, no offset
-+  Base = Addr;
-+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
-+  return true;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-+    SDValue &Offset) {
-+  ConstantSDNode * IMMOffset;
-+
-+  if (Addr.getOpcode() == ISD::ADD
-+      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-+      && isInt<16>(IMMOffset->getZExtValue())) {
-+
-+    Base = Addr.getOperand(0);
-+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
-+    return true;
-+  // If the pointer address is constant, we can move it to the offset field.
-+  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
-+             && isInt<16>(IMMOffset->getZExtValue())) {
-+    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-+                                  CurDAG->getEntryNode().getDebugLoc(),
-+                                  AMDGPU::ZERO, MVT::i32);
-+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
-+    return true;
-+  }
-+
-+  // Default case, no offset
-+  Base = Addr;
-+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
-+  return true;
-+}
-+
-+bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
-+    SDValue& Offset) {
-+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
-+      Addr.getOpcode() == ISD::TargetGlobalAddress  ||
-+      Addr.getOpcode() != ISD::ADD) {
-+    return false;
-+  }
-+
-+  Base = Addr.getOperand(0);
-+  Offset = Addr.getOperand(1);
-+
-+  return true;
-+}
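SelectADDR8BitOffset above only folds a constant into the offset field when it survives the encoding: offsets are stored in dwords, so the byte value is shifted right by two and must then fit in 8 bits (that is the isUInt<8>(offset >> 2) test). The check in isolation, assuming a byte-addressed input and hypothetical names:

    #include <cstdint>

    // True when ByteOff is encodable as an 8-bit dword offset (the test used
    // by SelectADDR8BitOffset); on success DwordOff holds the encoded value.
    static bool encodeDwordOffset8(uint64_t ByteOff, uint32_t &DwordOff) {
      uint64_t Dwords = ByteOff >> 2;   // bytes -> dwords
      if (Dwords > 0xFF)                // must fit in 8 bits
        return false;
      DwordOff = static_cast<uint32_t>(Dwords);
      return true;
    }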
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILISelLowering.cpp llvm-r600/lib/Target/R600/AMDILISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILISelLowering.cpp 2013-01-25 19:43:57.443383054 +0100
-@@ -0,0 +1,651 @@
-+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief TargetLowering functions borrowed from AMDIL.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUISelLowering.h"
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDILDevices.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "AMDGPUSubtarget.h"
-+#include "llvm/CallingConv.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/PseudoSourceValue.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include "llvm/CodeGen/SelectionDAGNodes.h"
-+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-+#include "llvm/DerivedTypes.h"
-+#include "llvm/Instructions.h"
-+#include "llvm/Intrinsics.h"
-+#include "llvm/Support/raw_ostream.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+#include "llvm/Target/TargetOptions.h"
-+
-+using namespace llvm;
-+//===----------------------------------------------------------------------===//
-+// Calling Convention Implementation
-+//===----------------------------------------------------------------------===//
-+#include "AMDGPUGenCallingConv.inc"
-+
-+//===----------------------------------------------------------------------===//
-+// TargetLowering Implementation Help Functions End
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// TargetLowering Class Implementation Begins
-+//===----------------------------------------------------------------------===//
-+void AMDGPUTargetLowering::InitAMDILLowering() {
-+  int types[] = {
-+    (int)MVT::i8,
-+    (int)MVT::i16,
-+    (int)MVT::i32,
-+    (int)MVT::f32,
-+    (int)MVT::f64,
-+    (int)MVT::i64,
-+    (int)MVT::v2i8,
-+    (int)MVT::v4i8,
-+    (int)MVT::v2i16,
-+    (int)MVT::v4i16,
-+    (int)MVT::v4f32,
-+    (int)MVT::v4i32,
-+    (int)MVT::v2f32,
-+    (int)MVT::v2i32,
-+    (int)MVT::v2f64,
-+    (int)MVT::v2i64
-+  };
-+
-+  int IntTypes[] = {
-+    (int)MVT::i8,
-+    (int)MVT::i16,
-+    (int)MVT::i32,
-+    (int)MVT::i64
-+  };
-+
-+  int FloatTypes[] = {
-+    (int)MVT::f32,
-+    (int)MVT::f64
-+  };
-+
-+  int VectorTypes[] = {
-+    (int)MVT::v2i8,
-+    (int)MVT::v4i8,
-+    (int)MVT::v2i16,
-+    (int)MVT::v4i16,
-+    (int)MVT::v4f32,
-+    (int)MVT::v4i32,
-+    (int)MVT::v2f32,
-+    (int)MVT::v2i32,
-+    (int)MVT::v2f64,
-+    (int)MVT::v2i64
-+  };
-+  size_t NumTypes = sizeof(types) / sizeof(*types);
-+  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
-+  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
-+  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
-+
-+  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
-+  // These are the current register classes that are
-+  // supported
-+
-+  for (unsigned int x  = 0; x < NumTypes; ++x) {
-+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
-+
-+    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
-+    // We cannot sextinreg, expand to shifts
-+    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
-+    setOperationAction(ISD::SUBE, VT, Expand);
-+    setOperationAction(ISD::SUBC, VT, Expand);
-+    setOperationAction(ISD::ADDE, VT, Expand);
-+    setOperationAction(ISD::ADDC, VT, Expand);
-+    setOperationAction(ISD::BRCOND, VT, Custom);
-+    setOperationAction(ISD::BR_JT, VT, Expand);
-+    setOperationAction(ISD::BRIND, VT, Expand);
-+    // TODO: Implement custom UREM/SREM routines
-+    setOperationAction(ISD::SREM, VT, Expand);
-+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-+    if (VT != MVT::i64 && VT != MVT::v2i64) {
-+      setOperationAction(ISD::SDIV, VT, Custom);
-+    }
-+  }
-+  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
-+    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
-+
-+    // IL does not have these operations for floating point types
-+    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
-+    setOperationAction(ISD::SETOLT, VT, Expand);
-+    setOperationAction(ISD::SETOGE, VT, Expand);
-+    setOperationAction(ISD::SETOGT, VT, Expand);
-+    setOperationAction(ISD::SETOLE, VT, Expand);
-+    setOperationAction(ISD::SETULT, VT, Expand);
-+    setOperationAction(ISD::SETUGE, VT, Expand);
-+    setOperationAction(ISD::SETUGT, VT, Expand);
-+    setOperationAction(ISD::SETULE, VT, Expand);
-+  }
-+
-+  for (unsigned int x = 0; x < NumIntTypes; ++x) {
-+    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
-+
-+    // GPU also does not have divrem function for signed or unsigned
-+    setOperationAction(ISD::SDIVREM, VT, Expand);
-+
-+    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
-+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-+
-+    // GPU doesn't have a rotl, rotr, or byteswap instruction
-+    setOperationAction(ISD::ROTR, VT, Expand);
-+    setOperationAction(ISD::BSWAP, VT, Expand);
-+
-+    // GPU doesn't have any counting operators
-+    setOperationAction(ISD::CTPOP, VT, Expand);
-+    setOperationAction(ISD::CTTZ, VT, Expand);
-+    setOperationAction(ISD::CTLZ, VT, Expand);
-+  }
-+
-+  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
-+    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
-+
-+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
-+    setOperationAction(ISD::SDIVREM, VT, Expand);
-+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-+    // setOperationAction(ISD::VSETCC, VT, Expand);
-+    setOperationAction(ISD::SELECT_CC, VT, Expand);
-+
-+  }
-+  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
-+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
-+    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
-+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
-+    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
-+    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
-+    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
-+    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
-+    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
-+    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
-+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
-+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
-+    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
-+  }
-+  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
-+    // we support loading/storing v2f64 but not operations on the type
-+    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
-+    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
-+    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
-+    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
-+    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
-+    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
-+    // We want to expand vector conversions into their scalar
-+    // counterparts.
-+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); -+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); -+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); -+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); -+ setOperationAction(ISD::FABS, MVT::f64, Expand); -+ setOperationAction(ISD::FABS, MVT::v2f64, Expand); -+ } -+ // TODO: Fix the UDIV24 algorithm so it works for these -+ // types correctly. This needs vector comparisons -+ // for this to work correctly. -+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand); -+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand); -+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand); -+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand); -+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); -+ setOperationAction(ISD::SUBC, MVT::Other, Expand); -+ setOperationAction(ISD::ADDE, MVT::Other, Expand); -+ setOperationAction(ISD::ADDC, MVT::Other, Expand); -+ setOperationAction(ISD::BRCOND, MVT::Other, Custom); -+ setOperationAction(ISD::BR_JT, MVT::Other, Expand); -+ setOperationAction(ISD::BRIND, MVT::Other, Expand); -+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); -+ -+ -+ // Use the default implementation. -+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); -+ setOperationAction(ISD::Constant , MVT::i32 , Legal); -+ -+ setSchedulingPreference(Sched::RegPressure); -+ setPow2DivIsCheap(false); -+ setSelectIsExpensive(true); -+ setJumpIsExpensive(true); -+ -+ maxStoresPerMemcpy = 4096; -+ maxStoresPerMemmove = 4096; -+ maxStoresPerMemset = 4096; -+ -+} -+ -+bool -+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, -+ const CallInst &I, unsigned Intrinsic) const { -+ return false; -+} -+ -+// The backend supports 32 and 64 bit floating point immediates -+bool -+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { -+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 -+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { -+ return true; -+ } else { -+ return false; -+ } -+} -+ -+bool -+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { -+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 -+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { -+ return false; -+ } else { -+ return true; -+ } -+} -+ -+ -+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to -+// be zero. Op is expected to be a target specific node. Used by DAG -+// combiner. 
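-+// (Worked example, added for illustration: for x = select_cc(cc, a, b),
-+// a bit of x is known only where it is known and identical in both
-+// inputs. With a = 0b0110 and b = 0b0011 fully known, the intersection
-+// gives KnownOne = 0b0010 and KnownZero = 0b1000, which is what the
-+// SELECT_CC case below computes.)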
-+ -+void -+AMDGPUTargetLowering::computeMaskedBitsForTargetNode( -+ const SDValue Op, -+ APInt &KnownZero, -+ APInt &KnownOne, -+ const SelectionDAG &DAG, -+ unsigned Depth) const { -+ APInt KnownZero2; -+ APInt KnownOne2; -+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything -+ switch (Op.getOpcode()) { -+ default: break; -+ case ISD::SELECT_CC: -+ DAG.ComputeMaskedBits( -+ Op.getOperand(1), -+ KnownZero, -+ KnownOne, -+ Depth + 1 -+ ); -+ DAG.ComputeMaskedBits( -+ Op.getOperand(0), -+ KnownZero2, -+ KnownOne2 -+ ); -+ assert((KnownZero & KnownOne) == 0 -+ && "Bits known to be one AND zero?"); -+ assert((KnownZero2 & KnownOne2) == 0 -+ && "Bits known to be one AND zero?"); -+ // Only known if known in both the LHS and RHS -+ KnownOne &= KnownOne2; -+ KnownZero &= KnownZero2; -+ break; -+ }; -+} -+ -+//===----------------------------------------------------------------------===// -+// Other Lowering Hooks -+//===----------------------------------------------------------------------===// -+ -+SDValue -+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { -+ EVT OVT = Op.getValueType(); -+ SDValue DST; -+ if (OVT.getScalarType() == MVT::i64) { -+ DST = LowerSDIV64(Op, DAG); -+ } else if (OVT.getScalarType() == MVT::i32) { -+ DST = LowerSDIV32(Op, DAG); -+ } else if (OVT.getScalarType() == MVT::i16 -+ || OVT.getScalarType() == MVT::i8) { -+ DST = LowerSDIV24(Op, DAG); -+ } else { -+ DST = SDValue(Op.getNode(), 0); -+ } -+ return DST; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { -+ EVT OVT = Op.getValueType(); -+ SDValue DST; -+ if (OVT.getScalarType() == MVT::i64) { -+ DST = LowerSREM64(Op, DAG); -+ } else if (OVT.getScalarType() == MVT::i32) { -+ DST = LowerSREM32(Op, DAG); -+ } else if (OVT.getScalarType() == MVT::i16) { -+ DST = LowerSREM16(Op, DAG); -+ } else if (OVT.getScalarType() == MVT::i8) { -+ DST = LowerSREM8(Op, DAG); -+ } else { -+ DST = SDValue(Op.getNode(), 0); -+ } -+ return DST; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { -+ SDValue Data = Op.getOperand(0); -+ VTSDNode *BaseType = cast(Op.getOperand(1)); -+ DebugLoc DL = Op.getDebugLoc(); -+ EVT DVT = Data.getValueType(); -+ EVT BVT = BaseType->getVT(); -+ unsigned baseBits = BVT.getScalarType().getSizeInBits(); -+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; -+ unsigned shiftBits = srcBits - baseBits; -+ if (srcBits < 32) { -+ // If the op is less than 32 bits, then it needs to extend to 32bits -+ // so it can properly keep the upper bits valid. -+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); -+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); -+ shiftBits = 32 - baseBits; -+ DVT = IVT; -+ } -+ SDValue Shift = DAG.getConstant(shiftBits, DVT); -+ // Shift left by 'Shift' bits. -+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); -+ // Signed shift Right by 'Shift' bits. -+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); -+ if (srcBits < 32) { -+ // Once the sign extension is done, the op needs to be converted to -+ // its original type. -+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); -+ } -+ return Data; -+} -+EVT -+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { -+ int iSize = (size * numEle); -+ int vEle = (iSize >> ((size == 64) ? 
6 : 5)); -+ if (!vEle) { -+ vEle = 1; -+ } -+ if (size == 64) { -+ if (vEle == 1) { -+ return EVT(MVT::i64); -+ } else { -+ return EVT(MVT::getVectorVT(MVT::i64, vEle)); -+ } -+ } else { -+ if (vEle == 1) { -+ return EVT(MVT::i32); -+ } else { -+ return EVT(MVT::getVectorVT(MVT::i32, vEle)); -+ } -+ } -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { -+ SDValue Chain = Op.getOperand(0); -+ SDValue Cond = Op.getOperand(1); -+ SDValue Jump = Op.getOperand(2); -+ SDValue Result; -+ Result = DAG.getNode( -+ AMDGPUISD::BRANCH_COND, -+ Op.getDebugLoc(), -+ Op.getValueType(), -+ Chain, Jump, Cond); -+ return Result; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { -+ DebugLoc DL = Op.getDebugLoc(); -+ EVT OVT = Op.getValueType(); -+ SDValue LHS = Op.getOperand(0); -+ SDValue RHS = Op.getOperand(1); -+ MVT INTTY; -+ MVT FLTTY; -+ if (!OVT.isVector()) { -+ INTTY = MVT::i32; -+ FLTTY = MVT::f32; -+ } else if (OVT.getVectorNumElements() == 2) { -+ INTTY = MVT::v2i32; -+ FLTTY = MVT::v2f32; -+ } else if (OVT.getVectorNumElements() == 4) { -+ INTTY = MVT::v4i32; -+ FLTTY = MVT::v4f32; -+ } -+ unsigned bitsize = OVT.getScalarType().getSizeInBits(); -+ // char|short jq = ia ^ ib; -+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); -+ -+ // jq = jq >> (bitsize - 2) -+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); -+ -+ // jq = jq | 0x1 -+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); -+ -+ // jq = (int)jq -+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY); -+ -+ // int ia = (int)LHS; -+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); -+ -+ // int ib, (int)RHS; -+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); -+ -+ // float fa = (float)ia; -+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); -+ -+ // float fb = (float)ib; -+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); -+ -+ // float fq = native_divide(fa, fb); -+ SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); -+ -+ // fq = trunc(fq); -+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); -+ -+ // float fqneg = -fq; -+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); -+ -+ // float fr = mad(fqneg, fb, fa); -+ SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa); -+ -+ // int iq = (int)fq; -+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); -+ -+ // fr = fabs(fr); -+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); -+ -+ // fb = fabs(fb); -+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); -+ -+ // int cv = fr >= fb; -+ SDValue cv; -+ if (INTTY == MVT::i32) { -+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); -+ } else { -+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); -+ } -+ // jq = (cv ? jq : 0); -+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, -+ DAG.getConstant(0, OVT)); -+ // dst = iq + jq; -+ iq = DAG.getSExtOrTrunc(iq, DL, OVT); -+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); -+ return iq; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const { -+ DebugLoc DL = Op.getDebugLoc(); -+ EVT OVT = Op.getValueType(); -+ SDValue LHS = Op.getOperand(0); -+ SDValue RHS = Op.getOperand(1); -+ // The LowerSDIV32 function generates equivalent to the following IL. 
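-+  // (Added for illustration only, a C sketch of the same sign-fixup
-+  // algorithm:
-+  //   int32_t sdiv32(int32_t a, int32_t b) {
-+  //     int32_t sa = a >> 31, sb = b >> 31;        // -1 if negative
-+  //     uint32_t q = (uint32_t)((a + sa) ^ sa)     // |a|
-+  //                / (uint32_t)((b + sb) ^ sb);    // |b|
-+  //     int32_t s = sa ^ sb;                       // -1 if signs differ
-+  //     return ((int32_t)(q + s)) ^ s;             // conditional negate
-+  //   }
-+  // The IL below spells out the same steps register by register.)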
-+ // mov r0, LHS -+ // mov r1, RHS -+ // ilt r10, r0, 0 -+ // ilt r11, r1, 0 -+ // iadd r0, r0, r10 -+ // iadd r1, r1, r11 -+ // ixor r0, r0, r10 -+ // ixor r1, r1, r11 -+ // udiv r0, r0, r1 -+ // ixor r10, r10, r11 -+ // iadd r0, r0, r10 -+ // ixor DST, r0, r10 -+ -+ // mov r0, LHS -+ SDValue r0 = LHS; -+ -+ // mov r1, RHS -+ SDValue r1 = RHS; -+ -+ // ilt r10, r0, 0 -+ SDValue r10 = DAG.getSelectCC(DL, -+ r0, DAG.getConstant(0, OVT), -+ DAG.getConstant(-1, MVT::i32), -+ DAG.getConstant(0, MVT::i32), -+ ISD::SETLT); -+ -+ // ilt r11, r1, 0 -+ SDValue r11 = DAG.getSelectCC(DL, -+ r1, DAG.getConstant(0, OVT), -+ DAG.getConstant(-1, MVT::i32), -+ DAG.getConstant(0, MVT::i32), -+ ISD::SETLT); -+ -+ // iadd r0, r0, r10 -+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); -+ -+ // iadd r1, r1, r11 -+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); -+ -+ // ixor r0, r0, r10 -+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); -+ -+ // ixor r1, r1, r11 -+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); -+ -+ // udiv r0, r0, r1 -+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); -+ -+ // ixor r10, r10, r11 -+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); -+ -+ // iadd r0, r0, r10 -+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); -+ -+ // ixor DST, r0, r10 -+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); -+ return DST; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const { -+ return SDValue(Op.getNode(), 0); -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { -+ DebugLoc DL = Op.getDebugLoc(); -+ EVT OVT = Op.getValueType(); -+ MVT INTTY = MVT::i32; -+ if (OVT == MVT::v2i8) { -+ INTTY = MVT::v2i32; -+ } else if (OVT == MVT::v4i8) { -+ INTTY = MVT::v4i32; -+ } -+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); -+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); -+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); -+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); -+ return LHS; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const { -+ DebugLoc DL = Op.getDebugLoc(); -+ EVT OVT = Op.getValueType(); -+ MVT INTTY = MVT::i32; -+ if (OVT == MVT::v2i16) { -+ INTTY = MVT::v2i32; -+ } else if (OVT == MVT::v4i16) { -+ INTTY = MVT::v4i32; -+ } -+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); -+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); -+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); -+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); -+ return LHS; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const { -+ DebugLoc DL = Op.getDebugLoc(); -+ EVT OVT = Op.getValueType(); -+ SDValue LHS = Op.getOperand(0); -+ SDValue RHS = Op.getOperand(1); -+ // The LowerSREM32 function generates equivalent to the following IL. 
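-+  // (Added for illustration only, the same computation sketched in C;
-+  // as with C's % operator, the result takes the sign of the dividend:
-+  //   int32_t srem32(int32_t a, int32_t b) {
-+  //     int32_t sa = a >> 31, sb = b >> 31;
-+  //     uint32_t ua = (uint32_t)((a + sa) ^ sa);   // |a|
-+  //     uint32_t ub = (uint32_t)((b + sb) ^ sb);   // |b|
-+  //     uint32_t r = ua - (ua / ub) * ub;          // ua % ub
-+  //     return ((int32_t)(r + sa)) ^ sa;           // restore sign of a
-+  //   }
-+  // Note that the first step below is built with ISD::UREM although the
-+  // IL comment says udiv; a udiv would make the mul/sub pair yield the
-+  // remainder, which appears to be the intent.)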
-+ // mov r0, LHS -+ // mov r1, RHS -+ // ilt r10, r0, 0 -+ // ilt r11, r1, 0 -+ // iadd r0, r0, r10 -+ // iadd r1, r1, r11 -+ // ixor r0, r0, r10 -+ // ixor r1, r1, r11 -+ // udiv r20, r0, r1 -+ // umul r20, r20, r1 -+ // sub r0, r0, r20 -+ // iadd r0, r0, r10 -+ // ixor DST, r0, r10 -+ -+ // mov r0, LHS -+ SDValue r0 = LHS; -+ -+ // mov r1, RHS -+ SDValue r1 = RHS; -+ -+ // ilt r10, r0, 0 -+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); -+ -+ // ilt r11, r1, 0 -+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); -+ -+ // iadd r0, r0, r10 -+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); -+ -+ // iadd r1, r1, r11 -+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); -+ -+ // ixor r0, r0, r10 -+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); -+ -+ // ixor r1, r1, r11 -+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); -+ -+ // udiv r20, r0, r1 -+ SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); -+ -+ // umul r20, r20, r1 -+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); -+ -+ // sub r0, r0, r20 -+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); -+ -+ // iadd r0, r0, r10 -+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); -+ -+ // ixor DST, r0, r10 -+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); -+ return DST; -+} -+ -+SDValue -+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { -+ return SDValue(Op.getNode(), 0); -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILNIDevice.cpp llvm-r600/lib/Target/R600/AMDILNIDevice.cpp ---- llvm-3.2.src/lib/Target/R600/AMDILNIDevice.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILNIDevice.cpp 2013-01-25 19:43:57.446716388 +0100 -@@ -0,0 +1,65 @@ -+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+// -+/// \file -+//==-----------------------------------------------------------------------===// -+#include "AMDILNIDevice.h" -+#include "AMDILEvergreenDevice.h" -+#include "AMDGPUSubtarget.h" -+ -+using namespace llvm; -+ -+AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST) -+ : AMDGPUEvergreenDevice(ST) { -+ std::string name = ST->getDeviceName(); -+ if (name == "caicos") { -+ DeviceFlag = OCL_DEVICE_CAICOS; -+ } else if (name == "turks") { -+ DeviceFlag = OCL_DEVICE_TURKS; -+ } else if (name == "cayman") { -+ DeviceFlag = OCL_DEVICE_CAYMAN; -+ } else { -+ DeviceFlag = OCL_DEVICE_BARTS; -+ } -+} -+AMDGPUNIDevice::~AMDGPUNIDevice() { -+} -+ -+size_t -+AMDGPUNIDevice::getMaxLDSSize() const { -+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { -+ return MAX_LDS_SIZE_900; -+ } else { -+ return 0; -+ } -+} -+ -+uint32_t -+AMDGPUNIDevice::getGeneration() const { -+ return AMDGPUDeviceInfo::HD6XXX; -+} -+ -+ -+AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST) -+ : AMDGPUNIDevice(ST) { -+ setCaps(); -+} -+ -+AMDGPUCaymanDevice::~AMDGPUCaymanDevice() { -+} -+ -+void -+AMDGPUCaymanDevice::setCaps() { -+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { -+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps); -+ mHWBits.set(AMDGPUDeviceInfo::FMA); -+ } -+ mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps); -+ mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps); -+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); -+} -+ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILNIDevice.h llvm-r600/lib/Target/R600/AMDILNIDevice.h ---- llvm-3.2.src/lib/Target/R600/AMDILNIDevice.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILNIDevice.h 2013-01-25 19:43:57.446716388 +0100 -@@ -0,0 +1,57 @@ -+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//==-----------------------------------------------------------------------===// -+/// \file -+/// \brief Interface for the subtarget data classes. -+/// -+/// This file will define the interface that each generation needs to -+/// implement in order to correctly answer queries on the capabilities of the -+/// specific hardware. -+//===---------------------------------------------------------------------===// -+#ifndef AMDILNIDEVICE_H -+#define AMDILNIDEVICE_H -+#include "AMDILEvergreenDevice.h" -+#include "AMDGPUSubtarget.h" -+ -+namespace llvm { -+ -+class AMDGPUSubtarget; -+//===---------------------------------------------------------------------===// -+// NI generation of devices and their respective sub classes -+//===---------------------------------------------------------------------===// -+ -+/// \brief The AMDGPUNIDevice is the base class for all Northern Island series of -+/// cards. -+/// -+/// It is very similiar to the AMDGPUEvergreenDevice, with the major -+/// exception being differences in wavefront size and hardware capabilities. 
The -+/// NI devices are all 64 wide wavefronts and also add support for signed 24 bit -+/// integer operations -+class AMDGPUNIDevice : public AMDGPUEvergreenDevice { -+public: -+ AMDGPUNIDevice(AMDGPUSubtarget*); -+ virtual ~AMDGPUNIDevice(); -+ virtual size_t getMaxLDSSize() const; -+ virtual uint32_t getGeneration() const; -+}; -+ -+/// Just as the AMDGPUCypressDevice is the double capable version of the -+/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version -+/// of the AMDGPUNIDevice. The other major difference is that the Cayman Device -+/// has 4 wide ALU's, whereas the rest of the NI family is a 5 wide. -+class AMDGPUCaymanDevice: public AMDGPUNIDevice { -+public: -+ AMDGPUCaymanDevice(AMDGPUSubtarget*); -+ virtual ~AMDGPUCaymanDevice(); -+private: -+ virtual void setCaps(); -+}; -+ -+static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800; -+} // namespace llvm -+#endif // AMDILNIDEVICE_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILPeepholeOptimizer.cpp llvm-r600/lib/Target/R600/AMDILPeepholeOptimizer.cpp ---- llvm-3.2.src/lib/Target/R600/AMDILPeepholeOptimizer.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILPeepholeOptimizer.cpp 2013-01-25 19:43:57.450049721 +0100 -@@ -0,0 +1,1256 @@ -+//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+/// \file -+//==-----------------------------------------------------------------------===// -+ -+#define DEBUG_TYPE "PeepholeOpt" -+#ifdef DEBUG -+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -+#else -+#define DEBUGME 0 -+#endif -+ -+#include "AMDILDevices.h" -+#include "AMDGPUInstrInfo.h" -+#include "llvm/ADT/Statistic.h" -+#include "llvm/ADT/StringExtras.h" -+#include "llvm/ADT/StringRef.h" -+#include "llvm/ADT/Twine.h" -+#include "llvm/Constants.h" -+#include "llvm/CodeGen/MachineFunction.h" -+#include "llvm/CodeGen/MachineFunctionAnalysis.h" -+#include "llvm/Function.h" -+#include "llvm/Instructions.h" -+#include "llvm/Module.h" -+#include "llvm/Support/Debug.h" -+#include "llvm/Support/MathExtras.h" -+ -+#include -+ -+#if 0 -+STATISTIC(PointerAssignments, "Number of dynamic pointer " -+ "assigments discovered"); -+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); -+#endif -+ -+using namespace llvm; -+// The Peephole optimization pass is used to do simple last minute optimizations -+// that are required for correct code or to remove redundant functions -+namespace { -+ -+class OpaqueType; -+ -+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass { -+public: -+ TargetMachine &TM; -+ static char ID; -+ AMDGPUPeepholeOpt(TargetMachine &tm); -+ ~AMDGPUPeepholeOpt(); -+ const char *getPassName() const; -+ bool runOnFunction(Function &F); -+ bool doInitialization(Module &M); -+ bool doFinalization(Module &M); -+ void getAnalysisUsage(AnalysisUsage &AU) const; -+protected: -+private: -+ // Function to initiate all of the instruction level optimizations. -+ bool instLevelOptimizations(BasicBlock::iterator *inst); -+ // Quick check to see if we need to dump all of the pointers into the -+ // arena. If this is correct, then we set all pointers to exist in arena. This -+ // is a workaround for aliasing of pointers in a struct/union. 
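-+  // (For example, a kernel argument pointing at struct { int *p; int *q; }
-+  // may have p and q alias one another, so every pointer is
-+  // conservatively placed in the arena.)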
-+  bool dumpAllIntoArena(Function &F);
-+  // Because we don't want to invalidate any pointers while in the
-+  // safeNestedForEach function, atomic conversions are pushed to a vector
-+  // and handled later. This function does the conversions if required.
-+  void doAtomicConversionIfNeeded(Function &F);
-+  // Because __amdil_is_constant cannot be properly evaluated if
-+  // optimizations are disabled, the calls are placed in a vector
-+  // and evaluated after the __amdil_image* functions are evaluated,
-+  // which should allow the __amdil_is_constant function to be
-+  // evaluated correctly.
-+  void doIsConstCallConversionIfNeeded();
-+  bool mChanged;
-+  bool mDebug;
-+  bool mConvertAtomics;
-+  CodeGenOpt::Level optLevel;
-+  // Run a series of tests to see if we can optimize a CALL instruction.
-+  bool optimizeCallInst(BasicBlock::iterator *bbb);
-+  // A peephole optimization to optimize bit extract sequences.
-+  bool optimizeBitExtract(Instruction *inst);
-+  // A peephole optimization to optimize bit insert sequences.
-+  bool optimizeBitInsert(Instruction *inst);
-+  bool setupBitInsert(Instruction *base,
-+                      Instruction *&src,
-+                      Constant *&mask,
-+                      Constant *&shift);
-+  // Expand the bit field insert instruction on versions of OpenCL that
-+  // don't support it.
-+  bool expandBFI(CallInst *CI);
-+  // Expand the bit field mask instruction on versions of OpenCL that
-+  // don't support it.
-+  bool expandBFM(CallInst *CI);
-+  // On 7XX and 8XX hardware we do not have 24 bit signed operations, so
-+  // in that case we need to expand them. These functions check for 24 bit
-+  // calls and then expand them.
-+  bool isSigned24BitOps(CallInst *CI);
-+  void expandSigned24BitOps(CallInst *CI);
-+  // One optimization that can occur is that if the required workgroup size
-+  // is specified, then the result of get_local_size is known at compile
-+  // time and can be returned accordingly.
-+  bool isRWGLocalOpt(CallInst *CI);
-+  // On Northern Islands cards the division is slightly less accurate than
-+  // on previous generations, so we need to utilize a more accurate
-+  // division there; on all other cards the accurate divide can be
-+  // translated to a normal divide.
-+  bool convertAccurateDivide(CallInst *CI);
-+  void expandAccurateDivide(CallInst *CI);
-+  // If the alignment is set incorrectly, it can produce really inefficient
-+  // code. This checks for that scenario and fixes it if possible.
-+  bool correctMisalignedMemOp(Instruction *inst);
-+
-+  // If we are in no opt mode, then we need to make sure that
-+  // local samplers are properly propagated, as constant propagation
-+  // doesn't occur and we need to know the value of kernel defined
-+  // samplers at compile time.
-+  bool propagateSamplerInst(CallInst *CI);
-+
-+  // Helper functions
-+
-+  // Group of functions that recursively calculate the size of a structure
-+  // based on its sub-types.
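-+  // (Illustration: for struct { i32 a; float b[2]; } these return
-+  // 4 + 2 * 4 = 12 bytes, the plain sum of the element sizes; padding
-+  // and alignment are not modeled.)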
-+ size_t getTypeSize(Type * const T, bool dereferencePtr = false); -+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); -+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); -+ size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); -+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); -+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); -+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); -+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); -+ -+ LLVMContext *mCTX; -+ Function *mF; -+ const AMDGPUSubtarget *mSTM; -+ SmallVector< std::pair, 16> atomicFuncs; -+ SmallVector isConstVec; -+}; // class AMDGPUPeepholeOpt -+ char AMDGPUPeepholeOpt::ID = 0; -+ -+// A template function that has two levels of looping before calling the -+// function with a pointer to the current iterator. -+template -+Function safeNestedForEach(InputIterator First, InputIterator Last, -+ SecondIterator S, Function F) { -+ for ( ; First != Last; ++First) { -+ SecondIterator sf, sl; -+ for (sf = First->begin(), sl = First->end(); -+ sf != sl; ) { -+ if (!F(&sf)) { -+ ++sf; -+ } -+ } -+ } -+ return F; -+} -+ -+} // anonymous namespace -+ -+namespace llvm { -+ FunctionPass * -+ createAMDGPUPeepholeOpt(TargetMachine &tm) { -+ return new AMDGPUPeepholeOpt(tm); -+ } -+} // llvm namespace -+ -+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm) -+ : FunctionPass(ID), TM(tm) { -+ mDebug = DEBUGME; -+ optLevel = TM.getOptLevel(); -+ -+} -+ -+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() { -+} -+ -+const char * -+AMDGPUPeepholeOpt::getPassName() const { -+ return "AMDGPU PeepHole Optimization Pass"; -+} -+ -+bool -+containsPointerType(Type *Ty) { -+ if (!Ty) { -+ return false; -+ } -+ switch(Ty->getTypeID()) { -+ default: -+ return false; -+ case Type::StructTyID: { -+ const StructType *ST = dyn_cast(Ty); -+ for (StructType::element_iterator stb = ST->element_begin(), -+ ste = ST->element_end(); stb != ste; ++stb) { -+ if (!containsPointerType(*stb)) { -+ continue; -+ } -+ return true; -+ } -+ break; -+ } -+ case Type::VectorTyID: -+ case Type::ArrayTyID: -+ return containsPointerType(dyn_cast(Ty)->getElementType()); -+ case Type::PointerTyID: -+ return true; -+ }; -+ return false; -+} -+ -+bool -+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) { -+ bool dumpAll = false; -+ for (Function::const_arg_iterator cab = F.arg_begin(), -+ cae = F.arg_end(); cab != cae; ++cab) { -+ const Argument *arg = cab; -+ const PointerType *PT = dyn_cast(arg->getType()); -+ if (!PT) { -+ continue; -+ } -+ Type *DereferencedType = PT->getElementType(); -+ if (!dyn_cast(DereferencedType) -+ ) { -+ continue; -+ } -+ if (!containsPointerType(DereferencedType)) { -+ continue; -+ } -+ // FIXME: Because a pointer inside of a struct/union may be aliased to -+ // another pointer we need to take the conservative approach and place all -+ // pointers into the arena until more advanced detection is implemented. -+ dumpAll = true; -+ } -+ return dumpAll; -+} -+void -+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() { -+ if (isConstVec.empty()) { -+ return; -+ } -+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { -+ CallInst *CI = isConstVec[x]; -+ Constant *CV = dyn_cast(CI->getOperand(0)); -+ Type *aType = Type::getInt32Ty(*mCTX); -+ Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) -+ : ConstantInt::get(aType, 0); -+ CI->replaceAllUsesWith(Val); -+ CI->eraseFromParent(); -+ } -+ isConstVec.clear(); -+} -+void -+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) { -+ // Don't do anything if we don't have any atomic operations. -+ if (atomicFuncs.empty()) { -+ return; -+ } -+ // Change the function name for the atomic if it is required -+ uint32_t size = atomicFuncs.size(); -+ for (uint32_t x = 0; x < size; ++x) { -+ atomicFuncs[x].first->setOperand( -+ atomicFuncs[x].first->getNumOperands()-1, -+ atomicFuncs[x].second); -+ -+ } -+ mChanged = true; -+ if (mConvertAtomics) { -+ return; -+ } -+} -+ -+bool -+AMDGPUPeepholeOpt::runOnFunction(Function &MF) { -+ mChanged = false; -+ mF = &MF; -+ mSTM = &TM.getSubtarget(); -+ if (mDebug) { -+ MF.dump(); -+ } -+ mCTX = &MF.getType()->getContext(); -+ mConvertAtomics = true; -+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), -+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations), -+ this)); -+ -+ doAtomicConversionIfNeeded(MF); -+ doIsConstCallConversionIfNeeded(); -+ -+ if (mDebug) { -+ MF.dump(); -+ } -+ return mChanged; -+} -+ -+bool -+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) { -+ Instruction *inst = (*bbb); -+ CallInst *CI = dyn_cast(inst); -+ if (!CI) { -+ return false; -+ } -+ if (isSigned24BitOps(CI)) { -+ expandSigned24BitOps(CI); -+ ++(*bbb); -+ CI->eraseFromParent(); -+ return true; -+ } -+ if (propagateSamplerInst(CI)) { -+ return false; -+ } -+ if (expandBFI(CI) || expandBFM(CI)) { -+ ++(*bbb); -+ CI->eraseFromParent(); -+ return true; -+ } -+ if (convertAccurateDivide(CI)) { -+ expandAccurateDivide(CI); -+ ++(*bbb); -+ CI->eraseFromParent(); -+ return true; -+ } -+ -+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); -+ if (calleeName.startswith("__amdil_is_constant")) { -+ // If we do not have optimizations, then this -+ // cannot be properly evaluated, so we add the -+ // call instruction to a vector and process -+ // them at the end of processing after the -+ // samplers have been correctly handled. -+ if (optLevel == CodeGenOpt::None) { -+ isConstVec.push_back(CI); -+ return false; -+ } else { -+ Constant *CV = dyn_cast(CI->getOperand(0)); -+ Type *aType = Type::getInt32Ty(*mCTX); -+ Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) -+ : ConstantInt::get(aType, 0); -+ CI->replaceAllUsesWith(Val); -+ ++(*bbb); -+ CI->eraseFromParent(); -+ return true; -+ } -+ } -+ -+ if (calleeName.equals("__amdil_is_asic_id_i32")) { -+ ConstantInt *CV = dyn_cast(CI->getOperand(0)); -+ Type *aType = Type::getInt32Ty(*mCTX); -+ Value *Val = CV; -+ if (Val) { -+ Val = ConstantInt::get(aType, -+ mSTM->device()->getDeviceFlag() & CV->getZExtValue()); -+ } else { -+ Val = ConstantInt::get(aType, 0); -+ } -+ CI->replaceAllUsesWith(Val); -+ ++(*bbb); -+ CI->eraseFromParent(); -+ return true; -+ } -+ Function *F = dyn_cast(CI->getOperand(CI->getNumOperands()-1)); -+ if (!F) { -+ return false; -+ } -+ if (F->getName().startswith("__atom") && !CI->getNumUses() -+ && F->getName().find("_xchg") == StringRef::npos) { -+ std::string buffer(F->getName().str() + "_noret"); -+ F = dyn_cast( -+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); -+ atomicFuncs.push_back(std::make_pair (CI, F)); -+ } -+ -+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) -+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) { -+ return false; -+ } -+ if (!mConvertAtomics) { -+ return false; -+ } -+ StringRef name = F->getName(); -+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { -+ mConvertAtomics = false; -+ } -+ return false; -+} -+ -+bool -+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base, -+ Instruction *&src, -+ Constant *&mask, -+ Constant *&shift) { -+ if (!base) { -+ if (mDebug) { -+ dbgs() << "Null pointer passed into function.\n"; -+ } -+ return false; -+ } -+ bool andOp = false; -+ if (base->getOpcode() == Instruction::Shl) { -+ shift = dyn_cast(base->getOperand(1)); -+ } else if (base->getOpcode() == Instruction::And) { -+ mask = dyn_cast(base->getOperand(1)); -+ andOp = true; -+ } else { -+ if (mDebug) { -+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; -+ } -+ // If the base is neither a Shl or a And, we don't fit any of the patterns above. -+ return false; -+ } -+ src = dyn_cast(base->getOperand(0)); -+ if (!src) { -+ if (mDebug) { -+ dbgs() << "Failed setup since the base operand is not an instruction!\n"; -+ } -+ return false; -+ } -+ // If we find an 'and' operation, then we don't need to -+ // find the next operation as we already know the -+ // bits that are valid at this point. -+ if (andOp) { -+ return true; -+ } -+ if (src->getOpcode() == Instruction::Shl && !shift) { -+ shift = dyn_cast(src->getOperand(1)); -+ src = dyn_cast(src->getOperand(0)); -+ } else if (src->getOpcode() == Instruction::And && !mask) { -+ mask = dyn_cast(src->getOperand(1)); -+ } -+ if (!mask && !shift) { -+ if (mDebug) { -+ dbgs() << "Failed setup since both mask and shift are NULL!\n"; -+ } -+ // Did not find a constant mask or a shift. -+ return false; -+ } -+ return true; -+} -+bool -+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) { -+ if (!inst) { -+ return false; -+ } -+ if (!inst->isBinaryOp()) { -+ return false; -+ } -+ if (inst->getOpcode() != Instruction::Or) { -+ return false; -+ } -+ if (optLevel == CodeGenOpt::None) { -+ return false; -+ } -+ // We want to do an optimization on a sequence of ops that in the end equals a -+ // single ISA instruction. 
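-+  // (Concrete example, for illustration: with 32 bit values,
-+  //   (a & 0xff) | (b << 8)
-+  // keeps the low byte of a and inserts b above it, and can collapse
-+  // into a single __amdil_ubit_insert call; it is one instance of the
-+  // patterns listed next.)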
-+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) -+ // Some simplified versions of this pattern are as follows: -+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 -+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E -+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B -+ // (A & B) | (D << F) when (1 << F) >= B -+ // (A << C) | (D & E) when (1 << C) >= E -+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { -+ // The HD4XXX hardware doesn't support the ubit_insert instruction. -+ return false; -+ } -+ Type *aType = inst->getType(); -+ bool isVector = aType->isVectorTy(); -+ int numEle = 1; -+ // This optimization only works on 32bit integers. -+ if (aType->getScalarType() -+ != Type::getInt32Ty(inst->getContext())) { -+ return false; -+ } -+ if (isVector) { -+ const VectorType *VT = dyn_cast(aType); -+ numEle = VT->getNumElements(); -+ // We currently cannot support more than 4 elements in a intrinsic and we -+ // cannot support Vec3 types. -+ if (numEle > 4 || numEle == 3) { -+ return false; -+ } -+ } -+ // TODO: Handle vectors. -+ if (isVector) { -+ if (mDebug) { -+ dbgs() << "!!! Vectors are not supported yet!\n"; -+ } -+ return false; -+ } -+ Instruction *LHSSrc = NULL, *RHSSrc = NULL; -+ Constant *LHSMask = NULL, *RHSMask = NULL; -+ Constant *LHSShift = NULL, *RHSShift = NULL; -+ Instruction *LHS = dyn_cast(inst->getOperand(0)); -+ Instruction *RHS = dyn_cast(inst->getOperand(1)); -+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { -+ if (mDebug) { -+ dbgs() << "Found an OR Operation that failed setup!\n"; -+ inst->dump(); -+ if (LHS) { LHS->dump(); } -+ if (LHSSrc) { LHSSrc->dump(); } -+ if (LHSMask) { LHSMask->dump(); } -+ if (LHSShift) { LHSShift->dump(); } -+ } -+ // There was an issue with the setup for BitInsert. -+ return false; -+ } -+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { -+ if (mDebug) { -+ dbgs() << "Found an OR Operation that failed setup!\n"; -+ inst->dump(); -+ if (RHS) { RHS->dump(); } -+ if (RHSSrc) { RHSSrc->dump(); } -+ if (RHSMask) { RHSMask->dump(); } -+ if (RHSShift) { RHSShift->dump(); } -+ } -+ // There was an issue with the setup for BitInsert. -+ return false; -+ } -+ if (mDebug) { -+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; -+ dbgs() << "Op: "; inst->dump(); -+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } -+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } -+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } -+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } -+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } -+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } -+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } -+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } -+ } -+ Constant *offset = NULL; -+ Constant *width = NULL; -+ uint32_t lhsMaskVal = 0, rhsMaskVal = 0; -+ uint32_t lhsShiftVal = 0, rhsShiftVal = 0; -+ uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0; -+ uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0; -+ lhsMaskVal = (LHSMask -+ ? dyn_cast(LHSMask)->getZExtValue() : 0); -+ rhsMaskVal = (RHSMask -+ ? dyn_cast(RHSMask)->getZExtValue() : 0); -+ lhsShiftVal = (LHSShift -+ ? 
dyn_cast(LHSShift)->getZExtValue() : 0); -+ rhsShiftVal = (RHSShift -+ ? dyn_cast(RHSShift)->getZExtValue() : 0); -+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; -+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; -+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; -+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; -+ // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). -+ if (mDebug) { -+ dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")"); -+ dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ; -+ dbgs() << (RHSMask ? " & E)" : ")"); -+ dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n"); -+ dbgs() << "A = LHSSrc\t\tD = RHSSrc \n"; -+ dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n"; -+ dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n"; -+ dbgs() << "width(B) = " << lhsMaskWidth; -+ dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n"; -+ dbgs() << "offset(B) = " << lhsMaskOffset; -+ dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n"; -+ dbgs() << "Constraints: \n"; -+ dbgs() << "\t(1) B ^ E == 0\n"; -+ dbgs() << "\t(2-LHS) B is a mask\n"; -+ dbgs() << "\t(2-LHS) E is a mask\n"; -+ dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n"; -+ dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n"; -+ } -+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { -+ if (mDebug) { -+ dbgs() << lhsMaskVal << " ^ " << rhsMaskVal; -+ dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n"; -+ dbgs() << "Failed constraint 1!\n"; -+ } -+ return false; -+ } -+ if (mDebug) { -+ dbgs() << "LHS = " << lhsMaskOffset << ""; -+ dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = "; -+ dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)); -+ dbgs() << "\nRHS = " << rhsMaskOffset << ""; -+ dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = "; -+ dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)); -+ dbgs() << "\n"; -+ } -+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { -+ offset = ConstantInt::get(aType, lhsMaskOffset, false); -+ width = ConstantInt::get(aType, lhsMaskWidth, false); -+ RHSSrc = RHS; -+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { -+ if (mDebug) { -+ dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n"; -+ dbgs() << "Failed constraint 2!\n"; -+ } -+ return false; -+ } -+ if (!LHSShift) { -+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, -+ "MaskShr", LHS); -+ } else if (lhsShiftVal != lhsMaskOffset) { -+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, -+ "MaskShr", LHS); -+ } -+ if (mDebug) { -+ dbgs() << "Optimizing LHS!\n"; -+ } -+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { -+ offset = ConstantInt::get(aType, rhsMaskOffset, false); -+ width = ConstantInt::get(aType, rhsMaskWidth, false); -+ LHSSrc = RHSSrc; -+ RHSSrc = LHS; -+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { -+ if (mDebug) { -+ dbgs() << "Non-Mask: " << rhsMaskVal << "\n"; -+ dbgs() << "Failed constraint 2!\n"; -+ } -+ return false; -+ } -+ if (!RHSShift) { -+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, -+ "MaskShr", RHS); -+ } else if (rhsShiftVal != rhsMaskOffset) { -+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, -+ "MaskShr", RHS); -+ } -+ if (mDebug) { -+ dbgs() << "Optimizing RHS!\n"; -+ } -+ } else { -+ if (mDebug) { -+ dbgs() << "Failed 
constraint 3!\n"; -+ } -+ return false; -+ } -+ if (mDebug) { -+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } -+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } -+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } -+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } -+ } -+ if (!offset || !width) { -+ if (mDebug) { -+ dbgs() << "Either width or offset are NULL, failed detection!\n"; -+ } -+ return false; -+ } -+ // Lets create the function signature. -+ std::vector callTypes; -+ callTypes.push_back(aType); -+ callTypes.push_back(aType); -+ callTypes.push_back(aType); -+ callTypes.push_back(aType); -+ FunctionType *funcType = FunctionType::get(aType, callTypes, false); -+ std::string name = "__amdil_ubit_insert"; -+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } -+ Function *Func = -+ dyn_cast(inst->getParent()->getParent()->getParent()-> -+ getOrInsertFunction(llvm::StringRef(name), funcType)); -+ Value *Operands[4] = { -+ width, -+ offset, -+ LHSSrc, -+ RHSSrc -+ }; -+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); -+ if (mDebug) { -+ dbgs() << "Old Inst: "; -+ inst->dump(); -+ dbgs() << "New Inst: "; -+ CI->dump(); -+ dbgs() << "\n\n"; -+ } -+ CI->insertBefore(inst); -+ inst->replaceAllUsesWith(CI); -+ return true; -+} -+ -+bool -+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) { -+ if (!inst) { -+ return false; -+ } -+ if (!inst->isBinaryOp()) { -+ return false; -+ } -+ if (inst->getOpcode() != Instruction::And) { -+ return false; -+ } -+ if (optLevel == CodeGenOpt::None) { -+ return false; -+ } -+ // We want to do some simple optimizations on Shift right/And patterns. The -+ // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a -+ // value smaller than 32 and C is a mask. If C is a constant value, then the -+ // following transformation can occur. For signed integers, it turns into the -+ // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned -+ // integers, it turns into the function call dst = -+ // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract -+ // can be found in Section 7.9 of the ATI IL spec of the stream SDK for -+ // Evergreen hardware. -+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { -+ // This does not work on HD4XXX hardware. -+ return false; -+ } -+ Type *aType = inst->getType(); -+ bool isVector = aType->isVectorTy(); -+ -+ // XXX Support vector types -+ if (isVector) { -+ return false; -+ } -+ int numEle = 1; -+ // This only works on 32bit integers -+ if (aType->getScalarType() -+ != Type::getInt32Ty(inst->getContext())) { -+ return false; -+ } -+ if (isVector) { -+ const VectorType *VT = dyn_cast(aType); -+ numEle = VT->getNumElements(); -+ // We currently cannot support more than 4 elements in a intrinsic and we -+ // cannot support Vec3 types. -+ if (numEle > 4 || numEle == 3) { -+ return false; -+ } -+ } -+ BinaryOperator *ShiftInst = dyn_cast(inst->getOperand(0)); -+ // If the first operand is not a shift instruction, then we can return as it -+ // doesn't match this pattern. -+ if (!ShiftInst || !ShiftInst->isShift()) { -+ return false; -+ } -+ // If we are a shift left, then we need don't match this pattern. 
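-+  // (Only a right shift fits the pattern: it moves the field down to bit
-+  // zero before the mask is applied, which is exactly what a bit extract
-+  // expresses; a left shift never does.)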
-+ if (ShiftInst->getOpcode() == Instruction::Shl) { -+ return false; -+ } -+ bool isSigned = ShiftInst->isArithmeticShift(); -+ Constant *AndMask = dyn_cast(inst->getOperand(1)); -+ Constant *ShrVal = dyn_cast(ShiftInst->getOperand(1)); -+ // Lets make sure that the shift value and the and mask are constant integers. -+ if (!AndMask || !ShrVal) { -+ return false; -+ } -+ Constant *newMaskConst; -+ Constant *shiftValConst; -+ if (isVector) { -+ // Handle the vector case -+ std::vector maskVals; -+ std::vector shiftVals; -+ ConstantVector *AndMaskVec = dyn_cast(AndMask); -+ ConstantVector *ShrValVec = dyn_cast(ShrVal); -+ Type *scalarType = AndMaskVec->getType()->getScalarType(); -+ assert(AndMaskVec->getNumOperands() == -+ ShrValVec->getNumOperands() && "cannot have a " -+ "combination where the number of elements to a " -+ "shift and an and are different!"); -+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { -+ ConstantInt *AndCI = dyn_cast(AndMaskVec->getOperand(x)); -+ ConstantInt *ShiftIC = dyn_cast(ShrValVec->getOperand(x)); -+ if (!AndCI || !ShiftIC) { -+ return false; -+ } -+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); -+ if (!isMask_32(maskVal)) { -+ return false; -+ } -+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal); -+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); -+ // If the mask or shiftval is greater than the bitcount, then break out. -+ if (maskVal >= 32 || shiftVal >= 32) { -+ return false; -+ } -+ // If the mask val is greater than the the number of original bits left -+ // then this optimization is invalid. -+ if (maskVal > (32 - shiftVal)) { -+ return false; -+ } -+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); -+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); -+ } -+ newMaskConst = ConstantVector::get(maskVals); -+ shiftValConst = ConstantVector::get(shiftVals); -+ } else { -+ // Handle the scalar case -+ uint32_t maskVal = (uint32_t)dyn_cast(AndMask)->getZExtValue(); -+ // This must be a mask value where all lower bits are set to 1 and then any -+ // bit higher is set to 0. -+ if (!isMask_32(maskVal)) { -+ return false; -+ } -+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal); -+ // Count the number of bits set in the mask, this is the width of the -+ // resulting bit set that is extracted from the source value. -+ uint32_t shiftVal = (uint32_t)dyn_cast(ShrVal)->getZExtValue(); -+ // If the mask or shift val is greater than the bitcount, then break out. -+ if (maskVal >= 32 || shiftVal >= 32) { -+ return false; -+ } -+ // If the mask val is greater than the the number of original bits left then -+ // this optimization is invalid. -+ if (maskVal > (32 - shiftVal)) { -+ return false; -+ } -+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned); -+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); -+ } -+ // Lets create the function signature. -+ std::vector callTypes; -+ callTypes.push_back(aType); -+ callTypes.push_back(aType); -+ callTypes.push_back(aType); -+ FunctionType *funcType = FunctionType::get(aType, callTypes, false); -+ std::string name = "llvm.AMDGPU.bit.extract.u32"; -+ if (isVector) { -+ name += ".v" + itostr(numEle) + "i32"; -+ } else { -+ name += "."; -+ } -+ // Lets create the function. 
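-+  // (Scalar example, added for illustration: (x >> 3) & 0x1f has a mask
-+  // of five trailing ones and a shift of three, so it becomes a call
-+  // along the lines of
-+  //   %r = call i32 @llvm.AMDGPU.bit.extract.u32(i32 %x, i32 3, i32 5)
-+  // matching the operand order built below: source, shift, width.)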
-+ Function *Func = -+ dyn_cast(inst->getParent()->getParent()->getParent()-> -+ getOrInsertFunction(llvm::StringRef(name), funcType)); -+ Value *Operands[3] = { -+ ShiftInst->getOperand(0), -+ shiftValConst, -+ newMaskConst -+ }; -+ // Lets create the Call with the operands -+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); -+ CI->setDoesNotAccessMemory(); -+ CI->insertBefore(inst); -+ inst->replaceAllUsesWith(CI); -+ return true; -+} -+ -+bool -+AMDGPUPeepholeOpt::expandBFI(CallInst *CI) { -+ if (!CI) { -+ return false; -+ } -+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1); -+ if (!LHS->getName().startswith("__amdil_bfi")) { -+ return false; -+ } -+ Type* type = CI->getOperand(0)->getType(); -+ Constant *negOneConst = NULL; -+ if (type->isVectorTy()) { -+ std::vector negOneVals; -+ negOneConst = ConstantInt::get(CI->getContext(), -+ APInt(32, StringRef("-1"), 10)); -+ for (size_t x = 0, -+ y = dyn_cast(type)->getNumElements(); x < y; ++x) { -+ negOneVals.push_back(negOneConst); -+ } -+ negOneConst = ConstantVector::get(negOneVals); -+ } else { -+ negOneConst = ConstantInt::get(CI->getContext(), -+ APInt(32, StringRef("-1"), 10)); -+ } -+ // __amdil_bfi => (A & B) | (~A & C) -+ BinaryOperator *lhs = -+ BinaryOperator::Create(Instruction::And, CI->getOperand(0), -+ CI->getOperand(1), "bfi_and", CI); -+ BinaryOperator *rhs = -+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, -+ "bfi_not", CI); -+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), -+ "bfi_and", CI); -+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); -+ CI->replaceAllUsesWith(lhs); -+ return true; -+} -+ -+bool -+AMDGPUPeepholeOpt::expandBFM(CallInst *CI) { -+ if (!CI) { -+ return false; -+ } -+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1); -+ if (!LHS->getName().startswith("__amdil_bfm")) { -+ return false; -+ } -+ // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) -+ Constant *newMaskConst = NULL; -+ Constant *newShiftConst = NULL; -+ Type* type = CI->getOperand(0)->getType(); -+ if (type->isVectorTy()) { -+ std::vector newMaskVals, newShiftVals; -+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); -+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); -+ for (size_t x = 0, -+ y = dyn_cast(type)->getNumElements(); x < y; ++x) { -+ newMaskVals.push_back(newMaskConst); -+ newShiftVals.push_back(newShiftConst); -+ } -+ newMaskConst = ConstantVector::get(newMaskVals); -+ newShiftConst = ConstantVector::get(newShiftVals); -+ } else { -+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); -+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); -+ } -+ BinaryOperator *lhs = -+ BinaryOperator::Create(Instruction::And, CI->getOperand(0), -+ newMaskConst, "bfm_mask", CI); -+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, -+ lhs, "bfm_shl", CI); -+ lhs = BinaryOperator::Create(Instruction::Sub, lhs, -+ newShiftConst, "bfm_sub", CI); -+ BinaryOperator *rhs = -+ BinaryOperator::Create(Instruction::And, CI->getOperand(1), -+ newMaskConst, "bfm_mask", CI); -+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); -+ CI->replaceAllUsesWith(lhs); -+ return true; -+} -+ -+bool -+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) { -+ Instruction *inst = (*bbb); -+ if (optimizeCallInst(bbb)) { -+ return true; -+ } -+ if (optimizeBitExtract(inst)) { -+ return false; -+ } -+ if (optimizeBitInsert(inst)) { -+ return false; -+ } -+ if 
(correctMisalignedMemOp(inst)) { -+ return false; -+ } -+ return false; -+} -+bool -+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) { -+ LoadInst *linst = dyn_cast(inst); -+ StoreInst *sinst = dyn_cast(inst); -+ unsigned alignment; -+ Type* Ty = inst->getType(); -+ if (linst) { -+ alignment = linst->getAlignment(); -+ Ty = inst->getType(); -+ } else if (sinst) { -+ alignment = sinst->getAlignment(); -+ Ty = sinst->getValueOperand()->getType(); -+ } else { -+ return false; -+ } -+ unsigned size = getTypeSize(Ty); -+ if (size == alignment || size < alignment) { -+ return false; -+ } -+ if (!Ty->isStructTy()) { -+ return false; -+ } -+ if (alignment < 4) { -+ if (linst) { -+ linst->setAlignment(0); -+ return true; -+ } else if (sinst) { -+ sinst->setAlignment(0); -+ return true; -+ } -+ } -+ return false; -+} -+bool -+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) { -+ if (!CI) { -+ return false; -+ } -+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1); -+ std::string namePrefix = LHS->getName().substr(0, 14); -+ if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" -+ && namePrefix != "__amdil__imul24_high") { -+ return false; -+ } -+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) { -+ return false; -+ } -+ return true; -+} -+ -+void -+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { -+ assert(isSigned24BitOps(CI) && "Must be a " -+ "signed 24 bit operation to call this function!"); -+ Value *LHS = CI->getOperand(CI->getNumOperands()-1); -+ // On 7XX and 8XX we do not have signed 24bit, so we need to -+ // expand it to the following: -+ // imul24 turns into 32bit imul -+ // imad24 turns into 32bit imad -+ // imul24_high turns into 32bit imulhigh -+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") { -+ Type *aType = CI->getOperand(0)->getType(); -+ bool isVector = aType->isVectorTy(); -+ int numEle = isVector ? dyn_cast(aType)->getNumElements() : 1; -+ std::vector callTypes; -+ callTypes.push_back(CI->getOperand(0)->getType()); -+ callTypes.push_back(CI->getOperand(1)->getType()); -+ callTypes.push_back(CI->getOperand(2)->getType()); -+ FunctionType *funcType = -+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); -+ std::string name = "__amdil_imad"; -+ if (isVector) { -+ name += "_v" + itostr(numEle) + "i32"; -+ } else { -+ name += "_i32"; -+ } -+ Function *Func = dyn_cast( -+ CI->getParent()->getParent()->getParent()-> -+ getOrInsertFunction(llvm::StringRef(name), funcType)); -+ Value *Operands[3] = { -+ CI->getOperand(0), -+ CI->getOperand(1), -+ CI->getOperand(2) -+ }; -+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); -+ nCI->insertBefore(CI); -+ CI->replaceAllUsesWith(nCI); -+ } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { -+ BinaryOperator *mulOp = -+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), -+ CI->getOperand(1), "imul24", CI); -+ CI->replaceAllUsesWith(mulOp); -+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { -+ Type *aType = CI->getOperand(0)->getType(); -+ -+ bool isVector = aType->isVectorTy(); -+ int numEle = isVector ? 
dyn_cast(aType)->getNumElements() : 1; -+ std::vector callTypes; -+ callTypes.push_back(CI->getOperand(0)->getType()); -+ callTypes.push_back(CI->getOperand(1)->getType()); -+ FunctionType *funcType = -+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); -+ std::string name = "__amdil_imul_high"; -+ if (isVector) { -+ name += "_v" + itostr(numEle) + "i32"; -+ } else { -+ name += "_i32"; -+ } -+ Function *Func = dyn_cast( -+ CI->getParent()->getParent()->getParent()-> -+ getOrInsertFunction(llvm::StringRef(name), funcType)); -+ Value *Operands[2] = { -+ CI->getOperand(0), -+ CI->getOperand(1) -+ }; -+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); -+ nCI->insertBefore(CI); -+ CI->replaceAllUsesWith(nCI); -+ } -+} -+ -+bool -+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) { -+ return (CI != NULL -+ && CI->getOperand(CI->getNumOperands() - 1)->getName() -+ == "__amdil_get_local_size_int"); -+} -+ -+bool -+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) { -+ if (!CI) { -+ return false; -+ } -+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX -+ && (mSTM->getDeviceName() == "cayman")) { -+ return false; -+ } -+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) -+ == "__amdil_improved_div"; -+} -+ -+void -+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) { -+ assert(convertAccurateDivide(CI) -+ && "expanding accurate divide can only happen if it is expandable!"); -+ BinaryOperator *divOp = -+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), -+ CI->getOperand(1), "fdiv32", CI); -+ CI->replaceAllUsesWith(divOp); -+} -+ -+bool -+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) { -+ if (optLevel != CodeGenOpt::None) { -+ return false; -+ } -+ -+ if (!CI) { -+ return false; -+ } -+ -+ unsigned funcNameIdx = 0; -+ funcNameIdx = CI->getNumOperands() - 1; -+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); -+ if (calleeName != "__amdil_image2d_read_norm" -+ && calleeName != "__amdil_image2d_read_unnorm" -+ && calleeName != "__amdil_image3d_read_norm" -+ && calleeName != "__amdil_image3d_read_unnorm") { -+ return false; -+ } -+ -+ unsigned samplerIdx = 2; -+ samplerIdx = 1; -+ Value *sampler = CI->getOperand(samplerIdx); -+ LoadInst *lInst = dyn_cast(sampler); -+ if (!lInst) { -+ return false; -+ } -+ -+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { -+ return false; -+ } -+ -+ GlobalVariable *gv = dyn_cast(lInst->getPointerOperand()); -+ // If we are loading from what is not a global value, then we -+ // fail and return. -+ if (!gv) { -+ return false; -+ } -+ -+ // If we don't have an initializer or we have an initializer and -+ // the initializer is not a 32bit integer, we fail. -+ if (!gv->hasInitializer() -+ || !gv->getInitializer()->getType()->isIntegerTy(32)) { -+ return false; -+ } -+ -+ // Now that we have the global variable initializer, lets replace -+ // all uses of the load instruction with the samplerVal and -+ // reparse the __amdil_is_constant() function. 
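-+  // (Sketch, for illustration: when a kernel-defined sampler like
-+  //   const sampler_t s = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
-+  // ends up as a load of a global with an i32 initializer, that
-+  // initializer replaces the load feeding the image read call, so the
-+  // sampler value is known even at -O0.)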
-+ Constant *samplerVal = gv->getInitializer(); -+ lInst->replaceAllUsesWith(samplerVal); -+ return true; -+} -+ -+bool -+AMDGPUPeepholeOpt::doInitialization(Module &M) { -+ return false; -+} -+ -+bool -+AMDGPUPeepholeOpt::doFinalization(Module &M) { -+ return false; -+} -+ -+void -+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const { -+ AU.addRequired(); -+ FunctionPass::getAnalysisUsage(AU); -+ AU.setPreservesAll(); -+} -+ -+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { -+ size_t size = 0; -+ if (!T) { -+ return size; -+ } -+ switch (T->getTypeID()) { -+ case Type::X86_FP80TyID: -+ case Type::FP128TyID: -+ case Type::PPC_FP128TyID: -+ case Type::LabelTyID: -+ assert(0 && "These types are not supported by this backend"); -+ default: -+ case Type::FloatTyID: -+ case Type::DoubleTyID: -+ size = T->getPrimitiveSizeInBits() >> 3; -+ break; -+ case Type::PointerTyID: -+ size = getTypeSize(dyn_cast(T), dereferencePtr); -+ break; -+ case Type::IntegerTyID: -+ size = getTypeSize(dyn_cast(T), dereferencePtr); -+ break; -+ case Type::StructTyID: -+ size = getTypeSize(dyn_cast(T), dereferencePtr); -+ break; -+ case Type::ArrayTyID: -+ size = getTypeSize(dyn_cast(T), dereferencePtr); -+ break; -+ case Type::FunctionTyID: -+ size = getTypeSize(dyn_cast(T), dereferencePtr); -+ break; -+ case Type::VectorTyID: -+ size = getTypeSize(dyn_cast(T), dereferencePtr); -+ break; -+ }; -+ return size; -+} -+ -+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST, -+ bool dereferencePtr) { -+ size_t size = 0; -+ if (!ST) { -+ return size; -+ } -+ Type *curType; -+ StructType::element_iterator eib; -+ StructType::element_iterator eie; -+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { -+ curType = *eib; -+ size += getTypeSize(curType, dereferencePtr); -+ } -+ return size; -+} -+ -+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT, -+ bool dereferencePtr) { -+ return IT ? (IT->getBitWidth() >> 3) : 0; -+} -+ -+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT, -+ bool dereferencePtr) { -+ assert(0 && "Should not be able to calculate the size of an function type"); -+ return 0; -+} -+ -+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT, -+ bool dereferencePtr) { -+ return (size_t)(AT ? (getTypeSize(AT->getElementType(), -+ dereferencePtr) * AT->getNumElements()) -+ : 0); -+} -+ -+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT, -+ bool dereferencePtr) { -+ return VT ? 
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
-+                                      bool dereferencePtr) {
-+  if (!PT) {
-+    return 0;
-+  }
-+  Type *CT = PT->getElementType();
-+  if (CT->getTypeID() == Type::StructTyID &&
-+      PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
-+    return getTypeSize(dyn_cast<StructType>(CT));
-+  } else if (dereferencePtr) {
-+    size_t size = 0;
-+    for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
-+      size += getTypeSize(PT->getContainedType(x), dereferencePtr);
-+    }
-+    return size;
-+  } else {
-+    return 4;
-+  }
-+}
-+
-+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
-+                                      bool dereferencePtr) {
-+  //assert(0 && "Should not be able to calculate the size of an opaque type");
-+  return 4;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILRegisterInfo.td llvm-r600/lib/Target/R600/AMDILRegisterInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDILRegisterInfo.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILRegisterInfo.td	2013-01-25 19:43:57.450049721 +0100
-@@ -0,0 +1,107 @@
-+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+// Declarations that describe the AMDIL register file
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class AMDILReg <bits<16> num, string n> : Register<n> {
-+  field bits<16> Value;
-+  let Value = num;
-+  let Namespace = "AMDGPU";
-+}
-+
-+// We will start with 8 registers for each class before expanding to more
-+// Since the swizzle is added based on the register class, we can leave it
-+// off here and just specify different registers for different register classes
-+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
-+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
-+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
-+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
-+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
-+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
-+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
-+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
-+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
-+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
-+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
-+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
-+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
-+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
-+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
-+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
-+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
-+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
-+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
-+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
-+
-+// All registers between 1000 and 1024 are reserved and cannot be used
-+// unless commented in this section
-+// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
-+// r1020 is used to hold the frame index for local arrays
-+// r1019 is used to hold the dynamic stack allocation pointer
-+// r1018 is used as a temporary register for handwritten code
-+// r1017 is used as a temporary register for handwritten code
-+// r1016 is used as a temporary register for load/store code
-+// r1015 is used as a temporary register for data segment offset
-+// r1014 is used as a
temporary register for store code -+// r1013 is used as the section data pointer register -+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers -+// r1009 is used as the frame pointer register -+// r999 is used as the mem register. -+// r998 is used as the return address register. -+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>; -+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>; -+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>; -+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>; -+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>; -+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>; -+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>; -+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>; -+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>; -+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>; -+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>; -+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>; -+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>; -+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>; -+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>; -+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>; -+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>; -+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>; -+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>; -+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>; -+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>; -+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>; -+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>; -+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>; -+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>; -+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>; -+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>; -+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>; -+def GPRI16 : RegisterClass<"AMDGPU", [i16], 16, -+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { -+ let AltOrders = [(add (sequence "R%u", 1, 20))]; -+ let AltOrderSelect = [{ -+ return 1; -+ }]; -+ } -+def GPRI32 : RegisterClass<"AMDGPU", [i32], 32, -+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { -+ let AltOrders = [(add (sequence "R%u", 1, 20))]; -+ let AltOrderSelect = [{ -+ return 1; -+ }]; -+ } -+def GPRF32 : RegisterClass<"AMDGPU", [f32], 32, -+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { -+ let AltOrders = [(add (sequence "R%u", 1, 20))]; -+ let AltOrderSelect = [{ -+ return 1; -+ }]; -+ } -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILSIDevice.cpp llvm-r600/lib/Target/R600/AMDILSIDevice.cpp ---- llvm-3.2.src/lib/Target/R600/AMDILSIDevice.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILSIDevice.cpp 2013-01-25 19:43:57.450049721 +0100 -@@ -0,0 +1,45 @@ -+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+// -+/// \file -+//==-----------------------------------------------------------------------===// -+#include "AMDILSIDevice.h" -+#include "AMDILEvergreenDevice.h" -+#include "AMDILNIDevice.h" -+#include "AMDGPUSubtarget.h" -+ -+using namespace llvm; -+ -+AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST) -+ : AMDGPUEvergreenDevice(ST) { -+} -+AMDGPUSIDevice::~AMDGPUSIDevice() { -+} -+ -+size_t -+AMDGPUSIDevice::getMaxLDSSize() const { -+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { -+ return MAX_LDS_SIZE_900; -+ } else { -+ return 0; -+ } -+} -+ -+uint32_t -+AMDGPUSIDevice::getGeneration() const { -+ return AMDGPUDeviceInfo::HD7XXX; -+} -+ -+std::string -+AMDGPUSIDevice::getDataLayout() const { -+ return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" -+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" -+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" -+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" -+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048" -+ "-n8:16:32:64"); -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILSIDevice.h llvm-r600/lib/Target/R600/AMDILSIDevice.h ---- llvm-3.2.src/lib/Target/R600/AMDILSIDevice.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/AMDILSIDevice.h 2013-01-25 19:43:57.450049721 +0100 -@@ -0,0 +1,39 @@ -+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//==-----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Interface for the subtarget data classes. -+/// -+/// This file will define the interface that each generation needs to -+/// implement in order to correctly answer queries on the capabilities of the -+/// specific hardware. -+//===---------------------------------------------------------------------===// -+#ifndef AMDILSIDEVICE_H -+#define AMDILSIDEVICE_H -+#include "AMDILEvergreenDevice.h" -+ -+namespace llvm { -+class AMDGPUSubtarget; -+//===---------------------------------------------------------------------===// -+// SI generation of devices and their respective sub classes -+//===---------------------------------------------------------------------===// -+ -+/// \brief The AMDGPUSIDevice is the base class for all Southern Island series -+/// of cards. 
-+class AMDGPUSIDevice : public AMDGPUEvergreenDevice { -+public: -+ AMDGPUSIDevice(AMDGPUSubtarget*); -+ virtual ~AMDGPUSIDevice(); -+ virtual size_t getMaxLDSSize() const; -+ virtual uint32_t getGeneration() const; -+ virtual std::string getDataLayout() const; -+}; -+ -+} // namespace llvm -+#endif // AMDILSIDEVICE_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/CMakeLists.txt llvm-r600/lib/Target/R600/CMakeLists.txt ---- llvm-3.2.src/lib/Target/R600/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/CMakeLists.txt 2013-01-25 19:43:57.453383054 +0100 -@@ -0,0 +1,55 @@ -+set(LLVM_TARGET_DEFINITIONS AMDGPU.td) -+ -+tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info) -+tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info) -+tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) -+tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) -+tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) -+tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic) -+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter) -+tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer) -+tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) -+add_public_tablegen_target(AMDGPUCommonTableGen) -+ -+add_llvm_target(AMDGPUCodeGen -+ AMDIL7XXDevice.cpp -+ AMDILCFGStructurizer.cpp -+ AMDILDevice.cpp -+ AMDILDeviceInfo.cpp -+ AMDILEvergreenDevice.cpp -+ AMDILFrameLowering.cpp -+ AMDILIntrinsicInfo.cpp -+ AMDILISelDAGToDAG.cpp -+ AMDILISelLowering.cpp -+ AMDILNIDevice.cpp -+ AMDILPeepholeOptimizer.cpp -+ AMDILSIDevice.cpp -+ AMDGPUAsmPrinter.cpp -+ AMDGPUMCInstLower.cpp -+ AMDGPUSubtarget.cpp -+ AMDGPUTargetMachine.cpp -+ AMDGPUISelLowering.cpp -+ AMDGPUConvertToISA.cpp -+ AMDGPUInstrInfo.cpp -+ AMDGPURegisterInfo.cpp -+ R600ExpandSpecialInstrs.cpp -+ R600InstrInfo.cpp -+ R600ISelLowering.cpp -+ R600LowerConstCopy.cpp -+ R600MachineFunctionInfo.cpp -+ R600RegisterInfo.cpp -+ SIAssignInterpRegs.cpp -+ SIInstrInfo.cpp -+ SIISelLowering.cpp -+ SILowerLiteralConstants.cpp -+ SILowerControlFlow.cpp -+ SIMachineFunctionInfo.cpp -+ SIRegisterInfo.cpp -+ SIFixSGPRLiveness.cpp -+ ) -+ -+add_dependencies(LLVMR600CodeGen intrinsics_gen) -+ -+add_subdirectory(InstPrinter) -+add_subdirectory(TargetInfo) -+add_subdirectory(MCTargetDesc) -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp ---- llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,156 @@ -+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+// -+// \file -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPUInstPrinter.h" -+#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -+#include "llvm/MC/MCInst.h" -+ -+using namespace llvm; -+ -+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, -+ StringRef Annot) { -+ printInstruction(MI, OS); -+ -+ printAnnotation(OS, Annot); -+} -+ -+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ -+ const MCOperand &Op = MI->getOperand(OpNo); -+ if (Op.isReg()) { -+ switch (Op.getReg()) { -+ // This is the default predicate state, so we don't need to print it. -+ case AMDGPU::PRED_SEL_OFF: break; -+ default: O << getRegisterName(Op.getReg()); break; -+ } -+ } else if (Op.isImm()) { -+ O << Op.getImm(); -+ } else if (Op.isFPImm()) { -+ O << Op.getFPImm(); -+ } else { -+ assert(!"unknown operand type in printOperand"); -+ } -+} -+ -+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ printOperand(MI, OpNo, O); -+ O << ", "; -+ printOperand(MI, OpNo + 1, O); -+} -+ -+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O, StringRef Asm) { -+ const MCOperand &Op = MI->getOperand(OpNo); -+ assert(Op.isImm()); -+ if (Op.getImm() == 1) { -+ O << Asm; -+ } -+} -+ -+void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ printIfSet(MI, OpNo, O, "|"); -+} -+ -+void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ printIfSet(MI, OpNo, O, "_SAT"); -+} -+ -+void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ union Literal { -+ float f; -+ int32_t i; -+ } L; -+ -+ L.i = MI->getOperand(OpNo).getImm(); -+ O << L.i << "(" << L.f << ")"; -+} -+ -+void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ printIfSet(MI, OpNo, O, " *"); -+} -+ -+void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ printIfSet(MI, OpNo, O, "-"); -+} -+ -+void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ switch (MI->getOperand(OpNo).getImm()) { -+ default: break; -+ case 1: -+ O << " * 2.0"; -+ break; -+ case 2: -+ O << " * 4.0"; -+ break; -+ case 3: -+ O << " / 2.0"; -+ break; -+ } -+} -+ -+void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ const MCOperand &Op = MI->getOperand(OpNo); -+ if (Op.getImm() != 0) { -+ O << " + " << Op.getImm(); -+ } -+} -+ -+void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ printIfSet(MI, OpNo, O, "ExecMask,"); -+} -+ -+void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ printIfSet(MI, OpNo, O, "Pred,"); -+} -+ -+void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ const MCOperand &Op = MI->getOperand(OpNo); -+ if (Op.getImm() == 0) { -+ O << " (MASKED)"; -+ } -+} -+ -+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo, -+ raw_ostream &O) { -+ const char * chans = "XYZW"; -+ int sel = MI->getOperand(OpNo).getImm(); -+ -+ int chan = sel & 3; -+ sel >>= 2; -+ -+ if (sel >= 512) { -+ sel -= 512; -+ int cb = sel >> 12; -+ sel &= 4095; -+ O << cb << "[" << sel << "]"; -+ } else if (sel >= 448) { -+ sel -= 448; -+ O << sel; -+ } else if (sel >= 0){ -+ O << sel; -+ } -+ -+ if (sel >= 0) -+ O << "." 
<< chans[chan]; -+} -+ -+#include "AMDGPUGenAsmWriter.inc" -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h ---- llvm-3.2.src/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,53 @@ -+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+//===----------------------------------------------------------------------===// -+ -+#ifndef AMDGPUINSTPRINTER_H -+#define AMDGPUINSTPRINTER_H -+ -+#include "llvm/ADT/StringRef.h" -+#include "llvm/MC/MCInstPrinter.h" -+#include "llvm/Support/raw_ostream.h" -+ -+namespace llvm { -+ -+class AMDGPUInstPrinter : public MCInstPrinter { -+public: -+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, -+ const MCRegisterInfo &MRI) -+ : MCInstPrinter(MAI, MII, MRI) {} -+ -+ //Autogenerated by tblgen -+ void printInstruction(const MCInst *MI, raw_ostream &O); -+ static const char *getRegisterName(unsigned RegNo); -+ -+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); -+ -+private: -+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm); -+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); -+}; -+ -+} // End namespace llvm -+ -+#endif // AMDGPUINSTRPRINTER_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/CMakeLists.txt llvm-r600/lib/Target/R600/InstPrinter/CMakeLists.txt ---- llvm-3.2.src/lib/Target/R600/InstPrinter/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/InstPrinter/CMakeLists.txt 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,7 @@ -+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) -+ -+add_llvm_library(LLVMR600AsmPrinter -+ AMDGPUInstPrinter.cpp -+ ) -+ -+add_dependencies(LLVMR600AsmPrinter R600CommonTableGen) -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/LLVMBuild.txt llvm-r600/lib/Target/R600/InstPrinter/LLVMBuild.txt ---- llvm-3.2.src/lib/Target/R600/InstPrinter/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/InstPrinter/LLVMBuild.txt 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,24 @@ -+;===- ./lib/Target/R600/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; -+; -+; The LLVM Compiler Infrastructure -+; -+; This file is distributed under the University of Illinois Open Source -+; License. See LICENSE.TXT for details. -+; -+;===------------------------------------------------------------------------===; -+; -+; This is an LLVMBuild description file for the components in this subdirectory. -+; -+; For more information on the LLVMBuild system, please see: -+; -+; http://llvm.org/docs/LLVMBuild.html -+; -+;===------------------------------------------------------------------------===; -+ -+[component_0] -+type = Library -+name = R600AsmPrinter -+parent = R600 -+required_libraries = MC Support -+add_to_library_groups = R600 -+ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/InstPrinter/Makefile llvm-r600/lib/Target/R600/InstPrinter/Makefile ---- llvm-3.2.src/lib/Target/R600/InstPrinter/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/InstPrinter/Makefile 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,15 @@ -+#===- lib/Target/R600/AsmPrinter/Makefile ------------------*- Makefile -*-===## -+# -+# The LLVM Compiler Infrastructure -+# -+# This file is distributed under the University of Illinois Open Source -+# License. See LICENSE.TXT for details. -+# -+##===----------------------------------------------------------------------===## -+LEVEL = ../../../.. -+LIBRARYNAME = LLVMR600AsmPrinter -+ -+# Hack: we need to include 'main' x86 target directory to grab private headers -+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. -+ -+include $(LEVEL)/Makefile.common -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/LLVMBuild.txt llvm-r600/lib/Target/R600/LLVMBuild.txt ---- llvm-3.2.src/lib/Target/R600/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/LLVMBuild.txt 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,32 @@ -+;===- ./lib/Target/AMDIL/LLVMBuild.txt -------------------------*- Conf -*--===; -+; -+; The LLVM Compiler Infrastructure -+; -+; This file is distributed under the University of Illinois Open Source -+; License. See LICENSE.TXT for details. -+; -+;===------------------------------------------------------------------------===; -+; -+; This is an LLVMBuild description file for the components in this subdirectory. 
-+; -+; For more information on the LLVMBuild system, please see: -+; -+; http://llvm.org/docs/LLVMBuild.html -+; -+;===------------------------------------------------------------------------===; -+ -+[common] -+subdirectories = InstPrinter MCTargetDesc TargetInfo -+ -+[component_0] -+type = TargetGroup -+name = R600 -+parent = Target -+has_asmprinter = 1 -+ -+[component_1] -+type = Library -+name = R600CodeGen -+parent = R600 -+required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info -+add_to_library_groups = R600 -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/Makefile llvm-r600/lib/Target/R600/Makefile ---- llvm-3.2.src/lib/Target/R600/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/Makefile 2013-01-25 19:43:57.460049721 +0100 -@@ -0,0 +1,23 @@ -+##===- lib/Target/R600/Makefile ---------------------------*- Makefile -*-===## -+# -+# The LLVM Compiler Infrastructure -+# -+# This file is distributed under the University of Illinois Open Source -+# License. See LICENSE.TXT for details. -+# -+##===----------------------------------------------------------------------===## -+ -+LEVEL = ../../.. -+LIBRARYNAME = LLVMR600CodeGen -+TARGET = AMDGPU -+ -+# Make sure that tblgen is run, first thing. -+BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \ -+ AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \ -+ AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \ -+ AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \ -+ AMDGPUGenAsmWriter.inc -+ -+DIRS = InstPrinter TargetInfo MCTargetDesc -+ -+include $(LEVEL)/Makefile.common -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp ---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,90 @@ -+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+/// \file -+//===----------------------------------------------------------------------===// -+ -+#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -+#include "llvm/ADT/StringRef.h" -+#include "llvm/MC/MCAsmBackend.h" -+#include "llvm/MC/MCAssembler.h" -+#include "llvm/MC/MCObjectWriter.h" -+#include "llvm/MC/MCValue.h" -+#include "llvm/Support/TargetRegistry.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+class AMDGPUMCObjectWriter : public MCObjectWriter { -+public: -+ AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } -+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm, -+ const MCAsmLayout &Layout) { -+ //XXX: Implement if necessary. 
-+ } -+ virtual void RecordRelocation(const MCAssembler &Asm, -+ const MCAsmLayout &Layout, -+ const MCFragment *Fragment, -+ const MCFixup &Fixup, -+ MCValue Target, uint64_t &FixedValue) { -+ assert(!"Not implemented"); -+ } -+ -+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); -+ -+}; -+ -+class AMDGPUAsmBackend : public MCAsmBackend { -+public: -+ AMDGPUAsmBackend(const Target &T) -+ : MCAsmBackend() {} -+ -+ virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const; -+ virtual unsigned getNumFixupKinds() const { return 0; }; -+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, -+ uint64_t Value) const; -+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, -+ const MCInstFragment *DF, -+ const MCAsmLayout &Layout) const { -+ return false; -+ } -+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const { -+ assert(!"Not implemented"); -+ } -+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; } -+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { -+ return true; -+ } -+}; -+ -+} //End anonymous namespace -+ -+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, -+ const MCAsmLayout &Layout) { -+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) { -+ Asm.writeSectionData(I, Layout); -+ } -+} -+ -+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT, -+ StringRef CPU) { -+ return new AMDGPUAsmBackend(T); -+} -+ -+AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter( -+ raw_ostream &OS) const { -+ return new AMDGPUMCObjectWriter(OS); -+} -+ -+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, -+ unsigned DataSize, uint64_t Value) const { -+ -+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset()); -+ assert(Fixup.getKind() == FK_PCRel_4); -+ *Dst = (Value - 4) / 4; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp ---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,85 @@ -+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+// -+/// \file -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPUMCAsmInfo.h" -+ -+using namespace llvm; -+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() { -+ HasSingleParameterDotFile = false; -+ WeakDefDirective = 0; -+ //===------------------------------------------------------------------===// -+ HasSubsectionsViaSymbols = true; -+ HasMachoZeroFillDirective = false; -+ HasMachoTBSSDirective = false; -+ HasStaticCtorDtorReferenceInStaticMode = false; -+ LinkerRequiresNonEmptyDwarfLines = true; -+ MaxInstLength = 16; -+ PCSymbol = "$"; -+ SeparatorString = "\n"; -+ CommentColumn = 40; -+ CommentString = ";"; -+ LabelSuffix = ":"; -+ GlobalPrefix = "@"; -+ PrivateGlobalPrefix = ";."; -+ LinkerPrivateGlobalPrefix = "!"; -+ InlineAsmStart = ";#ASMSTART"; -+ InlineAsmEnd = ";#ASMEND"; -+ AssemblerDialect = 0; -+ AllowQuotesInName = false; -+ AllowNameToStartWithDigit = false; -+ AllowPeriodsInName = false; -+ -+ //===--- Data Emission Directives -------------------------------------===// -+ ZeroDirective = ".zero"; -+ AsciiDirective = ".ascii\t"; -+ AscizDirective = ".asciz\t"; -+ Data8bitsDirective = ".byte\t"; -+ Data16bitsDirective = ".short\t"; -+ Data32bitsDirective = ".long\t"; -+ Data64bitsDirective = ".quad\t"; -+ GPRel32Directive = 0; -+ SunStyleELFSectionSwitchSyntax = true; -+ UsesELFSectionDirectiveForBSS = true; -+ HasMicrosoftFastStdCallMangling = false; -+ -+ //===--- Alignment Information ----------------------------------------===// -+ AlignDirective = ".align\t"; -+ AlignmentIsInBytes = true; -+ TextAlignFillValue = 0; -+ -+ //===--- Global Variable Emission Directives --------------------------===// -+ GlobalDirective = ".global"; -+ ExternDirective = ".extern"; -+ HasSetDirective = false; -+ HasAggressiveSymbolFolding = true; -+ COMMDirectiveAlignmentIsInBytes = false; -+ HasDotTypeDotSizeDirective = false; -+ HasNoDeadStrip = true; -+ HasSymbolResolver = false; -+ WeakRefDirective = ".weakref\t"; -+ LinkOnceDirective = 0; -+ //===--- Dwarf Emission Directives -----------------------------------===// -+ HasLEB128 = true; -+ SupportsDebugInformation = true; -+ ExceptionsType = ExceptionHandling::None; -+ DwarfUsesInlineInfoSection = false; -+ DwarfSectionOffsetDirective = ".offset"; -+ -+} -+ -+const char* -+AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const { -+ return 0; -+} -+ -+const MCSection* -+AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const { -+ return 0; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h ---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h 2013-01-25 19:43:57.456716387 +0100 -@@ -0,0 +1,30 @@ -+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUMCASMINFO_H
-+#define AMDGPUMCASMINFO_H
-+
-+#include "llvm/MC/MCAsmInfo.h"
-+namespace llvm {
-+
-+class Target;
-+class StringRef;
-+
-+class AMDGPUMCAsmInfo : public MCAsmInfo {
-+public:
-+  explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
-+  const char* getDataASDirective(unsigned int Size, unsigned int AS) const;
-+  const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
-+};
-+} // namespace llvm
-+#endif // AMDGPUMCASMINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h	2013-01-25 19:43:57.456716387 +0100
-@@ -0,0 +1,60 @@
-+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief CodeEmitter interface for R600 and SI codegen.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUCODEEMITTER_H
-+#define AMDGPUCODEEMITTER_H
-+
-+#include "llvm/MC/MCCodeEmitter.h"
-+#include "llvm/Support/raw_ostream.h"
-+
-+namespace llvm {
-+
-+class MCInst;
-+class MCOperand;
-+
-+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
-+public:
-+
-+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
-+                                 SmallVectorImpl<MCFixup> &Fixups) const;
-+
-+  virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-+                                     SmallVectorImpl<MCFixup> &Fixups) const {
-+    return 0;
-+  }
-+
-+  virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
-+                                   SmallVectorImpl<MCFixup> &Fixups) const {
-+    return 0;
-+  }
-+  virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
-+                                   SmallVectorImpl<MCFixup> &Fixups) const {
-+    return 0;
-+  }
-+  virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
-+    return Value;
-+  }
-+  virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
-+                                    SmallVectorImpl<MCFixup> &Fixups) const {
-+    return 0;
-+  }
-+  virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
-+                                   SmallVectorImpl<MCFixup> &Fixups) const {
-+    return 0;
-+  }
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUCODEEMITTER_H
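
Note: the encode hooks above are all no-op defaults; the R600 and SI emitters
override only what their formats need, and the factory in
AMDGPUMCTargetDesc.cpp below picks a concrete emitter by subtarget feature
bit. A minimal standalone sketch of that dispatch pattern; the class names
and bit value here are illustrative, not LLVM's:

    #include <cstdint>
    #include <iostream>

    static const uint64_t Feature64BitPtr = 1ull << 0;  // 64-bit pointers => SI

    struct Emitter {
      virtual ~Emitter() {}
      virtual const char *name() const = 0;
    };
    struct R600Emitter : Emitter { const char *name() const { return "R600"; } };
    struct SIEmitter   : Emitter { const char *name() const { return "SI"; } };

    // Mirrors createAMDGPUMCCodeEmitter below: feature bit set => SI encoder.
    static Emitter *createEmitter(uint64_t featureBits) {
      if (featureBits & Feature64BitPtr)
        return new SIEmitter();
      return new R600Emitter();
    }

    int main() {
      Emitter *E = createEmitter(Feature64BitPtr);
      std::cout << E->name() << '\n';  // prints "SI"
      delete E;
      return 0;
    }
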
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp	2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,113 @@
-+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This file provides AMDGPU specific target descriptions.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUMCTargetDesc.h"
-+#include "AMDGPUMCAsmInfo.h"
-+#include "InstPrinter/AMDGPUInstPrinter.h"
-+#include "llvm/MC/MachineLocation.h"
-+#include "llvm/MC/MCCodeGenInfo.h"
-+#include "llvm/MC/MCInstrInfo.h"
-+#include "llvm/MC/MCRegisterInfo.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/MC/MCSubtargetInfo.h"
-+#include "llvm/Support/ErrorHandling.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+#define GET_INSTRINFO_MC_DESC
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+#define GET_SUBTARGETINFO_MC_DESC
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+#define GET_REGINFO_MC_DESC
-+#include "AMDGPUGenRegisterInfo.inc"
-+
-+using namespace llvm;
-+
-+static MCInstrInfo *createAMDGPUMCInstrInfo() {
-+  MCInstrInfo *X = new MCInstrInfo();
-+  InitAMDGPUMCInstrInfo(X);
-+  return X;
-+}
-+
-+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
-+  MCRegisterInfo *X = new MCRegisterInfo();
-+  InitAMDGPUMCRegisterInfo(X, 0);
-+  return X;
-+}
-+
-+static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
-+                                                    StringRef FS) {
-+  MCSubtargetInfo * X = new MCSubtargetInfo();
-+  InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
-+  return X;
-+}
-+
-+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
-+                                                CodeModel::Model CM,
-+                                                CodeGenOpt::Level OL) {
-+  MCCodeGenInfo *X = new MCCodeGenInfo();
-+  X->InitMCCodeGenInfo(RM, CM, OL);
-+  return X;
-+}
-+
-+static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
-+                                                unsigned SyntaxVariant,
-+                                                const MCAsmInfo &MAI,
-+                                                const MCInstrInfo &MII,
-+                                                const MCRegisterInfo &MRI,
-+                                                const MCSubtargetInfo &STI) {
-+  return new AMDGPUInstPrinter(MAI, MII, MRI);
-+}
-+
-+static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
-+                                                const MCRegisterInfo &MRI,
-+                                                const MCSubtargetInfo &STI,
-+                                                MCContext &Ctx) {
-+  if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
-+    return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
-+  } else {
-+    return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
-+  }
-+}
-+
-+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
-+                                    MCContext &Ctx, MCAsmBackend &MAB,
-+                                    raw_ostream &_OS,
-+                                    MCCodeEmitter *_Emitter,
-+                                    bool RelaxAll,
-+                                    bool NoExecStack) {
-+  return createPureStreamer(Ctx, MAB, _OS, _Emitter);
-+}
-+
-+extern "C" void LLVMInitializeR600TargetMC() {
-+
-+  RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
-+
-+  TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
-+
-+  TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
-+
-+  TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
-+
-+  TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
-+
-+  TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
-+
-+  TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
-+
-+  TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
-+
-+  TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h	2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,55 @@
-+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Provides AMDGPU specific target descriptions. -+// -+//===----------------------------------------------------------------------===// -+// -+ -+#ifndef AMDGPUMCTARGETDESC_H -+#define AMDGPUMCTARGETDESC_H -+ -+#include "llvm/ADT/StringRef.h" -+ -+namespace llvm { -+class MCAsmBackend; -+class MCCodeEmitter; -+class MCContext; -+class MCInstrInfo; -+class MCRegisterInfo; -+class MCSubtargetInfo; -+class Target; -+ -+extern Target TheAMDGPUTarget; -+ -+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, -+ const MCRegisterInfo &MRI, -+ const MCSubtargetInfo &STI, -+ MCContext &Ctx); -+ -+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, -+ const MCRegisterInfo &MRI, -+ const MCSubtargetInfo &STI, -+ MCContext &Ctx); -+ -+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT, -+ StringRef CPU); -+} // End llvm namespace -+ -+#define GET_REGINFO_ENUM -+#include "AMDGPUGenRegisterInfo.inc" -+ -+#define GET_INSTRINFO_ENUM -+#include "AMDGPUGenInstrInfo.inc" -+ -+#define GET_SUBTARGETINFO_ENUM -+#include "AMDGPUGenSubtargetInfo.inc" -+ -+#endif // AMDGPUMCTARGETDESC_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/CMakeLists.txt llvm-r600/lib/Target/R600/MCTargetDesc/CMakeLists.txt ---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/MCTargetDesc/CMakeLists.txt 2013-01-25 19:43:57.460049721 +0100 -@@ -0,0 +1,10 @@ -+ -+add_llvm_library(LLVMR600Desc -+ AMDGPUAsmBackend.cpp -+ AMDGPUMCTargetDesc.cpp -+ AMDGPUMCAsmInfo.cpp -+ R600MCCodeEmitter.cpp -+ SIMCCodeEmitter.cpp -+ ) -+ -+add_dependencies(LLVMR600Desc AMDGPUCommonTableGen) -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/LLVMBuild.txt llvm-r600/lib/Target/R600/MCTargetDesc/LLVMBuild.txt ---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/MCTargetDesc/LLVMBuild.txt 2013-01-25 19:43:57.460049721 +0100 -@@ -0,0 +1,23 @@ -+;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; -+; -+; The LLVM Compiler Infrastructure -+; -+; This file is distributed under the University of Illinois Open Source -+; License. See LICENSE.TXT for details. -+; -+;===------------------------------------------------------------------------===; -+; -+; This is an LLVMBuild description file for the components in this subdirectory. 
-+; -+; For more information on the LLVMBuild system, please see: -+; -+; http://llvm.org/docs/LLVMBuild.html -+; -+;===------------------------------------------------------------------------===; -+ -+[component_0] -+type = Library -+name = R600Desc -+parent = R600 -+required_libraries = R600AsmPrinter R600Info MC -+add_to_library_groups = R600 -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/Makefile llvm-r600/lib/Target/R600/MCTargetDesc/Makefile ---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/MCTargetDesc/Makefile 2013-01-25 19:43:57.460049721 +0100 -@@ -0,0 +1,16 @@ -+##===- lib/Target/AMDGPU/TargetDesc/Makefile ----------------*- Makefile -*-===## -+# -+# The LLVM Compiler Infrastructure -+# -+# This file is distributed under the University of Illinois Open Source -+# License. See LICENSE.TXT for details. -+# -+##===----------------------------------------------------------------------===## -+ -+LEVEL = ../../../.. -+LIBRARYNAME = LLVMR600Desc -+ -+# Hack: we need to include 'main' target directory to grab private headers -+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. -+ -+include $(LEVEL)/Makefile.common -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp llvm-r600/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp ---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 2013-01-25 19:43:57.460049721 +0100 -@@ -0,0 +1,580 @@ -+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// -+/// This code emitter outputs bytecode that is understood by the r600g driver -+/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, -+/// but it still needs to be run through a finalizer in order to be executed -+/// by the GPU. -+/// -+/// [1] http://www.mesa3d.org/ -+// -+//===----------------------------------------------------------------------===// -+ -+#include "R600Defines.h" -+#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h" -+#include "llvm/MC/MCCodeEmitter.h" -+#include "llvm/MC/MCContext.h" -+#include "llvm/MC/MCInst.h" -+#include "llvm/MC/MCInstrInfo.h" -+#include "llvm/MC/MCRegisterInfo.h" -+#include "llvm/MC/MCSubtargetInfo.h" -+#include "llvm/Support/raw_ostream.h" -+ -+#include -+ -+#define SRC_BYTE_COUNT 11 -+#define DST_BYTE_COUNT 5 -+ -+using namespace llvm; -+ -+namespace { -+ -+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { -+ R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT -+ void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT -+ const MCInstrInfo &MCII; -+ const MCRegisterInfo &MRI; -+ const MCSubtargetInfo &STI; -+ MCContext &Ctx; -+ -+public: -+ -+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, -+ const MCSubtargetInfo &sti, MCContext &ctx) -+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } -+ -+ /// \brief Encode the instruction and write it to the OS. -+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, -+ SmallVectorImpl &Fixups) const; -+ -+ /// \returns the encoding for an MCOperand. 
-+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, -+ SmallVectorImpl &Fixups) const; -+private: -+ -+ void EmitALUInstr(const MCInst &MI, SmallVectorImpl &Fixups, -+ raw_ostream &OS) const; -+ void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; -+ void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, -+ raw_ostream &OS) const; -+ void EmitDst(const MCInst &MI, raw_ostream &OS) const; -+ void EmitTexInstr(const MCInst &MI, SmallVectorImpl &Fixups, -+ raw_ostream &OS) const; -+ void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; -+ -+ void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; -+ -+ void EmitByte(unsigned int byte, raw_ostream &OS) const; -+ -+ void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const; -+ -+ void Emit(uint32_t value, raw_ostream &OS) const; -+ void Emit(uint64_t value, raw_ostream &OS) const; -+ -+ unsigned getHWRegChan(unsigned reg) const; -+ unsigned getHWReg(unsigned regNo) const; -+ -+ bool isFCOp(unsigned opcode) const; -+ bool isTexOp(unsigned opcode) const; -+ bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const; -+ -+}; -+ -+} // End anonymous namespace -+ -+enum RegElement { -+ ELEMENT_X = 0, -+ ELEMENT_Y, -+ ELEMENT_Z, -+ ELEMENT_W -+}; -+ -+enum InstrTypes { -+ INSTR_ALU = 0, -+ INSTR_TEX, -+ INSTR_FC, -+ INSTR_NATIVE, -+ INSTR_VTX, -+ INSTR_EXPORT -+}; -+ -+enum FCInstr { -+ FC_IF_PREDICATE = 0, -+ FC_ELSE, -+ FC_ENDIF, -+ FC_BGNLOOP, -+ FC_ENDLOOP, -+ FC_BREAK_PREDICATE, -+ FC_CONTINUE -+}; -+ -+enum TextureTypes { -+ TEXTURE_1D = 1, -+ TEXTURE_2D, -+ TEXTURE_3D, -+ TEXTURE_CUBE, -+ TEXTURE_RECT, -+ TEXTURE_SHADOW1D, -+ TEXTURE_SHADOW2D, -+ TEXTURE_SHADOWRECT, -+ TEXTURE_1D_ARRAY, -+ TEXTURE_2D_ARRAY, -+ TEXTURE_SHADOW1D_ARRAY, -+ TEXTURE_SHADOW2D_ARRAY -+}; -+ -+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, -+ const MCRegisterInfo &MRI, -+ const MCSubtargetInfo &STI, -+ MCContext &Ctx) { -+ return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); -+} -+ -+void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, -+ SmallVectorImpl &Fixups) const { -+ if (isTexOp(MI.getOpcode())) { -+ EmitTexInstr(MI, Fixups, OS); -+ } else if (isFCOp(MI.getOpcode())){ -+ EmitFCInstr(MI, OS); -+ } else if (MI.getOpcode() == AMDGPU::RETURN || -+ MI.getOpcode() == AMDGPU::BUNDLE || -+ MI.getOpcode() == AMDGPU::KILL) { -+ return; -+ } else { -+ switch(MI.getOpcode()) { -+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg: -+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { -+ uint64_t inst = getBinaryCodeForInstr(MI, Fixups); -+ EmitByte(INSTR_NATIVE, OS); -+ Emit(inst, OS); -+ break; -+ } -+ case AMDGPU::CONSTANT_LOAD_eg: -+ case AMDGPU::VTX_READ_PARAM_8_eg: -+ case AMDGPU::VTX_READ_PARAM_16_eg: -+ case AMDGPU::VTX_READ_PARAM_32_eg: -+ case AMDGPU::VTX_READ_GLOBAL_8_eg: -+ case AMDGPU::VTX_READ_GLOBAL_32_eg: -+ case AMDGPU::VTX_READ_GLOBAL_128_eg: -+ case AMDGPU::TEX_VTX_CONSTBUF: { -+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); -+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset -+ -+ EmitByte(INSTR_VTX, OS); -+ Emit(InstWord01, OS); -+ Emit(InstWord2, OS); -+ break; -+ } -+ case AMDGPU::EG_ExportSwz: -+ case AMDGPU::R600_ExportSwz: -+ case AMDGPU::EG_ExportBuf: -+ case AMDGPU::R600_ExportBuf: { -+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); -+ EmitByte(INSTR_EXPORT, OS); -+ Emit(Inst, OS); -+ break; -+ } -+ -+ default: -+ EmitALUInstr(MI, Fixups, OS); -+ break; -+ } -+ } -+} -+ -+void 
R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, -+ SmallVectorImpl &Fixups, -+ raw_ostream &OS) const { -+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); -+ -+ // Emit instruction type -+ EmitByte(INSTR_ALU, OS); -+ -+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); -+ -+ //older alu have different encoding for instructions with one or two src -+ //parameters. -+ if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && -+ !(MCDesc.TSFlags & R600_InstFlag::OP3)) { -+ uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); -+ InstWord01 &= ~(0x3FFULL << 39); -+ InstWord01 |= ISAOpCode << 1; -+ } -+ -+ unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : -+ MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1; -+ -+ EmitByte(SrcNum, OS); -+ -+ const unsigned SrcOps[3][2] = { -+ {R600Operands::SRC0, R600Operands::SRC0_SEL}, -+ {R600Operands::SRC1, R600Operands::SRC1_SEL}, -+ {R600Operands::SRC2, R600Operands::SRC2_SEL} -+ }; -+ -+ for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { -+ unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; -+ unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; -+ EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); -+ } -+ -+ Emit(InstWord01, OS); -+ return; -+} -+ -+void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, -+ raw_ostream &OS) const { -+ const MCOperand &MO = MI.getOperand(OpIdx); -+ union { -+ float f; -+ uint32_t i; -+ } Value; -+ Value.i = 0; -+ // Emit the source select (2 bytes). For GPRs, this is the register index. -+ // For other potential instruction operands, (e.g. constant registers) the -+ // value of the source select is defined in the r600isa docs. -+ if (MO.isReg()) { -+ unsigned reg = MO.getReg(); -+ EmitTwoBytes(getHWReg(reg), OS); -+ if (reg == AMDGPU::ALU_LITERAL_X) { -+ unsigned ImmOpIndex = MI.getNumOperands() - 1; -+ MCOperand ImmOp = MI.getOperand(ImmOpIndex); -+ if (ImmOp.isFPImm()) { -+ Value.f = ImmOp.getFPImm(); -+ } else { -+ assert(ImmOp.isImm()); -+ Value.i = ImmOp.getImm(); -+ } -+ } -+ } else { -+ // XXX: Handle other operand types. -+ EmitTwoBytes(0, OS); -+ } -+ -+ // Emit the source channel (1 byte) -+ if (MO.isReg()) { -+ EmitByte(getHWRegChan(MO.getReg()), OS); -+ } else { -+ EmitByte(0, OS); -+ } -+ -+ // XXX: Emit isNegated (1 byte) -+ if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) -+ && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || -+ (MO.isReg() && -+ (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ -+ EmitByte(1, OS); -+ } else { -+ EmitByte(0, OS); -+ } -+ -+ // Emit isAbsolute (1 byte) -+ if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { -+ EmitByte(1, OS); -+ } else { -+ EmitByte(0, OS); -+ } -+ -+ // XXX: Emit relative addressing mode (1 byte) -+ EmitByte(0, OS); -+ -+ // Emit kc_bank, This will be adjusted later by r600_asm -+ EmitByte(0, OS); -+ -+ // Emit the literal value, if applicable (4 bytes). -+ Emit(Value.i, OS); -+ -+} -+ -+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, -+ unsigned SelOpIdx, raw_ostream &OS) const { -+ const MCOperand &RegMO = MI.getOperand(RegOpIdx); -+ const MCOperand &SelMO = MI.getOperand(SelOpIdx); -+ -+ union { -+ float f; -+ uint32_t i; -+ } InlineConstant; -+ InlineConstant.i = 0; -+ // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0 -+ // and select is 0 (GPR index is encoded in the instr encoding. For constants -+ // type is 1 and select is the original const select passed from the driver. 
-+ unsigned Reg = RegMO.getReg(); -+ if (Reg == AMDGPU::ALU_CONST) { -+ EmitByte(1, OS); -+ uint32_t Sel = SelMO.getImm(); -+ Emit(Sel, OS); -+ } else { -+ EmitByte(0, OS); -+ Emit((uint32_t)0, OS); -+ } -+ -+ if (Reg == AMDGPU::ALU_LITERAL_X) { -+ unsigned ImmOpIndex = MI.getNumOperands() - 1; -+ MCOperand ImmOp = MI.getOperand(ImmOpIndex); -+ if (ImmOp.isFPImm()) { -+ InlineConstant.f = ImmOp.getFPImm(); -+ } else { -+ assert(ImmOp.isImm()); -+ InlineConstant.i = ImmOp.getImm(); -+ } -+ } -+ -+ // Emit the literal value, if applicable (4 bytes). -+ Emit(InlineConstant.i, OS); -+} -+ -+void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, -+ SmallVectorImpl &Fixups, -+ raw_ostream &OS) const { -+ -+ unsigned Opcode = MI.getOpcode(); -+ bool hasOffsets = (Opcode == AMDGPU::TEX_LD); -+ unsigned OpOffset = hasOffsets ? 3 : 0; -+ int64_t Resource = MI.getOperand(OpOffset + 2).getImm(); -+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); -+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); -+ unsigned srcSelect[4] = {0, 1, 2, 3}; -+ -+ // Emit instruction type -+ EmitByte(1, OS); -+ -+ // Emit instruction -+ EmitByte(getBinaryCodeForInstr(MI, Fixups), OS); -+ -+ // Emit resource id -+ EmitByte(Resource, OS); -+ -+ // Emit source register -+ EmitByte(getHWReg(MI.getOperand(1).getReg()), OS); -+ -+ // XXX: Emit src isRelativeAddress -+ EmitByte(0, OS); -+ -+ // Emit destination register -+ EmitByte(getHWReg(MI.getOperand(0).getReg()), OS); -+ -+ // XXX: Emit dst isRealtiveAddress -+ EmitByte(0, OS); -+ -+ // XXX: Emit dst select -+ EmitByte(0, OS); // X -+ EmitByte(1, OS); // Y -+ EmitByte(2, OS); // Z -+ EmitByte(3, OS); // W -+ -+ // XXX: Emit lod bias -+ EmitByte(0, OS); -+ -+ // XXX: Emit coord types -+ unsigned coordType[4] = {1, 1, 1, 1}; -+ -+ if (TextureType == TEXTURE_RECT -+ || TextureType == TEXTURE_SHADOWRECT) { -+ coordType[ELEMENT_X] = 0; -+ coordType[ELEMENT_Y] = 0; -+ } -+ -+ if (TextureType == TEXTURE_1D_ARRAY -+ || TextureType == TEXTURE_SHADOW1D_ARRAY) { -+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) { -+ coordType[ELEMENT_Y] = 0; -+ } else { -+ coordType[ELEMENT_Z] = 0; -+ srcSelect[ELEMENT_Z] = ELEMENT_Y; -+ } -+ } else if (TextureType == TEXTURE_2D_ARRAY -+ || TextureType == TEXTURE_SHADOW2D_ARRAY) { -+ coordType[ELEMENT_Z] = 0; -+ } -+ -+ for (unsigned i = 0; i < 4; i++) { -+ EmitByte(coordType[i], OS); -+ } -+ -+ // XXX: Emit offsets -+ if (hasOffsets) -+ for (unsigned i = 2; i < 5; i++) -+ EmitByte(MI.getOperand(i).getImm()<<1, OS); -+ else -+ EmitNullBytes(3, OS); -+ -+ // Emit sampler id -+ EmitByte(Sampler, OS); -+ -+ // XXX:Emit source select -+ if ((TextureType == TEXTURE_SHADOW1D -+ || TextureType == TEXTURE_SHADOW2D -+ || TextureType == TEXTURE_SHADOWRECT -+ || TextureType == TEXTURE_SHADOW1D_ARRAY) -+ && Opcode != AMDGPU::TEX_SAMPLE_C_L -+ && Opcode != AMDGPU::TEX_SAMPLE_C_LB) { -+ srcSelect[ELEMENT_W] = ELEMENT_Z; -+ } -+ -+ for (unsigned i = 0; i < 4; i++) { -+ EmitByte(srcSelect[i], OS); -+ } -+} -+ -+void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { -+ -+ // Emit instruction type -+ EmitByte(INSTR_FC, OS); -+ -+ // Emit SRC -+ unsigned NumOperands = MI.getNumOperands(); -+ if (NumOperands > 0) { -+ assert(NumOperands == 1); -+ EmitSrc(MI, 0, OS); -+ } else { -+ EmitNullBytes(SRC_BYTE_COUNT, OS); -+ } -+ -+ // Emit FC Instruction -+ enum FCInstr instr; -+ switch (MI.getOpcode()) { -+ case AMDGPU::PREDICATED_BREAK: -+ instr = FC_BREAK_PREDICATE; -+ break; -+ case AMDGPU::CONTINUE: -+ 
instr = FC_CONTINUE; -+ break; -+ case AMDGPU::IF_PREDICATE_SET: -+ instr = FC_IF_PREDICATE; -+ break; -+ case AMDGPU::ELSE: -+ instr = FC_ELSE; -+ break; -+ case AMDGPU::ENDIF: -+ instr = FC_ENDIF; -+ break; -+ case AMDGPU::ENDLOOP: -+ instr = FC_ENDLOOP; -+ break; -+ case AMDGPU::WHILELOOP: -+ instr = FC_BGNLOOP; -+ break; -+ default: -+ abort(); -+ break; -+ } -+ EmitByte(instr, OS); -+} -+ -+void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount, -+ raw_ostream &OS) const { -+ -+ for (unsigned int i = 0; i < ByteCount; i++) { -+ EmitByte(0, OS); -+ } -+} -+ -+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { -+ OS.write((uint8_t) Byte & 0xff); -+} -+ -+void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes, -+ raw_ostream &OS) const { -+ OS.write((uint8_t) (Bytes & 0xff)); -+ OS.write((uint8_t) ((Bytes >> 8) & 0xff)); -+} -+ -+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { -+ for (unsigned i = 0; i < 4; i++) { -+ OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); -+ } -+} -+ -+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { -+ for (unsigned i = 0; i < 8; i++) { -+ EmitByte((Value >> (8 * i)) & 0xff, OS); -+ } -+} -+ -+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { -+ return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT; -+} -+ -+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const { -+ return MRI.getEncodingValue(RegNo) & HW_REG_MASK; -+} -+ -+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, -+ const MCOperand &MO, -+ SmallVectorImpl &Fixup) const { -+ if (MO.isReg()) { -+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) { -+ return MRI.getEncodingValue(MO.getReg()); -+ } else { -+ return getHWReg(MO.getReg()); -+ } -+ } else if (MO.isImm()) { -+ return MO.getImm(); -+ } else { -+ assert(0); -+ return 0; -+ } -+} -+ -+//===----------------------------------------------------------------------===// -+// Encoding helper functions -+//===----------------------------------------------------------------------===// -+ -+bool R600MCCodeEmitter::isFCOp(unsigned opcode) const { -+ switch(opcode) { -+ default: return false; -+ case AMDGPU::PREDICATED_BREAK: -+ case AMDGPU::CONTINUE: -+ case AMDGPU::IF_PREDICATE_SET: -+ case AMDGPU::ELSE: -+ case AMDGPU::ENDIF: -+ case AMDGPU::ENDLOOP: -+ case AMDGPU::WHILELOOP: -+ return true; -+ } -+} -+ -+bool R600MCCodeEmitter::isTexOp(unsigned opcode) const { -+ switch(opcode) { -+ default: return false; -+ case AMDGPU::TEX_LD: -+ case AMDGPU::TEX_GET_TEXTURE_RESINFO: -+ case AMDGPU::TEX_SAMPLE: -+ case AMDGPU::TEX_SAMPLE_C: -+ case AMDGPU::TEX_SAMPLE_L: -+ case AMDGPU::TEX_SAMPLE_C_L: -+ case AMDGPU::TEX_SAMPLE_LB: -+ case AMDGPU::TEX_SAMPLE_C_LB: -+ case AMDGPU::TEX_SAMPLE_G: -+ case AMDGPU::TEX_SAMPLE_C_G: -+ case AMDGPU::TEX_GET_GRADIENTS_H: -+ case AMDGPU::TEX_GET_GRADIENTS_V: -+ case AMDGPU::TEX_SET_GRADIENTS_H: -+ case AMDGPU::TEX_SET_GRADIENTS_V: -+ return true; -+ } -+} -+ -+bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand, -+ unsigned Flag) const { -+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); -+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags); -+ if (FlagIndex == 0) { -+ return false; -+ } -+ assert(MI.getOperand(FlagIndex).isImm()); -+ return !!((MI.getOperand(FlagIndex).getImm() >> -+ (NUM_MO_FLAGS * Operand)) & Flag); -+} -+ -+#include "AMDGPUGenMCCodeEmitter.inc" -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp 
llvm-r600/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
---- llvm-3.2.src/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp	2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,298 @@
-+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief The SI code emitter produces machine code that can be executed
-+/// directly on the GPU device.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-+#include "llvm/MC/MCCodeEmitter.h"
-+#include "llvm/MC/MCContext.h"
-+#include "llvm/MC/MCInst.h"
-+#include "llvm/MC/MCInstrInfo.h"
-+#include "llvm/MC/MCRegisterInfo.h"
-+#include "llvm/MC/MCSubtargetInfo.h"
-+#include "llvm/MC/MCFixup.h"
-+#include "llvm/Support/raw_ostream.h"
-+
-+#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
-+#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
-+
-+// These must be kept in sync with SIInstructions.td and also the
-+// InstrEncodingInfo array in SIInstrInfo.cpp.
-+//
-+// NOTE: This enum is only used to identify the encoding type within LLVM,
-+// the actual encoding type that is part of the instruction format is different
-+namespace SIInstrEncodingType {
-+  enum Encoding {
-+    EXP = 0,
-+    LDS = 1,
-+    MIMG = 2,
-+    MTBUF = 3,
-+    MUBUF = 4,
-+    SMRD = 5,
-+    SOP1 = 6,
-+    SOP2 = 7,
-+    SOPC = 8,
-+    SOPK = 9,
-+    SOPP = 10,
-+    VINTRP = 11,
-+    VOP1 = 12,
-+    VOP2 = 13,
-+    VOP3 = 14,
-+    VOPC = 15
-+  };
-+}
-+
-+using namespace llvm;
-+
-+namespace {
-+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
-+  SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
-+  void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
-+  const MCInstrInfo &MCII;
-+  const MCRegisterInfo &MRI;
-+  const MCSubtargetInfo &STI;
-+  MCContext &Ctx;
-+
-+public:
-+  SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
-+                  const MCSubtargetInfo &sti, MCContext &ctx)
-+    : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
-+
-+  ~SIMCCodeEmitter() { }
-+
-+  /// \brief Encode the instruction and write it to the OS.
-+  virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-+                                 SmallVectorImpl<MCFixup> &Fixups) const;
-+
-+  /// \returns the encoding for an MCOperand.
-+  virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-+                                     SmallVectorImpl<MCFixup> &Fixups) const;
-+
-+public:
-+
-+  /// \brief Encode a sequence of registers with the correct alignment.
-+  unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
-+
-+  /// \brief Encoding for when 2 consecutive registers are used
-+  virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
-+                                   SmallVectorImpl<MCFixup> &Fixup) const;
-+
-+  /// \brief Encoding for when 4 consecutive registers are used
-+  virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
-+                                   SmallVectorImpl<MCFixup> &Fixup) const;
-+
-+  /// \brief Encoding for SMRD indexed loads
-+  virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
-+                                   SmallVectorImpl<MCFixup> &Fixup) const;
-+
-+  /// \brief Post-Encoder method for VOP instructions
-+  virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
-+
-+private:
-+
-+  /// \returns the SIInstrEncodingType for this instruction.
-+  unsigned getEncodingType(const MCInst &MI) const;
-+
-+  /// \brief Get the size in bytes of this instruction's encoding.
-+  unsigned getEncodingBytes(const MCInst &MI) const;
-+
-+  /// \returns the hardware encoding for a register
-+  unsigned getRegBinaryCode(unsigned reg) const;
-+
-+  /// \brief Generated function that returns the hardware encoding for
-+  /// a register
-+  unsigned getHWRegNum(unsigned reg) const;
-+
-+};
-+
-+} // End anonymous namespace
-+
-+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
-+                                           const MCRegisterInfo &MRI,
-+                                           const MCSubtargetInfo &STI,
-+                                           MCContext &Ctx) {
-+  return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
-+}
-+
-+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-+                                        SmallVectorImpl<MCFixup> &Fixups) const {
-+  uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
-+  unsigned bytes = getEncodingBytes(MI);
-+  for (unsigned i = 0; i < bytes; i++) {
-+    OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
-+  }
-+}
-+
-+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
-+                                            const MCOperand &MO,
-+                                            SmallVectorImpl<MCFixup> &Fixups) const {
-+  if (MO.isReg()) {
-+    return getRegBinaryCode(MO.getReg());
-+  } else if (MO.isImm()) {
-+    return MO.getImm();
-+  } else if (MO.isFPImm()) {
-+    // XXX: Not all instructions can use inline literals
-+    // XXX: We should make sure this is a 32-bit constant
-+    union {
-+      float F;
-+      uint32_t I;
-+    } Imm;
-+    Imm.F = MO.getFPImm();
-+    return Imm.I;
-+  } else if (MO.isExpr()) {
-+    const MCExpr *Expr = MO.getExpr();
-+    MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
-+    Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
-+    return 0;
-+  } else {
-+    llvm_unreachable("Encoding of this operand type is not supported yet.");
-+  }
-+  return 0;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom Operand Encodings
-+//===----------------------------------------------------------------------===//
-+
-+unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
-+                                   unsigned shift) const {
-+  unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
-+  return regCode >> shift;
-+}
-+
-+unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
-+                                          unsigned OpNo,
-+                                          SmallVectorImpl<MCFixup> &Fixup) const {
-+  return GPRAlign(MI, OpNo, 1);
-+}
-+
-+unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
-+                                          unsigned OpNo,
-+                                          SmallVectorImpl<MCFixup> &Fixup) const {
-+  return GPRAlign(MI, OpNo, 2);
-+}
-+
-+#define SMRD_OFFSET_MASK 0xff
-+#define SMRD_IMM_SHIFT 8
-+#define SMRD_SBASE_MASK 0x3f
-+#define SMRD_SBASE_SHIFT 9
-+/// This function is responsible for encoding the offset and the base ptr
-+/// for SMRD instructions. It should return a bit string in this format:
-+///
-+/// OFFSET = bits{7-0}
-+/// IMM    = bits{8}
-+/// SBASE  = bits{14-9}
-+///
-+uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
-+                                          SmallVectorImpl<MCFixup> &Fixup) const {
-+  uint32_t Encoding;
-+
-+  const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
-+
-+  //XXX: Use this function for SMRD loads with register offsets
-+  assert(OffsetOp.isImm());
-+
-+  Encoding =
-+      (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
-+      | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
-+      | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
-+      ;
-+
-+  return Encoding;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Post Encoder Callbacks
-+//===----------------------------------------------------------------------===//
-+
-+uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const {
-+  unsigned encodingType = getEncodingType(MI);
-+  unsigned numSrcOps;
-+  unsigned vgprBitOffset;
-+
-+  if (encodingType == SIInstrEncodingType::VOP3) {
-+    numSrcOps = 3;
-+    vgprBitOffset = 32;
-+  } else {
-+    numSrcOps = 1;
-+    vgprBitOffset = 0;
-+  }
-+
-+  // Add one to skip over the destination reg operand.
-+  for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
-+    const MCOperand &MO = MI.getOperand(opIdx);
-+    if (MO.isReg()) {
-+      unsigned reg = MI.getOperand(opIdx).getReg();
-+      if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
-+          AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
-+        Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
-+      }
-+    } else if (MO.isFPImm()) {
-+      union {
-+        float f;
-+        uint32_t i;
-+      } Imm;
-+      // XXX: Not all instructions can use inline literals
-+      // XXX: We should make sure this is a 32-bit constant
-+      Imm.f = MO.getFPImm();
-+      Value |= ((uint64_t)Imm.i) << 32;
-+    }
-+  }
-+  return Value;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Encoding helper functions
-+//===----------------------------------------------------------------------===//
-+
-+unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
-+  return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
-+}
-+
-+unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
-+
-+  // These instructions aren't real instructions with an encoding type, so
-+  // we need to manually specify their size.
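-+  // For example, SI_LOAD_LITERAL_I32/_F32 below are emitted as a single
-+  // 32-bit literal word, so they report 4 bytes even though they have no
-+  // encoding-type bits.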
-+  switch (MI.getOpcode()) {
-+  default: break;
-+  case AMDGPU::SI_LOAD_LITERAL_I32:
-+  case AMDGPU::SI_LOAD_LITERAL_F32:
-+    return 4;
-+  }
-+
-+  unsigned encoding_type = getEncodingType(MI);
-+  switch (encoding_type) {
-+  case SIInstrEncodingType::EXP:
-+  case SIInstrEncodingType::LDS:
-+  case SIInstrEncodingType::MUBUF:
-+  case SIInstrEncodingType::MTBUF:
-+  case SIInstrEncodingType::MIMG:
-+  case SIInstrEncodingType::VOP3:
-+    return 8;
-+  default:
-+    return 4;
-+  }
-+}
-+
-+
-+unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
-+  switch (reg) {
-+  case AMDGPU::M0: return 124;
-+  case AMDGPU::SREG_LIT_0: return 128;
-+  case AMDGPU::SI_LITERAL_CONSTANT: return 255;
-+  default: return MRI.getEncodingValue(reg);
-+  }
-+}
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/Processors.td llvm-r600/lib/Target/R600/Processors.td
---- llvm-3.2.src/lib/Target/R600/Processors.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/Processors.td	2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,29 @@
-+//===-- Processors.td - R600 processor definitions -------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// AMDIL processors supported.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class Proc <string Name, ProcessorItineraries itin,
-+            list<SubtargetFeature> Features>
-+: Processor<Name, itin, Features>;
-+def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
-+def : Proc<"rv710", R600_EG_Itin, []>;
-+def : Proc<"rv730", R600_EG_Itin, []>;
-+def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
-+def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-+def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-+def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Defines.h llvm-r600/lib/Target/R600/R600Defines.h
---- llvm-3.2.src/lib/Target/R600/R600Defines.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600Defines.h	2013-01-25 19:43:57.460049721 +0100
-@@ -0,0 +1,94 @@
-+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600DEFINES_H_
-+#define R600DEFINES_H_
-+
-+#include "llvm/MC/MCRegisterInfo.h"
-+
-+// Operand Flags
-+#define MO_FLAG_CLAMP (1 << 0)
-+#define MO_FLAG_NEG (1 << 1)
-+#define MO_FLAG_ABS (1 << 2)
-+#define MO_FLAG_MASK (1 << 3)
-+#define MO_FLAG_PUSH (1 << 4)
-+#define MO_FLAG_NOT_LAST (1 << 5)
-+#define MO_FLAG_LAST (1 << 6)
-+#define NUM_MO_FLAGS 7
-+
-+/// \brief Helper for getting the operand index for the instruction flags
-+/// operand.
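-+/// (Bits 7-8 of an instruction's TSFlags hold this index; a value of 0 means
-+/// the instruction has no flag operand, which is why callers treat 0 as
-+/// "not present".)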
-+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
-+
-+namespace R600_InstFlag {
-+  enum TIF {
-+    TRANS_ONLY = (1 << 0),
-+    TEX = (1 << 1),
-+    REDUCTION = (1 << 2),
-+    FC = (1 << 3),
-+    TRIG = (1 << 4),
-+    OP3 = (1 << 5),
-+    VECTOR = (1 << 6),
-+    //FlagOperand bits 7, 8
-+    NATIVE_OPERANDS = (1 << 9),
-+    OP1 = (1 << 10),
-+    OP2 = (1 << 11)
-+  };
-+}
-+
-+#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
-+
-+/// \brief Defines for extracting register information from register encoding
-+#define HW_REG_MASK 0x1ff
-+#define HW_CHAN_SHIFT 9
-+
-+namespace R600Operands {
-+  enum Ops {
-+    DST,
-+    UPDATE_EXEC_MASK,
-+    UPDATE_PREDICATE,
-+    WRITE,
-+    OMOD,
-+    DST_REL,
-+    CLAMP,
-+    SRC0,
-+    SRC0_NEG,
-+    SRC0_REL,
-+    SRC0_ABS,
-+    SRC0_SEL,
-+    SRC1,
-+    SRC1_NEG,
-+    SRC1_REL,
-+    SRC1_ABS,
-+    SRC1_SEL,
-+    SRC2,
-+    SRC2_NEG,
-+    SRC2_REL,
-+    SRC2_SEL,
-+    LAST,
-+    PRED_SEL,
-+    IMM,
-+    COUNT
-+  };
-+
-+  const static int ALUOpTable[3][R600Operands::COUNT] = {
-+// W C S S S S S S S S S S S
-+// R O D L S R R R R S R R R R S R R R L P
-+// D U I M R A R C C C C R C C C C R C C C A R I
-+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
-+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
-+    {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
-+    {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
-+    {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
-+  };
-+
-+}
-+
-+#endif // R600DEFINES_H_
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600ExpandSpecialInstrs.cpp llvm-r600/lib/Target/R600/R600ExpandSpecialInstrs.cpp
---- llvm-3.2.src/lib/Target/R600/R600ExpandSpecialInstrs.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600ExpandSpecialInstrs.cpp	2013-01-25 19:43:57.463383054 +0100
-@@ -0,0 +1,333 @@
-+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// Vector, Reduction, and Cube instructions need to fill the entire instruction
-+/// group to work correctly. This pass expands these individual instructions
-+/// into several instructions that will completely fill the instruction group.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "R600Defines.h"
-+#include "R600InstrInfo.h"
-+#include "R600RegisterInfo.h"
-+#include "R600MachineFunctionInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
-+
-+private:
-+  static char ID;
-+  const R600InstrInfo *TII;
-+
-+  bool ExpandInputPerspective(MachineInstr& MI);
-+  bool ExpandInputConstant(MachineInstr& MI);
-+
-+public:
-+  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
-+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
-+
-+  virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+  const char *getPassName() const {
-+    return "R600 Expand special instructions pass";
-+  }
-+};
-+
-+} // End anonymous namespace
-+
-+char R600ExpandSpecialInstrsPass::ID = 0;
-+
-+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
-+  return new R600ExpandSpecialInstrsPass(TM);
-+}
-+
-+bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
-+  const R600RegisterInfo &TRI = TII->getRegisterInfo();
-+  if (MI.getOpcode() != AMDGPU::input_perspective)
-+    return false;
-+
-+  MachineBasicBlock::iterator I = &MI;
-+  unsigned DstReg = MI.getOperand(0).getReg();
-+  R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
-+      ->getInfo<R600MachineFunctionInfo>();
-+  unsigned IJIndexBase;
-+
-+  // In Evergreen ISA doc section 8.3.2 :
-+  // We need to interpolate XY and ZW in two different instruction groups.
-+  // An INTERP_* must occupy all 4 slots of an instruction group.
-+  // Output of INTERP_XY is written in X,Y slots
-+  // Output of INTERP_ZW is written in Z,W slots
-+  //
-+  // Thus interpolation requires the following sequences :
-+  //
-+  // AnyGPR.x = INTERP_ZW; (Write Masked Out)
-+  // AnyGPR.y = INTERP_ZW; (Write Masked Out)
-+  // DstGPR.z = INTERP_ZW;
-+  // DstGPR.w = INTERP_ZW; (End of first IG)
-+  // DstGPR.x = INTERP_XY;
-+  // DstGPR.y = INTERP_XY;
-+  // AnyGPR.z = INTERP_XY; (Write Masked Out)
-+  // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
-+  //
-+  switch (MI.getOperand(1).getImm()) {
-+  case 0:
-+    IJIndexBase = MFI->GetIJPerspectiveIndex();
-+    break;
-+  case 1:
-+    IJIndexBase = MFI->GetIJLinearIndex();
-+    break;
-+  default:
-+    assert(0 && "Unknown ij index");
-+  }
-+
-+  for (unsigned i = 0; i < 8; i++) {
-+    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
-+        2 * IJIndexBase + ((i + 1) % 2));
-+    unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
-+        MI.getOperand(2).getImm());
-+
-+    unsigned Sel = AMDGPU::sel_x;
-+    switch (i % 4) {
-+    case 0:Sel = AMDGPU::sel_x;break;
-+    case 1:Sel = AMDGPU::sel_y;break;
-+    case 2:Sel = AMDGPU::sel_z;break;
-+    case 3:Sel = AMDGPU::sel_w;break;
-+    default:break;
-+    }
-+
-+    unsigned Res = TRI.getSubReg(DstReg, Sel);
-+
-+    unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY;
-+
-+    MachineBasicBlock &MBB = *(MI.getParent());
-+    MachineInstr *NewMI =
-+        TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);
-+
-+    if (!(i > 1 && i < 6)) {
-+      TII->addFlag(NewMI, 0, MO_FLAG_MASK);
-+    }
-+
-+    if (i % 4 != 3)
-+      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
-+  }
-+
-+  MI.eraseFromParent();
-+
-+  return true;
-+}
-+
-+bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) {
-+  const R600RegisterInfo &TRI = 
TII->getRegisterInfo(); -+ if (MI.getOpcode() != AMDGPU::input_constant) -+ return false; -+ -+ MachineBasicBlock::iterator I = &MI; -+ unsigned DstReg = MI.getOperand(0).getReg(); -+ -+ for (unsigned i = 0; i < 4; i++) { -+ unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( -+ MI.getOperand(1).getImm()); -+ -+ unsigned Sel = AMDGPU::sel_x; -+ switch (i % 4) { -+ case 0:Sel = AMDGPU::sel_x;break; -+ case 1:Sel = AMDGPU::sel_y;break; -+ case 2:Sel = AMDGPU::sel_z;break; -+ case 3:Sel = AMDGPU::sel_w;break; -+ default:break; -+ } -+ -+ unsigned Res = TRI.getSubReg(DstReg, Sel); -+ -+ MachineBasicBlock &MBB = *(MI.getParent()); -+ MachineInstr *NewMI = TII->buildDefaultInstruction( -+ MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg); -+ -+ if (i % 4 != 3) -+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); -+ } -+ -+ MI.eraseFromParent(); -+ -+ return true; -+} -+ -+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { -+ -+ const R600RegisterInfo &TRI = TII->getRegisterInfo(); -+ -+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); -+ BB != BB_E; ++BB) { -+ MachineBasicBlock &MBB = *BB; -+ MachineBasicBlock::iterator I = MBB.begin(); -+ while (I != MBB.end()) { -+ MachineInstr &MI = *I; -+ I = llvm::next(I); -+ -+ switch (MI.getOpcode()) { -+ default: break; -+ // Expand PRED_X to one of the PRED_SET instructions. -+ case AMDGPU::PRED_X: { -+ uint64_t Flags = MI.getOperand(3).getImm(); -+ // The native opcode used by PRED_X is stored as an immediate in the -+ // third operand. -+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, -+ MI.getOperand(2).getImm(), // opcode -+ MI.getOperand(0).getReg(), // dst -+ MI.getOperand(1).getReg(), // src0 -+ AMDGPU::ZERO); // src1 -+ TII->addFlag(PredSet, 0, MO_FLAG_MASK); -+ if (Flags & MO_FLAG_PUSH) { -+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); -+ } else { -+ TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1); -+ } -+ MI.eraseFromParent(); -+ continue; -+ } -+ case AMDGPU::BREAK: -+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, -+ AMDGPU::PRED_SETE_INT, -+ AMDGPU::PREDICATE_BIT, -+ AMDGPU::ZERO, -+ AMDGPU::ZERO); -+ TII->addFlag(PredSet, 0, MO_FLAG_MASK); -+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); -+ -+ BuildMI(MBB, I, MBB.findDebugLoc(I), -+ TII->get(AMDGPU::PREDICATED_BREAK)) -+ .addReg(AMDGPU::PREDICATE_BIT); -+ MI.eraseFromParent(); -+ continue; -+ } -+ -+ if (ExpandInputPerspective(MI)) -+ continue; -+ if (ExpandInputConstant(MI)) -+ continue; -+ -+ bool IsReduction = TII->isReductionOp(MI.getOpcode()); -+ bool IsVector = TII->isVector(MI); -+ bool IsCube = TII->isCubeOp(MI.getOpcode()); -+ if (!IsReduction && !IsVector && !IsCube) { -+ continue; -+ } -+ -+ // Expand the instruction -+ // -+ // Reduction instructions: -+ // T0_X = DP4 T1_XYZW, T2_XYZW -+ // becomes: -+ // TO_X = DP4 T1_X, T2_X -+ // TO_Y (write masked) = DP4 T1_Y, T2_Y -+ // TO_Z (write masked) = DP4 T1_Z, T2_Z -+ // TO_W (write masked) = DP4 T1_W, T2_W -+ // -+ // Vector instructions: -+ // T0_X = MULLO_INT T1_X, T2_X -+ // becomes: -+ // T0_X = MULLO_INT T1_X, T2_X -+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X -+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X -+ // T0_W (write masked) = MULLO_INT T1_X, T2_X -+ // -+ // Cube instructions: -+ // T0_XYZW = CUBE T1_XYZW -+ // becomes: -+ // TO_X = CUBE T1_Z, T1_Y -+ // T0_Y = CUBE T1_Z, T1_X -+ // T0_Z = CUBE T1_X, T1_Z -+ // T0_W = CUBE T1_Y, T1_Z -+ for (unsigned Chan = 0; Chan < 4; Chan++) { -+ unsigned 
DstReg = MI.getOperand( -+ TII->getOperandIdx(MI, R600Operands::DST)).getReg(); -+ unsigned Src0 = MI.getOperand( -+ TII->getOperandIdx(MI, R600Operands::SRC0)).getReg(); -+ unsigned Src1 = 0; -+ -+ // Determine the correct source registers -+ if (!IsCube) { -+ int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1); -+ if (Src1Idx != -1) { -+ Src1 = MI.getOperand(Src1Idx).getReg(); -+ } -+ } -+ if (IsReduction) { -+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); -+ Src0 = TRI.getSubReg(Src0, SubRegIndex); -+ Src1 = TRI.getSubReg(Src1, SubRegIndex); -+ } else if (IsCube) { -+ static const int CubeSrcSwz[] = {2, 2, 0, 1}; -+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); -+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); -+ Src1 = TRI.getSubReg(Src0, SubRegIndex1); -+ Src0 = TRI.getSubReg(Src0, SubRegIndex0); -+ } -+ -+ // Determine the correct destination registers; -+ bool Mask = false; -+ bool NotLast = true; -+ if (IsCube) { -+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); -+ DstReg = TRI.getSubReg(DstReg, SubRegIndex); -+ } else { -+ // Mask the write if the original instruction does not write to -+ // the current Channel. -+ Mask = (Chan != TRI.getHWRegChan(DstReg)); -+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; -+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); -+ } -+ -+ // Set the IsLast bit -+ NotLast = (Chan != 3 ); -+ -+ // Add the new instruction -+ unsigned Opcode = MI.getOpcode(); -+ switch (Opcode) { -+ case AMDGPU::CUBE_r600_pseudo: -+ Opcode = AMDGPU::CUBE_r600_real; -+ break; -+ case AMDGPU::CUBE_eg_pseudo: -+ Opcode = AMDGPU::CUBE_eg_real; -+ break; -+ case AMDGPU::DOT4_r600_pseudo: -+ Opcode = AMDGPU::DOT4_r600_real; -+ break; -+ case AMDGPU::DOT4_eg_pseudo: -+ Opcode = AMDGPU::DOT4_eg_real; -+ break; -+ default: -+ break; -+ } -+ -+ MachineInstr *NewMI = -+ TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); -+ -+ NewMI->setIsInsideBundle(Chan != 0); -+ if (Mask) { -+ TII->addFlag(NewMI, 0, MO_FLAG_MASK); -+ } -+ if (NotLast) { -+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); -+ } -+ } -+ MI.eraseFromParent(); -+ } -+ } -+ return false; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600InstrInfo.cpp llvm-r600/lib/Target/R600/R600InstrInfo.cpp ---- llvm-3.2.src/lib/Target/R600/R600InstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600InstrInfo.cpp 2013-01-25 19:43:57.466716387 +0100 -@@ -0,0 +1,655 @@ -+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief R600 Implementation of TargetInstrInfo. 
-+// -+//===----------------------------------------------------------------------===// -+ -+#include "R600InstrInfo.h" -+#include "AMDGPUTargetMachine.h" -+#include "AMDGPUSubtarget.h" -+#include "R600Defines.h" -+#include "R600RegisterInfo.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+ -+#define GET_INSTRINFO_CTOR -+#include "AMDGPUGenDFAPacketizer.inc" -+ -+using namespace llvm; -+ -+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) -+ : AMDGPUInstrInfo(tm), -+ RI(tm, *this) -+ { } -+ -+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { -+ return RI; -+} -+ -+bool R600InstrInfo::isTrig(const MachineInstr &MI) const { -+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; -+} -+ -+bool R600InstrInfo::isVector(const MachineInstr &MI) const { -+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; -+} -+ -+void -+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MI, DebugLoc DL, -+ unsigned DestReg, unsigned SrcReg, -+ bool KillSrc) const { -+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg) -+ && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { -+ for (unsigned I = 0; I < 4; I++) { -+ unsigned SubRegIndex = RI.getSubRegFromChannel(I); -+ buildDefaultInstruction(MBB, MI, AMDGPU::MOV, -+ RI.getSubReg(DestReg, SubRegIndex), -+ RI.getSubReg(SrcReg, SubRegIndex)) -+ .addReg(DestReg, -+ RegState::Define | RegState::Implicit); -+ } -+ } else { -+ -+ // We can't copy vec4 registers -+ assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) -+ && !AMDGPU::R600_Reg128RegClass.contains(SrcReg)); -+ -+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, -+ DestReg, SrcReg); -+ NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0)) -+ .setIsKill(KillSrc); -+ } -+} -+ -+MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, -+ unsigned DstReg, int64_t Imm) const { -+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); -+ MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); -+ MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); -+ MachineInstrBuilder(MI).addImm(Imm); -+ MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT -+ -+ return MI; -+} -+ -+unsigned R600InstrInfo::getIEQOpcode() const { -+ return AMDGPU::SETE_INT; -+} -+ -+bool R600InstrInfo::isMov(unsigned Opcode) const { -+ -+ -+ switch(Opcode) { -+ default: return false; -+ case AMDGPU::MOV: -+ case AMDGPU::MOV_IMM_F32: -+ case AMDGPU::MOV_IMM_I32: -+ return true; -+ } -+} -+ -+// Some instructions act as place holders to emulate operations that the GPU -+// hardware does automatically. This function can be used to check if -+// an opcode falls into this category. 
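-+// For example, RETURN and RESERVE_REG (the cases below) stand in for work
-+// the hardware performs on its own, so nothing extra needs to be emitted
-+// for them.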
-+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
-+  switch (Opcode) {
-+  default: return false;
-+  case AMDGPU::RETURN:
-+  case AMDGPU::RESERVE_REG:
-+    return true;
-+  }
-+}
-+
-+bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
-+  switch(Opcode) {
-+  default: return false;
-+  case AMDGPU::DOT4_r600_pseudo:
-+  case AMDGPU::DOT4_eg_pseudo:
-+    return true;
-+  }
-+}
-+
-+bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
-+  switch(Opcode) {
-+  default: return false;
-+  case AMDGPU::CUBE_r600_pseudo:
-+  case AMDGPU::CUBE_r600_real:
-+  case AMDGPU::CUBE_eg_pseudo:
-+  case AMDGPU::CUBE_eg_real:
-+    return true;
-+  }
-+}
-+
-+bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
-+  unsigned TargetFlags = get(Opcode).TSFlags;
-+
-+  return ((TargetFlags & R600_InstFlag::OP1) |
-+          (TargetFlags & R600_InstFlag::OP2) |
-+          (TargetFlags & R600_InstFlag::OP3));
-+}
-+
-+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
-+    const ScheduleDAG *DAG) const {
-+  const InstrItineraryData *II = TM->getInstrItineraryData();
-+  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
-+}
-+
-+static bool
-+isPredicateSetter(unsigned Opcode) {
-+  switch (Opcode) {
-+  case AMDGPU::PRED_X:
-+    return true;
-+  default:
-+    return false;
-+  }
-+}
-+
-+static MachineInstr *
-+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
-+                             MachineBasicBlock::iterator I) {
-+  while (I != MBB.begin()) {
-+    --I;
-+    MachineInstr *MI = I;
-+    if (isPredicateSetter(MI->getOpcode()))
-+      return MI;
-+  }
-+
-+  return NULL;
-+}
-+
-+bool
-+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-+                             MachineBasicBlock *&TBB,
-+                             MachineBasicBlock *&FBB,
-+                             SmallVectorImpl<MachineOperand> &Cond,
-+                             bool AllowModify) const {
-+  // Most of the following comes from the ARM implementation of AnalyzeBranch
-+
-+  // If the block has no terminators, it just falls into the block after it.
-+  MachineBasicBlock::iterator I = MBB.end();
-+  if (I == MBB.begin())
-+    return false;
-+  --I;
-+  while (I->isDebugValue()) {
-+    if (I == MBB.begin())
-+      return false;
-+    --I;
-+  }
-+  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
-+    return false;
-+  }
-+
-+  // Get the last instruction in the block.
-+  MachineInstr *LastInst = I;
-+
-+  // If there is only one terminator instruction, process it.
-+  unsigned LastOpc = LastInst->getOpcode();
-+  if (I == MBB.begin() ||
-+      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
-+    if (LastOpc == AMDGPU::JUMP) {
-+      if(!isPredicated(LastInst)) {
-+        TBB = LastInst->getOperand(0).getMBB();
-+        return false;
-+      } else {
-+        MachineInstr *predSet = I;
-+        while (!isPredicateSetter(predSet->getOpcode())) {
-+          predSet = --I;
-+        }
-+        TBB = LastInst->getOperand(0).getMBB();
-+        Cond.push_back(predSet->getOperand(1));
-+        Cond.push_back(predSet->getOperand(2));
-+        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
-+        return false;
-+      }
-+    }
-+    return true; // Can't handle indirect branch.
-+  }
-+
-+  // Get the instruction before it if it is a terminator.
-+  MachineInstr *SecondLastInst = I;
-+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
-+
-+  // If the block ends with a B and a Bcc, handle it.
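-+  // That is: a predicated JUMP (the "Bcc") followed by an unpredicated JUMP
-+  // (the "B"), which is exactly what the condition below tests for.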
-+ if (SecondLastOpc == AMDGPU::JUMP && -+ isPredicated(SecondLastInst) && -+ LastOpc == AMDGPU::JUMP && -+ !isPredicated(LastInst)) { -+ MachineInstr *predSet = --I; -+ while (!isPredicateSetter(predSet->getOpcode())) { -+ predSet = --I; -+ } -+ TBB = SecondLastInst->getOperand(0).getMBB(); -+ FBB = LastInst->getOperand(0).getMBB(); -+ Cond.push_back(predSet->getOperand(1)); -+ Cond.push_back(predSet->getOperand(2)); -+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); -+ return false; -+ } -+ -+ // Otherwise, can't handle this. -+ return true; -+} -+ -+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { -+ const MachineInstr *MI = op.getParent(); -+ -+ switch (MI->getDesc().OpInfo->RegClass) { -+ default: // FIXME: fallthrough?? -+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; -+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; -+ }; -+} -+ -+unsigned -+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, -+ MachineBasicBlock *TBB, -+ MachineBasicBlock *FBB, -+ const SmallVectorImpl &Cond, -+ DebugLoc DL) const { -+ assert(TBB && "InsertBranch must not be told to insert a fallthrough"); -+ -+ if (FBB == 0) { -+ if (Cond.empty()) { -+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); -+ return 1; -+ } else { -+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); -+ assert(PredSet && "No previous predicate !"); -+ addFlag(PredSet, 0, MO_FLAG_PUSH); -+ PredSet->getOperand(2).setImm(Cond[1].getImm()); -+ -+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)) -+ .addMBB(TBB) -+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); -+ return 1; -+ } -+ } else { -+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); -+ assert(PredSet && "No previous predicate !"); -+ addFlag(PredSet, 0, MO_FLAG_PUSH); -+ PredSet->getOperand(2).setImm(Cond[1].getImm()); -+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)) -+ .addMBB(TBB) -+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); -+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); -+ return 2; -+ } -+} -+ -+unsigned -+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { -+ -+ // Note : we leave PRED* instructions there. -+ // They may be needed when predicating instructions. -+ -+ MachineBasicBlock::iterator I = MBB.end(); -+ -+ if (I == MBB.begin()) { -+ return 0; -+ } -+ --I; -+ switch (I->getOpcode()) { -+ default: -+ return 0; -+ case AMDGPU::JUMP: -+ if (isPredicated(I)) { -+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); -+ clearFlag(predSet, 0, MO_FLAG_PUSH); -+ } -+ I->eraseFromParent(); -+ break; -+ } -+ I = MBB.end(); -+ -+ if (I == MBB.begin()) { -+ return 1; -+ } -+ --I; -+ switch (I->getOpcode()) { -+ // FIXME: only one case?? 
-+ default: -+ return 1; -+ case AMDGPU::JUMP: -+ if (isPredicated(I)) { -+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); -+ clearFlag(predSet, 0, MO_FLAG_PUSH); -+ } -+ I->eraseFromParent(); -+ break; -+ } -+ return 2; -+} -+ -+bool -+R600InstrInfo::isPredicated(const MachineInstr *MI) const { -+ int idx = MI->findFirstPredOperandIdx(); -+ if (idx < 0) -+ return false; -+ -+ unsigned Reg = MI->getOperand(idx).getReg(); -+ switch (Reg) { -+ default: return false; -+ case AMDGPU::PRED_SEL_ONE: -+ case AMDGPU::PRED_SEL_ZERO: -+ case AMDGPU::PREDICATE_BIT: -+ return true; -+ } -+} -+ -+bool -+R600InstrInfo::isPredicable(MachineInstr *MI) const { -+ // XXX: KILL* instructions can be predicated, but they must be the last -+ // instruction in a clause, so this means any instructions after them cannot -+ // be predicated. Until we have proper support for instruction clauses in the -+ // backend, we will mark KILL* instructions as unpredicable. -+ -+ if (MI->getOpcode() == AMDGPU::KILLGT) { -+ return false; -+ } else { -+ return AMDGPUInstrInfo::isPredicable(MI); -+ } -+} -+ -+ -+bool -+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, -+ unsigned NumCyles, -+ unsigned ExtraPredCycles, -+ const BranchProbability &Probability) const{ -+ return true; -+} -+ -+bool -+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, -+ unsigned NumTCycles, -+ unsigned ExtraTCycles, -+ MachineBasicBlock &FMBB, -+ unsigned NumFCycles, -+ unsigned ExtraFCycles, -+ const BranchProbability &Probability) const { -+ return true; -+} -+ -+bool -+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, -+ unsigned NumCyles, -+ const BranchProbability &Probability) -+ const { -+ return true; -+} -+ -+bool -+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, -+ MachineBasicBlock &FMBB) const { -+ return false; -+} -+ -+ -+bool -+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl &Cond) const { -+ MachineOperand &MO = Cond[1]; -+ switch (MO.getImm()) { -+ case OPCODE_IS_ZERO_INT: -+ MO.setImm(OPCODE_IS_NOT_ZERO_INT); -+ break; -+ case OPCODE_IS_NOT_ZERO_INT: -+ MO.setImm(OPCODE_IS_ZERO_INT); -+ break; -+ case OPCODE_IS_ZERO: -+ MO.setImm(OPCODE_IS_NOT_ZERO); -+ break; -+ case OPCODE_IS_NOT_ZERO: -+ MO.setImm(OPCODE_IS_ZERO); -+ break; -+ default: -+ return true; -+ } -+ -+ MachineOperand &MO2 = Cond[2]; -+ switch (MO2.getReg()) { -+ case AMDGPU::PRED_SEL_ZERO: -+ MO2.setReg(AMDGPU::PRED_SEL_ONE); -+ break; -+ case AMDGPU::PRED_SEL_ONE: -+ MO2.setReg(AMDGPU::PRED_SEL_ZERO); -+ break; -+ default: -+ return true; -+ } -+ return false; -+} -+ -+bool -+R600InstrInfo::DefinesPredicate(MachineInstr *MI, -+ std::vector &Pred) const { -+ return isPredicateSetter(MI->getOpcode()); -+} -+ -+ -+bool -+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl &Pred1, -+ const SmallVectorImpl &Pred2) const { -+ return false; -+} -+ -+ -+bool -+R600InstrInfo::PredicateInstruction(MachineInstr *MI, -+ const SmallVectorImpl &Pred) const { -+ int PIdx = MI->findFirstPredOperandIdx(); -+ -+ if (PIdx != -1) { -+ MachineOperand &PMO = MI->getOperand(PIdx); -+ PMO.setReg(Pred[2].getReg()); -+ MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); -+ return true; -+ } -+ -+ return false; -+} -+ -+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, -+ const MachineInstr *MI, -+ unsigned *PredCost) const { -+ if (PredCost) -+ *PredCost = 2; -+ return 2; -+} -+ -+MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB, -+ 
MachineBasicBlock::iterator I,
-+                                                  unsigned Opcode,
-+                                                  unsigned DstReg,
-+                                                  unsigned Src0Reg,
-+                                                  unsigned Src1Reg) const {
-+  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
-+    DstReg);            // $dst
-+
-+  if (Src1Reg) {
-+    MIB.addImm(0)       // $update_exec_mask
-+       .addImm(0);      // $update_predicate
-+  }
-+  MIB.addImm(1)         // $write
-+     .addImm(0)         // $omod
-+     .addImm(0)         // $dst_rel
-+     .addImm(0)         // $dst_clamp
-+     .addReg(Src0Reg)   // $src0
-+     .addImm(0)         // $src0_neg
-+     .addImm(0)         // $src0_rel
-+     .addImm(0)         // $src0_abs
-+     .addImm(-1);       // $src0_sel
-+
-+  if (Src1Reg) {
-+    MIB.addReg(Src1Reg) // $src1
-+       .addImm(0)       // $src1_neg
-+       .addImm(0)       // $src1_rel
-+       .addImm(0)       // $src1_abs
-+       .addImm(-1);     // $src1_sel
-+  }
-+
-+  //XXX: The r600g finalizer expects this to be 1; once we've moved the
-+  //scheduling to the backend, we can change the default to 0.
-+  MIB.addImm(1)         // $last
-+     .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
-+     .addImm(0);        // $literal
-+
-+  return MIB;
-+}
-+
-+MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
-+                                         MachineBasicBlock::iterator I,
-+                                         unsigned DstReg,
-+                                         uint64_t Imm) const {
-+  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
-+                                                 AMDGPU::ALU_LITERAL_X);
-+  setImmOperand(MovImm, R600Operands::IMM, Imm);
-+  return MovImm;
-+}
-+
-+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
-+                                 R600Operands::Ops Op) const {
-+  return getOperandIdx(MI.getOpcode(), Op);
-+}
-+
-+int R600InstrInfo::getOperandIdx(unsigned Opcode,
-+                                 R600Operands::Ops Op) const {
-+  unsigned TargetFlags = get(Opcode).TSFlags;
-+  unsigned OpTableIdx;
-+
-+  if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
-+    switch (Op) {
-+    case R600Operands::DST: return 0;
-+    case R600Operands::SRC0: return 1;
-+    case R600Operands::SRC1: return 2;
-+    case R600Operands::SRC2: return 3;
-+    default:
-+      assert(!"Unknown operand type for instruction");
-+      return -1;
-+    }
-+  }
-+
-+  if (TargetFlags & R600_InstFlag::OP1) {
-+    OpTableIdx = 0;
-+  } else if (TargetFlags & R600_InstFlag::OP2) {
-+    OpTableIdx = 1;
-+  } else {
-+    assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
-+        "for this instruction");
-+    OpTableIdx = 2;
-+  }
-+
-+  return R600Operands::ALUOpTable[OpTableIdx][Op];
-+}
-+
-+void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
-+                                  int64_t Imm) const {
-+  int Idx = getOperandIdx(*MI, Op);
-+  assert(Idx != -1 && "Operand not supported for this instruction.");
-+  assert(MI->getOperand(Idx).isImm());
-+  MI->getOperand(Idx).setImm(Imm);
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Instruction flag getters/setters
-+//===----------------------------------------------------------------------===//
-+
-+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
-+  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
-+}
-+
-+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
-+                                         unsigned Flag) const {
-+  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
-+  int FlagIndex = 0;
-+  if (Flag != 0) {
-+    // If we pass something other than the default value of Flag to this
-+    // function, it means we want to set a flag on an instruction
-+    // that uses native encoding.
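-+    // (For non-native instructions, all flags live packed in a single
-+    // immediate operand instead; that case is handled in the else branch
-+    // below.)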
-+ assert(HAS_NATIVE_OPERANDS(TargetFlags)); -+ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; -+ switch (Flag) { -+ case MO_FLAG_CLAMP: -+ FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP); -+ break; -+ case MO_FLAG_MASK: -+ FlagIndex = getOperandIdx(*MI, R600Operands::WRITE); -+ break; -+ case MO_FLAG_NOT_LAST: -+ case MO_FLAG_LAST: -+ FlagIndex = getOperandIdx(*MI, R600Operands::LAST); -+ break; -+ case MO_FLAG_NEG: -+ switch (SrcIdx) { -+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break; -+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break; -+ case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break; -+ } -+ break; -+ -+ case MO_FLAG_ABS: -+ assert(!IsOP3 && "Cannot set absolute value modifier for OP3 " -+ "instructions."); -+ switch (SrcIdx) { -+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break; -+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break; -+ } -+ break; -+ -+ default: -+ FlagIndex = -1; -+ break; -+ } -+ assert(FlagIndex != -1 && "Flag not supported for this instruction"); -+ } else { -+ FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags); -+ assert(FlagIndex != 0 && -+ "Instruction flags not supported for this instruction"); -+ } -+ -+ MachineOperand &FlagOp = MI->getOperand(FlagIndex); -+ assert(FlagOp.isImm()); -+ return FlagOp; -+} -+ -+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, -+ unsigned Flag) const { -+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags; -+ if (Flag == 0) { -+ return; -+ } -+ if (HAS_NATIVE_OPERANDS(TargetFlags)) { -+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); -+ if (Flag == MO_FLAG_NOT_LAST) { -+ clearFlag(MI, Operand, MO_FLAG_LAST); -+ } else if (Flag == MO_FLAG_MASK) { -+ clearFlag(MI, Operand, Flag); -+ } else { -+ FlagOp.setImm(1); -+ } -+ } else { -+ MachineOperand &FlagOp = getFlagOp(MI, Operand); -+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); -+ } -+} -+ -+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, -+ unsigned Flag) const { -+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags; -+ if (HAS_NATIVE_OPERANDS(TargetFlags)) { -+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); -+ FlagOp.setImm(0); -+ } else { -+ MachineOperand &FlagOp = getFlagOp(MI); -+ unsigned InstFlags = FlagOp.getImm(); -+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand)); -+ FlagOp.setImm(InstFlags); -+ } -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600InstrInfo.h llvm-r600/lib/Target/R600/R600InstrInfo.h ---- llvm-3.2.src/lib/Target/R600/R600InstrInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600InstrInfo.h 2013-01-25 19:43:57.466716387 +0100 -@@ -0,0 +1,169 @@ -+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition for R600InstrInfo
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600INSTRUCTIONINFO_H_
-+#define R600INSTRUCTIONINFO_H_
-+
-+#include "AMDIL.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "R600Defines.h"
-+#include "R600RegisterInfo.h"
-+
-+#include <map>
-+
-+namespace llvm {
-+
-+  class AMDGPUTargetMachine;
-+  class DFAPacketizer;
-+  class ScheduleDAG;
-+  class MachineFunction;
-+  class MachineInstr;
-+  class MachineInstrBuilder;
-+
-+  class R600InstrInfo : public AMDGPUInstrInfo {
-+  private:
-+  const R600RegisterInfo RI;
-+
-+  int getBranchInstr(const MachineOperand &op) const;
-+
-+  public:
-+  explicit R600InstrInfo(AMDGPUTargetMachine &tm);
-+
-+  const R600RegisterInfo &getRegisterInfo() const;
-+  virtual void copyPhysReg(MachineBasicBlock &MBB,
-+                           MachineBasicBlock::iterator MI, DebugLoc DL,
-+                           unsigned DestReg, unsigned SrcReg,
-+                           bool KillSrc) const;
-+
-+  bool isTrig(const MachineInstr &MI) const;
-+  bool isPlaceHolderOpcode(unsigned opcode) const;
-+  bool isReductionOp(unsigned opcode) const;
-+  bool isCubeOp(unsigned opcode) const;
-+
-+  /// \returns true if this \p Opcode represents an ALU instruction.
-+  bool isALUInstr(unsigned Opcode) const;
-+
-+  /// \brief Vector instructions are instructions that must fill all
-+  /// instruction slots within an instruction group.
-+  bool isVector(const MachineInstr &MI) const;
-+
-+  virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+                                        int64_t Imm) const;
-+
-+  virtual unsigned getIEQOpcode() const;
-+  virtual bool isMov(unsigned Opcode) const;
-+
-+  DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
-+                                           const ScheduleDAG *DAG) const;
-+
-+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-+
-+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
-+                     SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
-+
-+  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
-+
-+  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-+
-+  bool isPredicated(const MachineInstr *MI) const;
-+
-+  bool isPredicable(MachineInstr *MI) const;
-+
-+  bool
-+   isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
-+                             const BranchProbability &Probability) const;
-+
-+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
-+                           unsigned ExtraPredCycles,
-+                           const BranchProbability &Probability) const ;
-+
-+  bool
-+   isProfitableToIfCvt(MachineBasicBlock &TMBB,
-+                       unsigned NumTCycles, unsigned ExtraTCycles,
-+                       MachineBasicBlock &FMBB,
-+                       unsigned NumFCycles, unsigned ExtraFCycles,
-+                       const BranchProbability &Probability) const;
-+
-+  bool DefinesPredicate(MachineInstr *MI,
-+                        std::vector<MachineOperand> &Pred) const;
-+
-+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
-+
-+  bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
-+                                 MachineBasicBlock &FMBB) const;
-+
-+  bool PredicateInstruction(MachineInstr *MI,
-+                            const SmallVectorImpl<MachineOperand> &Pred) const;
-+
-+  unsigned int getInstrLatency(const InstrItineraryData *ItinData,
-+                               const MachineInstr *MI,
-+                               unsigned *PredCost = 0) const;
-+
-+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
-+                              SDNode *Node) const { return 1;}
-+
-+  /// You can use this function to avoid manually specifying each 
instruction -+ /// modifier operand when building a new instruction. -+ /// -+ /// \returns a MachineInstr with all the instruction modifiers initialized -+ /// to their default values. -+ MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator I, -+ unsigned Opcode, -+ unsigned DstReg, -+ unsigned Src0Reg, -+ unsigned Src1Reg = 0) const; -+ -+ MachineInstr *buildMovImm(MachineBasicBlock &BB, -+ MachineBasicBlock::iterator I, -+ unsigned DstReg, -+ uint64_t Imm) const; -+ -+ /// \brief Get the index of Op in the MachineInstr. -+ /// -+ /// \returns -1 if the Instruction does not contain the specified \p Op. -+ int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const; -+ -+ /// \brief Get the index of \p Op for the given Opcode. -+ /// -+ /// \returns -1 if the Instruction does not contain the specified \p Op. -+ int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const; -+ -+ /// \brief Helper function for setting instruction flag values. -+ void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const; -+ -+ /// \returns true if this instruction has an operand for storing target flags. -+ bool hasFlagOperand(const MachineInstr &MI) const; -+ -+ ///\brief Add one of the MO_FLAG* flags to the specified \p Operand. -+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; -+ -+ ///\brief Determine if the specified \p Flag is set on this \p Operand. -+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const; -+ -+ /// \param SrcIdx The register source to set the flag on (e.g src0, src1, src2) -+ /// \param Flag The flag being set. -+ /// -+ /// \returns the operand containing the flags for this instruction. -+ MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0, -+ unsigned Flag = 0) const; -+ -+ /// \brief Clear the specified flag on the instruction. -+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; -+}; -+ -+} // End llvm namespace -+ -+#endif // R600INSTRINFO_H_ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Instructions.td llvm-r600/lib/Target/R600/R600Instructions.td ---- llvm-3.2.src/lib/Target/R600/R600Instructions.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600Instructions.td 2013-01-25 19:43:57.466716387 +0100 -@@ -0,0 +1,1843 @@ -+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// R600 Tablegen instruction definitions
-+//
-+//===----------------------------------------------------------------------===//
-+
-+include "R600Intrinsics.td"
-+
-+class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
-+                InstrItinClass itin>
-+    : AMDGPUInst <outs, ins, asm, pattern> {
-+
-+  field bits<64> Inst;
-+  bit Trig = 0;
-+  bit Op3 = 0;
-+  bit isVector = 0;
-+  bits<2> FlagOperandIdx = 0;
-+  bit Op1 = 0;
-+  bit Op2 = 0;
-+  bit HasNativeOperands = 0;
-+
-+  bits<11> op_code = inst;
-+  //let Inst = inst;
-+  let Namespace = "AMDGPU";
-+  let OutOperandList = outs;
-+  let InOperandList = ins;
-+  let AsmString = asm;
-+  let Pattern = pattern;
-+  let Itinerary = itin;
-+
-+  let TSFlags{4} = Trig;
-+  let TSFlags{5} = Op3;
-+
-+  // Vector instructions are instructions that must fill all slots in an
-+  // instruction group
-+  let TSFlags{6} = isVector;
-+  let TSFlags{8-7} = FlagOperandIdx;
-+  let TSFlags{9} = HasNativeOperands;
-+  let TSFlags{10} = Op1;
-+  let TSFlags{11} = Op2;
-+}
-+
-+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
-+    AMDGPUInst <outs, ins, asm, pattern> {
-+  field bits<64> Inst;
-+
-+  let Namespace = "AMDGPU";
-+}
-+
-+def MEMxi : Operand<iPTR> {
-+  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
-+  let PrintMethod = "printMemOperand";
-+}
-+
-+def MEMrr : Operand<iPTR> {
-+  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
-+}
-+
-+// Operands for non-registers
-+
-+class InstFlag <string PM = "printOperand", int Default = 0>
-+  : OperandWithDefaultOps <i32, (ops (i32 Default))> {
-+  let PrintMethod = PM;
-+}
-+
-+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
-+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
-+  let PrintMethod = "printSel";
-+}
-+
-+def LITERAL : InstFlag<"printLiteral">;
-+
-+def WRITE : InstFlag <"printWrite", 1>;
-+def OMOD : InstFlag <"printOMOD">;
-+def REL : InstFlag <"printRel">;
-+def CLAMP : InstFlag <"printClamp">;
-+def NEG : InstFlag <"printNeg">;
-+def ABS : InstFlag <"printAbs">;
-+def UEM : InstFlag <"printUpdateExecMask">;
-+def UP : InstFlag <"printUpdatePred">;
-+
-+// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
-+// Once we start using the packetizer in this backend we should have this
-+// default to 0.
-+def LAST : InstFlag<"printLast", 1>; -+ -+def ADDRParam : ComplexPattern; -+def ADDRDWord : ComplexPattern; -+def ADDRVTX_READ : ComplexPattern; -+def ADDRGA_CONST_OFFSET : ComplexPattern; -+def ADDRGA_VAR_OFFSET : ComplexPattern; -+ -+class R600ALU_Word0 { -+ field bits<32> Word0; -+ -+ bits<11> src0; -+ bits<1> src0_neg; -+ bits<1> src0_rel; -+ bits<11> src1; -+ bits<1> src1_rel; -+ bits<1> src1_neg; -+ bits<3> index_mode = 0; -+ bits<2> pred_sel; -+ bits<1> last; -+ -+ bits<9> src0_sel = src0{8-0}; -+ bits<2> src0_chan = src0{10-9}; -+ bits<9> src1_sel = src1{8-0}; -+ bits<2> src1_chan = src1{10-9}; -+ -+ let Word0{8-0} = src0_sel; -+ let Word0{9} = src0_rel; -+ let Word0{11-10} = src0_chan; -+ let Word0{12} = src0_neg; -+ let Word0{21-13} = src1_sel; -+ let Word0{22} = src1_rel; -+ let Word0{24-23} = src1_chan; -+ let Word0{25} = src1_neg; -+ let Word0{28-26} = index_mode; -+ let Word0{30-29} = pred_sel; -+ let Word0{31} = last; -+} -+ -+class R600ALU_Word1 { -+ field bits<32> Word1; -+ -+ bits<11> dst; -+ bits<3> bank_swizzle = 0; -+ bits<1> dst_rel; -+ bits<1> clamp; -+ -+ bits<7> dst_sel = dst{6-0}; -+ bits<2> dst_chan = dst{10-9}; -+ -+ let Word1{20-18} = bank_swizzle; -+ let Word1{27-21} = dst_sel; -+ let Word1{28} = dst_rel; -+ let Word1{30-29} = dst_chan; -+ let Word1{31} = clamp; -+} -+ -+class R600ALU_Word1_OP2 alu_inst> : R600ALU_Word1{ -+ -+ bits<1> src0_abs; -+ bits<1> src1_abs; -+ bits<1> update_exec_mask; -+ bits<1> update_pred; -+ bits<1> write; -+ bits<2> omod; -+ -+ let Word1{0} = src0_abs; -+ let Word1{1} = src1_abs; -+ let Word1{2} = update_exec_mask; -+ let Word1{3} = update_pred; -+ let Word1{4} = write; -+ let Word1{6-5} = omod; -+ let Word1{17-7} = alu_inst; -+} -+ -+class R600ALU_Word1_OP3 alu_inst> : R600ALU_Word1{ -+ -+ bits<11> src2; -+ bits<1> src2_rel; -+ bits<1> src2_neg; -+ -+ bits<9> src2_sel = src2{8-0}; -+ bits<2> src2_chan = src2{10-9}; -+ -+ let Word1{8-0} = src2_sel; -+ let Word1{9} = src2_rel; -+ let Word1{11-10} = src2_chan; -+ let Word1{12} = src2_neg; -+ let Word1{17-13} = alu_inst; -+} -+ -+class VTX_WORD0 { -+ field bits<32> Word0; -+ bits<7> SRC_GPR; -+ bits<5> VC_INST; -+ bits<2> FETCH_TYPE; -+ bits<1> FETCH_WHOLE_QUAD; -+ bits<8> BUFFER_ID; -+ bits<1> SRC_REL; -+ bits<2> SRC_SEL_X; -+ bits<6> MEGA_FETCH_COUNT; -+ -+ let Word0{4-0} = VC_INST; -+ let Word0{6-5} = FETCH_TYPE; -+ let Word0{7} = FETCH_WHOLE_QUAD; -+ let Word0{15-8} = BUFFER_ID; -+ let Word0{22-16} = SRC_GPR; -+ let Word0{23} = SRC_REL; -+ let Word0{25-24} = SRC_SEL_X; -+ let Word0{31-26} = MEGA_FETCH_COUNT; -+} -+ -+class VTX_WORD1_GPR { -+ field bits<32> Word1; -+ bits<7> DST_GPR; -+ bits<1> DST_REL; -+ bits<3> DST_SEL_X; -+ bits<3> DST_SEL_Y; -+ bits<3> DST_SEL_Z; -+ bits<3> DST_SEL_W; -+ bits<1> USE_CONST_FIELDS; -+ bits<6> DATA_FORMAT; -+ bits<2> NUM_FORMAT_ALL; -+ bits<1> FORMAT_COMP_ALL; -+ bits<1> SRF_MODE_ALL; -+ -+ let Word1{6-0} = DST_GPR; -+ let Word1{7} = DST_REL; -+ let Word1{8} = 0; // Reserved -+ let Word1{11-9} = DST_SEL_X; -+ let Word1{14-12} = DST_SEL_Y; -+ let Word1{17-15} = DST_SEL_Z; -+ let Word1{20-18} = DST_SEL_W; -+ let Word1{21} = USE_CONST_FIELDS; -+ let Word1{27-22} = DATA_FORMAT; -+ let Word1{29-28} = NUM_FORMAT_ALL; -+ let Word1{30} = FORMAT_COMP_ALL; -+ let Word1{31} = SRF_MODE_ALL; -+} -+ -+/* -+XXX: R600 subtarget uses a slightly different encoding than the other -+subtargets. We currently handle this in R600MCCodeEmitter, but we may -+want to use these instruction classes in the future. 
-+ -+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 { -+ -+ bits<1> fog_merge; -+ bits<10> alu_inst; -+ -+ let Inst{37} = fog_merge; -+ let Inst{39-38} = omod; -+ let Inst{49-40} = alu_inst; -+} -+ -+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { -+ -+ bits<11> alu_inst; -+ -+ let Inst{38-37} = omod; -+ let Inst{49-39} = alu_inst; -+} -+*/ -+ -+def R600_Pred : PredicateOperand; -+ -+ -+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { -+ -+// Class for instructions with only one source register. -+// If you add new ins to this instruction, make sure they are listed before -+// $literal, because the backend currently assumes that the last operand is -+// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in -+// R600Defines.h, R600InstrInfo::buildDefaultInstruction(), -+// and R600InstrInfo::getOperandIdx(). -+class R600_1OP inst, string opName, list pattern, -+ InstrItinClass itin = AnyALU> : -+ InstR600 <0, -+ (outs R600_Reg32:$dst), -+ (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, -+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, -+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), -+ !strconcat(opName, -+ "$clamp $dst$write$dst_rel$omod, " -+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " -+ "$literal $pred_sel$last"), -+ pattern, -+ itin>, -+ R600ALU_Word0, -+ R600ALU_Word1_OP2 { -+ -+ let src1 = 0; -+ let src1_rel = 0; -+ let src1_neg = 0; -+ let src1_abs = 0; -+ let update_exec_mask = 0; -+ let update_pred = 0; -+ let HasNativeOperands = 1; -+ let Op1 = 1; -+ let DisableEncoding = "$literal"; -+ -+ let Inst{31-0} = Word0; -+ let Inst{63-32} = Word1; -+} -+ -+class R600_1OP_Helper inst, string opName, SDPatternOperator node, -+ InstrItinClass itin = AnyALU> : -+ R600_1OP ; -+ -+// If you add our change the operands for R600_2OP instructions, you must -+// also update the R600Op2OperandIndex::ROI enum in R600Defines.h, -+// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx(). -+class R600_2OP inst, string opName, list pattern, -+ InstrItinClass itin = AnyALU> : -+ InstR600 , -+ R600ALU_Word0, -+ R600ALU_Word1_OP2 { -+ -+ let HasNativeOperands = 1; -+ let Op2 = 1; -+ let DisableEncoding = "$literal"; -+ -+ let Inst{31-0} = Word0; -+ let Inst{63-32} = Word1; -+} -+ -+class R600_2OP_Helper inst, string opName, SDPatternOperator node, -+ InstrItinClass itim = AnyALU> : -+ R600_2OP ; -+ -+// If you add our change the operands for R600_3OP instructions, you must -+// also update the R600Op3OperandIndex::ROI enum in R600Defines.h, -+// R600InstrInfo::buildDefaultInstruction(), and -+// R600InstrInfo::getOperandIdx(). 
-+class R600_3OP inst, string opName, list pattern, -+ InstrItinClass itin = AnyALU> : -+ InstR600 <0, -+ (outs R600_Reg32:$dst), -+ (ins REL:$dst_rel, CLAMP:$clamp, -+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, -+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, -+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, -+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), -+ !strconcat(opName, "$clamp $dst$dst_rel, " -+ "$src0_neg$src0$src0_sel$src0_rel, " -+ "$src1_neg$src1$src1_sel$src1_rel, " -+ "$src2_neg$src2$src2_sel$src2_rel, " -+ "$literal $pred_sel$last"), -+ pattern, -+ itin>, -+ R600ALU_Word0, -+ R600ALU_Word1_OP3{ -+ -+ let HasNativeOperands = 1; -+ let DisableEncoding = "$literal"; -+ let Op3 = 1; -+ -+ let Inst{31-0} = Word0; -+ let Inst{63-32} = Word1; -+} -+ -+class R600_REDUCTION inst, dag ins, string asm, list pattern, -+ InstrItinClass itin = VecALU> : -+ InstR600 ; -+ -+class R600_TEX inst, string opName, list pattern, -+ InstrItinClass itin = AnyALU> : -+ InstR600 { -+ let Inst {10-0} = inst; -+ } -+ -+} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 -+ -+def TEX_SHADOW : PatLeaf< -+ (imm), -+ [{uint32_t TType = (uint32_t)N->getZExtValue(); -+ return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13); -+ }] -+>; -+ -+def TEX_RECT : PatLeaf< -+ (imm), -+ [{uint32_t TType = (uint32_t)N->getZExtValue(); -+ return TType == 5; -+ }] -+>; -+ -+class EG_CF_RAT cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, -+ dag ins, string asm, list pattern> : -+ InstR600ISA { -+ bits<7> RW_GPR; -+ bits<7> INDEX_GPR; -+ -+ bits<2> RIM; -+ bits<2> TYPE; -+ bits<1> RW_REL; -+ bits<2> ELEM_SIZE; -+ -+ bits<12> ARRAY_SIZE; -+ bits<4> COMP_MASK; -+ bits<4> BURST_COUNT; -+ bits<1> VPM; -+ bits<1> eop; -+ bits<1> MARK; -+ bits<1> BARRIER; -+ -+ // CF_ALLOC_EXPORT_WORD0_RAT -+ let Inst{3-0} = rat_id; -+ let Inst{9-4} = rat_inst; -+ let Inst{10} = 0; // Reserved -+ let Inst{12-11} = RIM; -+ let Inst{14-13} = TYPE; -+ let Inst{21-15} = RW_GPR; -+ let Inst{22} = RW_REL; -+ let Inst{29-23} = INDEX_GPR; -+ let Inst{31-30} = ELEM_SIZE; -+ -+ // CF_ALLOC_EXPORT_WORD1_BUF -+ let Inst{43-32} = ARRAY_SIZE; -+ let Inst{47-44} = COMP_MASK; -+ let Inst{51-48} = BURST_COUNT; -+ let Inst{52} = VPM; -+ let Inst{53} = eop; -+ let Inst{61-54} = cf_inst; -+ let Inst{62} = MARK; -+ let Inst{63} = BARRIER; -+} -+ -+class LoadParamFrag : PatFrag < -+ (ops node:$ptr), (load_type node:$ptr), -+ [{ return isParamLoad(dyn_cast(N)); }] -+>; -+ -+def load_param : LoadParamFrag; -+def load_param_zexti8 : LoadParamFrag; -+def load_param_zexti16 : LoadParamFrag; -+ -+def isR600 : Predicate<"Subtarget.device()" -+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">; -+def isR700 : Predicate<"Subtarget.device()" -+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" -+ "Subtarget.device()->getDeviceFlag()" -+ ">= OCL_DEVICE_RV710">; -+def isEG : Predicate< -+ "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && " -+ "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && " -+ "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; -+ -+def isCayman : Predicate<"Subtarget.device()" -+ "->getDeviceFlag() == OCL_DEVICE_CAYMAN">; -+def isEGorCayman : Predicate<"Subtarget.device()" -+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX" -+ "|| Subtarget.device()->getGeneration() ==" -+ "AMDGPUDeviceInfo::HD6XXX">; -+ -+def isR600toCayman : Predicate< -+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; -+ 
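The EG_CF_RAT class above fixes every field of its two CF words purely through the let Inst{...} bit assignments. As a minimal C++ sketch of what that encoding amounts to (the helper name and signature are illustrative only, not part of the patch or of LLVM), CF_ALLOC_EXPORT_WORD0_RAT packs as:

    #include <cstdint>

    // Shift-and-mask packing mirroring the let Inst{...} assignments
    // in EG_CF_RAT; field widths come straight from the class above.
    static uint32_t packRatWord0(uint32_t rat_id, uint32_t rat_inst,
                                 uint32_t rim, uint32_t type,
                                 uint32_t rw_gpr, uint32_t rw_rel,
                                 uint32_t index_gpr, uint32_t elem_size) {
      uint32_t w = 0;
      w |= (rat_id    & 0xF)  << 0;   // Inst{3-0}
      w |= (rat_inst  & 0x3F) << 4;   // Inst{9-4}; Inst{10} stays 0 (reserved)
      w |= (rim       & 0x3)  << 11;  // Inst{12-11}
      w |= (type      & 0x3)  << 13;  // Inst{14-13}
      w |= (rw_gpr    & 0x7F) << 15;  // Inst{21-15}
      w |= (rw_rel    & 0x1)  << 22;  // Inst{22}
      w |= (index_gpr & 0x7F) << 23;  // Inst{29-23}
      w |= (elem_size & 0x3)  << 30;  // Inst{31-30}
      return w;
    }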
-+//===----------------------------------------------------------------------===// -+// R600 SDNodes -+//===----------------------------------------------------------------------===// -+ -+def INTERP: SDNode<"AMDGPUISD::INTERP", -+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> -+ >; -+ -+def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", -+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> -+ >; -+ -+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", -+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, -+ [SDNPMayLoad] -+>; -+ -+//===----------------------------------------------------------------------===// -+// Interpolation Instructions -+//===----------------------------------------------------------------------===// -+ -+let usesCustomInserter = 1 in { -+def input_perspective : AMDGPUShaderInst < -+ (outs R600_Reg128:$dst), -+ (ins i32imm:$src0, i32imm:$src1), -+ "input_perspective $src0 $src1 : dst", -+ [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>; -+} // End usesCustomInserter = 1 -+ -+def input_constant : AMDGPUShaderInst < -+ (outs R600_Reg128:$dst), -+ (ins i32imm:$src), -+ "input_perspective $src : dst", -+ [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>; -+ -+ -+ -+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { -+ let bank_swizzle = 5; -+} -+ -+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> { -+ let bank_swizzle = 5; -+} -+ -+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; -+ -+//===----------------------------------------------------------------------===// -+// Export Instructions -+//===----------------------------------------------------------------------===// -+ -+def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>; -+ -+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, -+ [SDNPHasChain, SDNPSideEffect]>; -+ -+class ExportWord0 { -+ field bits<32> Word0; -+ -+ bits<13> arraybase; -+ bits<2> type; -+ bits<7> gpr; -+ bits<2> elem_size; -+ -+ let Word0{12-0} = arraybase; -+ let Word0{14-13} = type; -+ let Word0{21-15} = gpr; -+ let Word0{22} = 0; // RW_REL -+ let Word0{29-23} = 0; // INDEX_GPR -+ let Word0{31-30} = elem_size; -+} -+ -+class ExportSwzWord1 { -+ field bits<32> Word1; -+ -+ bits<3> sw_x; -+ bits<3> sw_y; -+ bits<3> sw_z; -+ bits<3> sw_w; -+ bits<1> eop; -+ bits<8> inst; -+ -+ let Word1{2-0} = sw_x; -+ let Word1{5-3} = sw_y; -+ let Word1{8-6} = sw_z; -+ let Word1{11-9} = sw_w; -+} -+ -+class ExportBufWord1 { -+ field bits<32> Word1; -+ -+ bits<12> arraySize; -+ bits<4> compMask; -+ bits<1> eop; -+ bits<8> inst; -+ -+ let Word1{11-0} = arraySize; -+ let Word1{15-12} = compMask; -+} -+ -+multiclass ExportPattern cf_inst> { -+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), -+ (ExportInst -+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), -+ 0, 61, 0, 7, 7, 7, cf_inst, 0) -+ >; -+ -+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), -+ (ExportInst -+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), -+ 0, 61, 7, 0, 7, 7, cf_inst, 0) -+ >; -+ -+ def : Pat<(int_R600_store_pixel_dummy), -+ (ExportInst -+ (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0) -+ >; -+ -+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), -+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), -+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, -+ 0, 1, 2, 3, cf_inst, 0) -+ >; -+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), -+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), -+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, -+ 0, 1, 2, 
3, cf_inst, 0) -+ >; -+ -+ def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase, -+ imm:$type), -+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, -+ 0, 1, 2, 3, cf_inst, 0) -+ >; -+} -+ -+multiclass SteamOutputExportPattern buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { -+// Stream0 -+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), -+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), -+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase, -+ 4095, imm:$mask, buf0inst, 0)>; -+// Stream1 -+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), -+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), -+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase, -+ 4095, imm:$mask, buf1inst, 0)>; -+// Stream2 -+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), -+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), -+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase, -+ 4095, imm:$mask, buf2inst, 0)>; -+// Stream3 -+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), -+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), -+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase, -+ 4095, imm:$mask, buf3inst, 0)>; -+} -+ -+let isTerminator = 1, usesCustomInserter = 1 in { -+ -+class ExportSwzInst : InstR600ISA<( -+ outs), -+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, -+ i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst, -+ i32imm:$eop), -+ !strconcat("EXPORT", " $gpr"), -+ []>, ExportWord0, ExportSwzWord1 { -+ let elem_size = 3; -+ let Inst{31-0} = Word0; -+ let Inst{63-32} = Word1; -+} -+ -+} // End isTerminator = 1, usesCustomInserter = 1 -+ -+class ExportBufInst : InstR600ISA<( -+ outs), -+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, -+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop), -+ !strconcat("EXPORT", " $gpr"), -+ []>, ExportWord0, ExportBufWord1 { -+ let elem_size = 0; -+ let Inst{31-0} = Word0; -+ let Inst{63-32} = Word1; -+} -+ -+let Predicates = [isR600toCayman] in { -+ -+//===----------------------------------------------------------------------===// -+// Common Instructions R600, R700, Evergreen, Cayman -+//===----------------------------------------------------------------------===// -+ -+def ADD : R600_2OP_Helper <0x0, "ADD", fadd>; -+// Non-IEEE MUL: 0 * anything = 0 -+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; -+def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; -+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>; -+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; -+ -+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, -+// so some of the instruction names don't match the asm string. -+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. 
-+def SETE : R600_2OP < -+ 0x08, "SETE", -+ [(set R600_Reg32:$dst, -+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, -+ COND_EQ))] -+>; -+ -+def SGT : R600_2OP < -+ 0x09, "SETGT", -+ [(set R600_Reg32:$dst, -+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, -+ COND_GT))] -+>; -+ -+def SGE : R600_2OP < -+ 0xA, "SETGE", -+ [(set R600_Reg32:$dst, -+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, -+ COND_GE))] -+>; -+ -+def SNE : R600_2OP < -+ 0xB, "SETNE", -+ [(set R600_Reg32:$dst, -+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, -+ COND_NE))] -+>; -+ -+def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; -+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; -+def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; -+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; -+def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; -+ -+def MOV : R600_1OP <0x19, "MOV", []>; -+ -+let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { -+ -+class MOV_IMM : AMDGPUInst < -+ (outs R600_Reg32:$dst), -+ (ins immType:$imm), -+ "", -+ [] -+>; -+ -+} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 -+ -+def MOV_IMM_I32 : MOV_IMM; -+def : Pat < -+ (imm:$val), -+ (MOV_IMM_I32 imm:$val) -+>; -+ -+def MOV_IMM_F32 : MOV_IMM; -+def : Pat < -+ (fpimm:$val), -+ (MOV_IMM_F32 fpimm:$val) -+>; -+ -+def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>; -+def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>; -+def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>; -+def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>; -+ -+let hasSideEffects = 1 in { -+ -+def KILLGT : R600_2OP <0x2D, "KILLGT", []>; -+ -+} // end hasSideEffects -+ -+def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>; -+def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>; -+def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>; -+def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>; -+def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>; -+def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>; -+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>; -+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>; -+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>; -+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; -+ -+def SETE_INT : R600_2OP < -+ 0x3A, "SETE_INT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] -+>; -+ -+def SETGT_INT : R600_2OP < -+ 0x3B, "SGT_INT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] -+>; -+ -+def SETGE_INT : R600_2OP < -+ 0x3C, "SETGE_INT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] -+>; -+ -+def SETNE_INT : R600_2OP < -+ 0x3D, "SETNE_INT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] -+>; -+ -+def SETGT_UINT : R600_2OP < -+ 0x3E, "SETGT_UINT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] -+>; -+ -+def SETGE_UINT : R600_2OP < -+ 0x3F, "SETGE_UINT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] -+>; -+ -+def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>; -+def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGE_INT", []>; -+def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>; -+def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>; -+ 
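These selectcc patterns encode the hardware convention that the integer SET* instructions produce an all-ones mask (-1) when the comparison holds and 0 otherwise, while the floating-point SETE/SETGT/SETGE/SETNE variants produce 1.0f or 0.0f. A short C++ sketch of the semantics being matched (function names are illustrative only, not from the patch):

    #include <cstdint>

    // What (selectcc a, b, -1, 0, SETGT) on i32 selects: an all-ones
    // mask when the signed comparison holds, zero otherwise.
    static int32_t setgt_int(int32_t a, int32_t b) {
      return (a > b) ? -1 : 0;
    }

    // What (selectcc a, b, FP_ONE, FP_ZERO, COND_GT) on f32 selects.
    static float setgt_f32(float a, float b) {
      return (a > b) ? 1.0f : 0.0f;
    }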
-+def CNDE_INT : R600_3OP < -+ 0x1C, "CNDE_INT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), 0, -+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), -+ COND_EQ))] -+>; -+ -+def CNDGE_INT : R600_3OP < -+ 0x1E, "CNDGE_INT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), 0, -+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), -+ COND_GE))] -+>; -+ -+def CNDGT_INT : R600_3OP < -+ 0x1D, "CNDGT_INT", -+ [(set (i32 R600_Reg32:$dst), -+ (selectcc (i32 R600_Reg32:$src0), 0, -+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), -+ COND_GT))] -+>; -+ -+//===----------------------------------------------------------------------===// -+// Texture instructions -+//===----------------------------------------------------------------------===// -+ -+def TEX_LD : R600_TEX < -+ 0x03, "TEX_LD", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+> { -+let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget"; -+let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget); -+} -+ -+def TEX_GET_TEXTURE_RESINFO : R600_TEX < -+ 0x04, "TEX_GET_TEXTURE_RESINFO", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+>; -+ -+def TEX_GET_GRADIENTS_H : R600_TEX < -+ 0x07, "TEX_GET_GRADIENTS_H", -+ [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+>; -+ -+def TEX_GET_GRADIENTS_V : R600_TEX < -+ 0x08, "TEX_GET_GRADIENTS_V", -+ [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+>; -+ -+def TEX_SET_GRADIENTS_H : R600_TEX < -+ 0x0B, "TEX_SET_GRADIENTS_H", -+ [] -+>; -+ -+def TEX_SET_GRADIENTS_V : R600_TEX < -+ 0x0C, "TEX_SET_GRADIENTS_V", -+ [] -+>; -+ -+def TEX_SAMPLE : R600_TEX < -+ 0x10, "TEX_SAMPLE", -+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+>; -+ -+def TEX_SAMPLE_C : R600_TEX < -+ 0x18, "TEX_SAMPLE_C", -+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] -+>; -+ -+def TEX_SAMPLE_L : R600_TEX < -+ 0x11, "TEX_SAMPLE_L", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+>; -+ -+def TEX_SAMPLE_C_L : R600_TEX < -+ 0x19, "TEX_SAMPLE_C_L", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] -+>; -+ -+def TEX_SAMPLE_LB : R600_TEX < -+ 0x12, "TEX_SAMPLE_LB", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+>; -+ -+def TEX_SAMPLE_C_LB : R600_TEX < -+ 0x1A, "TEX_SAMPLE_C_LB", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] -+>; -+ -+def TEX_SAMPLE_G : R600_TEX < -+ 0x14, "TEX_SAMPLE_G", -+ [] -+>; -+ -+def TEX_SAMPLE_C_G : R600_TEX < -+ 0x1C, "TEX_SAMPLE_C_G", -+ [] -+>; -+ -+//===----------------------------------------------------------------------===// -+// Helper classes for common instructions -+//===----------------------------------------------------------------------===// -+ -+class MUL_LIT_Common inst> : R600_3OP < -+ inst, "MUL_LIT", -+ [] 
-+>; -+ -+class MULADD_Common inst> : R600_3OP < -+ inst, "MULADD", -+ [(set (f32 R600_Reg32:$dst), -+ (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] -+>; -+ -+class CNDE_Common inst> : R600_3OP < -+ inst, "CNDE", -+ [(set R600_Reg32:$dst, -+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO, -+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), -+ COND_EQ))] -+>; -+ -+class CNDGT_Common inst> : R600_3OP < -+ inst, "CNDGT", -+ [(set R600_Reg32:$dst, -+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO, -+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), -+ COND_GT))] -+>; -+ -+class CNDGE_Common inst> : R600_3OP < -+ inst, "CNDGE", -+ [(set R600_Reg32:$dst, -+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO, -+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), -+ COND_GE))] -+>; -+ -+multiclass DOT4_Common inst> { -+ -+ def _pseudo : R600_REDUCTION ; -+ -+ def _real : R600_2OP ; -+} -+ -+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { -+multiclass CUBE_Common inst> { -+ -+ def _pseudo : InstR600 < -+ inst, -+ (outs R600_Reg128:$dst), -+ (ins R600_Reg128:$src), -+ "CUBE $dst $src", -+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], -+ VecALU -+ > { -+ let isPseudo = 1; -+ } -+ -+ def _real : R600_2OP ; -+} -+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 -+ -+class EXP_IEEE_Common inst> : R600_1OP_Helper < -+ inst, "EXP_IEEE", fexp2 -+>; -+ -+class FLT_TO_INT_Common inst> : R600_1OP_Helper < -+ inst, "FLT_TO_INT", fp_to_sint -+>; -+ -+class INT_TO_FLT_Common inst> : R600_1OP_Helper < -+ inst, "INT_TO_FLT", sint_to_fp -+>; -+ -+class FLT_TO_UINT_Common inst> : R600_1OP_Helper < -+ inst, "FLT_TO_UINT", fp_to_uint -+>; -+ -+class UINT_TO_FLT_Common inst> : R600_1OP_Helper < -+ inst, "UINT_TO_FLT", uint_to_fp -+>; -+ -+class LOG_CLAMPED_Common inst> : R600_1OP < -+ inst, "LOG_CLAMPED", [] -+>; -+ -+class LOG_IEEE_Common inst> : R600_1OP_Helper < -+ inst, "LOG_IEEE", flog2 -+>; -+ -+class LSHL_Common inst> : R600_2OP_Helper ; -+class LSHR_Common inst> : R600_2OP_Helper ; -+class ASHR_Common inst> : R600_2OP_Helper ; -+class MULHI_INT_Common inst> : R600_2OP_Helper < -+ inst, "MULHI_INT", mulhs -+>; -+class MULHI_UINT_Common inst> : R600_2OP_Helper < -+ inst, "MULHI", mulhu -+>; -+class MULLO_INT_Common inst> : R600_2OP_Helper < -+ inst, "MULLO_INT", mul -+>; -+class MULLO_UINT_Common inst> : R600_2OP ; -+ -+class RECIP_CLAMPED_Common inst> : R600_1OP < -+ inst, "RECIP_CLAMPED", [] -+>; -+ -+class RECIP_IEEE_Common inst> : R600_1OP < -+ inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))] -+>; -+ -+class RECIP_UINT_Common inst> : R600_1OP_Helper < -+ inst, "RECIP_UINT", AMDGPUurecip -+>; -+ -+class RECIPSQRT_CLAMPED_Common inst> : R600_1OP_Helper < -+ inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq -+>; -+ -+class RECIPSQRT_IEEE_Common inst> : R600_1OP < -+ inst, "RECIPSQRT_IEEE", [] -+>; -+ -+class SIN_Common inst> : R600_1OP < -+ inst, "SIN", []>{ -+ let Trig = 1; -+} -+ -+class COS_Common inst> : R600_1OP < -+ inst, "COS", []> { -+ let Trig = 1; -+} -+ -+//===----------------------------------------------------------------------===// -+// Helper patterns for complex intrinsics -+//===----------------------------------------------------------------------===// -+ -+multiclass DIV_Common { -+def : Pat< -+ (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), -+ (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) -+>; -+ -+def : Pat< -+ (fdiv R600_Reg32:$src0, R600_Reg32:$src1), -+ (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) -+>; -+} -+ -+class 
TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
-+  (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
-+  (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// R600 / R700 Instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [isR600] in {
-+
-+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
-+ def MULADD_r600 : MULADD_Common<0x10>;
-+ def CNDE_r600 : CNDE_Common<0x18>;
-+ def CNDGT_r600 : CNDGT_Common<0x19>;
-+ def CNDGE_r600 : CNDGE_Common<0x1A>;
-+ defm DOT4_r600 : DOT4_Common<0x50>;
-+ defm CUBE_r600 : CUBE_Common<0x52>;
-+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
-+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
-+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
-+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
-+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
-+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
-+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
-+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
-+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
-+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
-+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
-+ def SIN_r600 : SIN_Common<0x6E>;
-+ def COS_r600 : COS_Common<0x6F>;
-+ def ASHR_r600 : ASHR_Common<0x70>;
-+ def LSHR_r600 : LSHR_Common<0x71>;
-+ def LSHL_r600 : LSHL_Common<0x72>;
-+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
-+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
-+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
-+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
-+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
-+
-+ defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
-+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
-+
-+ def : Pat<(fsqrt R600_Reg32:$src),
-+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
-+
-+ def R600_ExportSwz : ExportSwzInst {
-+ let Word1{20-17} = 1; // BURST_COUNT
-+ let Word1{21} = eop;
-+ let Word1{22} = 1; // VALID_PIXEL_MODE
-+ let Word1{30-23} = inst;
-+ let Word1{31} = 1; // BARRIER
-+ }
-+ defm : ExportPattern;
-+
-+ def R600_ExportBuf : ExportBufInst {
-+ let Word1{20-17} = 1; // BURST_COUNT
-+ let Word1{21} = eop;
-+ let Word1{22} = 1; // VALID_PIXEL_MODE
-+ let Word1{30-23} = inst;
-+ let Word1{31} = 1; // BARRIER
-+ }
-+ defm : SteamOutputExportPattern;
-+}
-+
-+// Helper pattern for normalizing inputs to trigonometric instructions for R700+
-+// cards.
-+class COS_PAT : Pat< -+ (fcos R600_Reg32:$src), -+ (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) -+>; -+ -+class SIN_PAT : Pat< -+ (fsin R600_Reg32:$src), -+ (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) -+>; -+ -+//===----------------------------------------------------------------------===// -+// R700 Only instructions -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [isR700] in { -+ def SIN_r700 : SIN_Common<0x6E>; -+ def COS_r700 : COS_Common<0x6F>; -+ -+ // R700 normalizes inputs to SIN/COS the same as EG -+ def : SIN_PAT ; -+ def : COS_PAT ; -+} -+ -+//===----------------------------------------------------------------------===// -+// Evergreen Only instructions -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [isEG] in { -+ -+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; -+defm DIV_eg : DIV_Common; -+ -+def MULLO_INT_eg : MULLO_INT_Common<0x8F>; -+def MULHI_INT_eg : MULHI_INT_Common<0x90>; -+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; -+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; -+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; -+def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; -+def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; -+def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; -+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; -+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; -+def SIN_eg : SIN_Common<0x8D>; -+def COS_eg : COS_Common<0x8E>; -+ -+def : SIN_PAT ; -+def : COS_PAT ; -+def : Pat<(fsqrt R600_Reg32:$src), -+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; -+} // End Predicates = [isEG] -+ -+//===----------------------------------------------------------------------===// -+// Evergreen / Cayman Instructions -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [isEGorCayman] in { -+ -+ // BFE_UINT - bit_extract, an optimization for mask and shift -+ // Src0 = Input -+ // Src1 = Offset -+ // Src2 = Width -+ // -+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) -+ // -+ // Example Usage: -+ // (Offset, Width) -+ // -+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 -+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 -+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 -+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 -+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", -+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, -+ R600_Reg32:$src1, -+ R600_Reg32:$src2))], -+ VecALU -+ >; -+ -+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", -+ [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, -+ R600_Reg32:$src2))], -+ VecALU -+ >; -+ -+ def MULADD_eg : MULADD_Common<0x14>; -+ def ASHR_eg : ASHR_Common<0x15>; -+ def LSHR_eg : LSHR_Common<0x16>; -+ def LSHL_eg : LSHL_Common<0x17>; -+ def CNDE_eg : CNDE_Common<0x19>; -+ def CNDGT_eg : CNDGT_Common<0x1A>; -+ def CNDGE_eg : CNDGE_Common<0x1B>; -+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>; -+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; -+ defm DOT4_eg : DOT4_Common<0xBE>; -+ defm CUBE_eg : CUBE_Common<0xC0>; -+ -+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common; -+ -+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { -+ let Pattern = []; -+ } -+ -+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; -+ -+ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { -+ let Pattern = []; -+ } -+ -+ def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; -+ -+ // TRUNC is used for 
the FLT_TO_INT instructions to work around a -+ // perceived problem where the rounding modes are applied differently -+ // depending on the instruction and the slot they are in. -+ // See: -+ // https://bugs.freedesktop.org/show_bug.cgi?id=50232 -+ // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c -+ // -+ // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, -+ // which do not need to be truncated since the fp values are 0.0f or 1.0f. -+ // We should look into handling these cases separately. -+ def : Pat<(fp_to_sint R600_Reg32:$src0), -+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>; -+ -+ def : Pat<(fp_to_uint R600_Reg32:$src0), -+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; -+ -+ def EG_ExportSwz : ExportSwzInst { -+ let Word1{19-16} = 1; // BURST_COUNT -+ let Word1{20} = 1; // VALID_PIXEL_MODE -+ let Word1{21} = eop; -+ let Word1{29-22} = inst; -+ let Word1{30} = 0; // MARK -+ let Word1{31} = 1; // BARRIER -+ } -+ defm : ExportPattern; -+ -+ def EG_ExportBuf : ExportBufInst { -+ let Word1{19-16} = 1; // BURST_COUNT -+ let Word1{20} = 1; // VALID_PIXEL_MODE -+ let Word1{21} = eop; -+ let Word1{29-22} = inst; -+ let Word1{30} = 0; // MARK -+ let Word1{31} = 1; // BARRIER -+ } -+ defm : SteamOutputExportPattern; -+ -+//===----------------------------------------------------------------------===// -+// Memory read/write instructions -+//===----------------------------------------------------------------------===// -+let usesCustomInserter = 1 in { -+ -+class RAT_WRITE_CACHELESS_eg comp_mask, string name, -+ list pattern> -+ : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, -+ !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> { -+ let RIM = 0; -+ // XXX: Have a separate instruction for non-indexed writes. -+ let TYPE = 1; -+ let RW_REL = 0; -+ let ELEM_SIZE = 0; -+ -+ let ARRAY_SIZE = 0; -+ let COMP_MASK = comp_mask; -+ let BURST_COUNT = 0; -+ let VPM = 0; -+ let MARK = 0; -+ let BARRIER = 1; -+} -+ -+} // End usesCustomInserter = 1 -+ -+// 32-bit store -+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < -+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), -+ 0x1, "RAT_WRITE_CACHELESS_32_eg", -+ [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)] -+>; -+ -+//128-bit store -+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < -+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), -+ 0xf, "RAT_WRITE_CACHELESS_128", -+ [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)] -+>; -+ -+class VTX_READ_eg buffer_id, dag outs, list pattern> -+ : InstR600ISA , -+ VTX_WORD1_GPR, VTX_WORD0 { -+ -+ // Static fields -+ let VC_INST = 0; -+ let FETCH_TYPE = 2; -+ let FETCH_WHOLE_QUAD = 0; -+ let BUFFER_ID = buffer_id; -+ let SRC_REL = 0; -+ // XXX: We can infer this field based on the SRC_GPR. This would allow us -+ // to store vertex addresses in any channel, not just X. -+ let SRC_SEL_X = 0; -+ let DST_REL = 0; -+ // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, -+ // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, -+ // however, based on my testing if USE_CONST_FIELDS is set, then all -+ // these fields need to be set to 0. 
-+ let USE_CONST_FIELDS = 0; -+ let NUM_FORMAT_ALL = 1; -+ let FORMAT_COMP_ALL = 0; -+ let SRF_MODE_ALL = 0; -+ -+ let Inst{31-0} = Word0; -+ let Inst{63-32} = Word1; -+ // LLVM can only encode 64-bit instructions, so these fields are manually -+ // encoded in R600CodeEmitter -+ // -+ // bits<16> OFFSET; -+ // bits<2> ENDIAN_SWAP = 0; -+ // bits<1> CONST_BUF_NO_STRIDE = 0; -+ // bits<1> MEGA_FETCH = 0; -+ // bits<1> ALT_CONST = 0; -+ // bits<2> BUFFER_INDEX_MODE = 0; -+ -+ -+ -+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding -+ // is done in R600CodeEmitter -+ // -+ // Inst{79-64} = OFFSET; -+ // Inst{81-80} = ENDIAN_SWAP; -+ // Inst{82} = CONST_BUF_NO_STRIDE; -+ // Inst{83} = MEGA_FETCH; -+ // Inst{84} = ALT_CONST; -+ // Inst{86-85} = BUFFER_INDEX_MODE; -+ // Inst{95-86} = 0; Reserved -+ -+ // VTX_WORD3 (Padding) -+ // -+ // Inst{127-96} = 0; -+} -+ -+class VTX_READ_8_eg buffer_id, list pattern> -+ : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst), -+ pattern> { -+ -+ let MEGA_FETCH_COUNT = 1; -+ let DST_SEL_X = 0; -+ let DST_SEL_Y = 7; // Masked -+ let DST_SEL_Z = 7; // Masked -+ let DST_SEL_W = 7; // Masked -+ let DATA_FORMAT = 1; // FMT_8 -+} -+ -+class VTX_READ_16_eg buffer_id, list pattern> -+ : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst), -+ pattern> { -+ let MEGA_FETCH_COUNT = 2; -+ let DST_SEL_X = 0; -+ let DST_SEL_Y = 7; // Masked -+ let DST_SEL_Z = 7; // Masked -+ let DST_SEL_W = 7; // Masked -+ let DATA_FORMAT = 5; // FMT_16 -+ -+} -+ -+class VTX_READ_32_eg buffer_id, list pattern> -+ : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst), -+ pattern> { -+ -+ let MEGA_FETCH_COUNT = 4; -+ let DST_SEL_X = 0; -+ let DST_SEL_Y = 7; // Masked -+ let DST_SEL_Z = 7; // Masked -+ let DST_SEL_W = 7; // Masked -+ let DATA_FORMAT = 0xD; // COLOR_32 -+ -+ // This is not really necessary, but there were some GPU hangs that appeared -+ // to be caused by ALU instructions in the next instruction group that wrote -+ // to the $ptr registers of the VTX_READ. -+ // e.g. -+ // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 -+ // %T2_X = MOV %ZERO -+ //Adding this constraint prevents this from happening. -+ let Constraints = "$ptr.ptr = $dst"; -+} -+ -+class VTX_READ_128_eg buffer_id, list pattern> -+ : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst), -+ pattern> { -+ -+ let MEGA_FETCH_COUNT = 16; -+ let DST_SEL_X = 0; -+ let DST_SEL_Y = 1; -+ let DST_SEL_Z = 2; -+ let DST_SEL_W = 3; -+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 -+ -+ // XXX: Need to force VTX_READ_128 instructions to write to the same register -+ // that holds its buffer address to avoid potential hangs. We can't use -+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst -+ // registers are different sizes. 
-+} -+ -+//===----------------------------------------------------------------------===// -+// VTX Read from parameter memory space -+//===----------------------------------------------------------------------===// -+ -+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, -+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))] -+>; -+ -+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, -+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))] -+>; -+ -+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, -+ [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] -+>; -+ -+//===----------------------------------------------------------------------===// -+// VTX Read from global memory space -+//===----------------------------------------------------------------------===// -+ -+// 8-bit reads -+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, -+ [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))] -+>; -+ -+// 32-bit reads -+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, -+ [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] -+>; -+ -+// 128-bit reads -+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, -+ [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] -+>; -+ -+//===----------------------------------------------------------------------===// -+// Constant Loads -+// XXX: We are currently storing all constants in the global address space. -+//===----------------------------------------------------------------------===// -+ -+def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, -+ [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))] -+>; -+ -+} -+ -+let Predicates = [isCayman] in { -+ -+let isVector = 1 in { -+ -+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; -+ -+def MULLO_INT_cm : MULLO_INT_Common<0x8F>; -+def MULHI_INT_cm : MULHI_INT_Common<0x90>; -+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; -+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; -+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; -+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; -+def LOG_IEEE_ : LOG_IEEE_Common<0x83>; -+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; -+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; -+def SIN_cm : SIN_Common<0x8D>; -+def COS_cm : COS_Common<0x8E>; -+} // End isVector = 1 -+ -+def : SIN_PAT ; -+def : COS_PAT ; -+ -+defm DIV_cm : DIV_Common; -+ -+// RECIP_UINT emulation for Cayman -+def : Pat < -+ (AMDGPUurecip R600_Reg32:$src0), -+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), -+ (MOV_IMM_I32 0x4f800000))) -+>; -+ -+ -+def : Pat<(fsqrt R600_Reg32:$src), -+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; -+ -+} // End isCayman -+ -+//===----------------------------------------------------------------------===// -+// Branch Instructions -+//===----------------------------------------------------------------------===// -+ -+ -+def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src), -+ "IF_PREDICATE_SET $src", []>; -+ -+def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src), -+ "PREDICATED_BREAK $src", []>; -+ -+//===----------------------------------------------------------------------===// -+// Pseudo instructions -+//===----------------------------------------------------------------------===// -+ -+let isPseudo = 1 in { -+ -+def PRED_X : InstR600 < -+ 0, (outs R600_Predicate_Bit:$dst), -+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), -+ "", [], NullALU> { -+ let FlagOperandIdx = 3; -+} -+ -+let isTerminator = 1, isBranch = 1, isBarrier = 1 in { -+ -+def 
JUMP : InstR600 <0x10, -+ (outs), -+ (ins brtarget:$target, R600_Pred:$p), -+ "JUMP $target ($p)", -+ [], AnyALU -+ >; -+ -+} // End isTerminator = 1, isBranch = 1, isBarrier = 1 -+ -+let usesCustomInserter = 1 in { -+ -+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { -+ -+def MASK_WRITE : AMDGPUShaderInst < -+ (outs), -+ (ins R600_Reg32:$src), -+ "MASK_WRITE $src", -+ [] -+>; -+ -+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 -+ -+ -+def RESERVE_REG : AMDGPUShaderInst < -+ (outs), -+ (ins i32imm:$src), -+ "RESERVE_REG $src", -+ [(int_AMDGPU_reserve_reg imm:$src)] -+>; -+def TXD: AMDGPUShaderInst < -+ (outs R600_Reg128:$dst), -+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), -+ "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] -+>; -+ -+def TXD_SHADOW: AMDGPUShaderInst < -+ (outs R600_Reg128:$dst), -+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), -+ "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", -+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] -+>; -+ -+} // End isPseudo = 1 -+} // End usesCustomInserter = 1 -+ -+def CLAMP_R600 : CLAMP ; -+def FABS_R600 : FABS; -+def FNEG_R600 : FNEG; -+ -+//===---------------------------------------------------------------------===// -+// Return instruction -+//===---------------------------------------------------------------------===// -+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { -+ def RETURN : ILFormat<(outs), (ins variable_ops), -+ "RETURN", [(IL_retflag)]>; -+} -+ -+ -+//===----------------------------------------------------------------------===// -+// Constant Buffer Addressing Support -+//===----------------------------------------------------------------------===// -+ -+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { -+def CONST_COPY : Instruction { -+ let OutOperandList = (outs R600_Reg32:$dst); -+ let InOperandList = (ins i32imm:$src); -+ let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; -+ let AsmString = "CONST_COPY"; -+ let neverHasSideEffects = 1; -+ let isAsCheapAsAMove = 1; -+ let Itinerary = NullALU; -+} -+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" -+ -+def TEX_VTX_CONSTBUF : -+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", -+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, -+ VTX_WORD1_GPR, VTX_WORD0 { -+ -+ let VC_INST = 0; -+ let FETCH_TYPE = 2; -+ let FETCH_WHOLE_QUAD = 0; -+ let BUFFER_ID = 0; -+ let SRC_REL = 0; -+ let SRC_SEL_X = 0; -+ let DST_REL = 0; -+ let USE_CONST_FIELDS = 0; -+ let NUM_FORMAT_ALL = 2; -+ let FORMAT_COMP_ALL = 1; -+ let SRF_MODE_ALL = 1; -+ let MEGA_FETCH_COUNT = 16; -+ let DST_SEL_X = 0; -+ let DST_SEL_Y = 1; -+ let DST_SEL_Z = 2; -+ let DST_SEL_W = 3; -+ let DATA_FORMAT = 35; -+ -+ let Inst{31-0} = Word0; -+ let Inst{63-32} = Word1; -+ -+// LLVM can only encode 64-bit instructions, so these fields are manually -+// encoded in R600CodeEmitter -+// -+// bits<16> OFFSET; -+// bits<2> ENDIAN_SWAP = 0; -+// bits<1> CONST_BUF_NO_STRIDE = 0; -+// bits<1> MEGA_FETCH = 0; -+// 
bits<1> ALT_CONST = 0;
-+// bits<2> BUFFER_INDEX_MODE = 0;
-+
-+
-+
-+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
-+// is done in R600CodeEmitter
-+//
-+// Inst{79-64} = OFFSET;
-+// Inst{81-80} = ENDIAN_SWAP;
-+// Inst{82} = CONST_BUF_NO_STRIDE;
-+// Inst{83} = MEGA_FETCH;
-+// Inst{84} = ALT_CONST;
-+// Inst{86-85} = BUFFER_INDEX_MODE;
-+// Inst{95-86} = 0; Reserved
-+
-+// VTX_WORD3 (Padding)
-+//
-+// Inst{127-96} = 0;
-+}
-+
-+
-+//===--------------------------------------------------------------------===//
-+// Instructions support
-+//===--------------------------------------------------------------------===//
-+//===---------------------------------------------------------------------===//
-+// Custom Inserter for Branches and returns; this will eventually be a
-+// separate pass
-+//===---------------------------------------------------------------------===//
-+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
-+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
-+ "; Pseudo unconditional branch instruction",
-+ [(br bb:$target)]>;
-+ defm BRANCH_COND : BranchConditional;
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// Flow and Program control Instructions
-+//===---------------------------------------------------------------------===//
-+let isTerminator=1 in {
-+ def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
-+ !strconcat("SWITCH", " $src"), []>;
-+ def CASE : ILFormat< (outs), (ins GPRI32:$src),
-+ !strconcat("CASE", " $src"), []>;
-+ def BREAK : ILFormat< (outs), (ins),
-+ "BREAK", []>;
-+ def CONTINUE : ILFormat< (outs), (ins),
-+ "CONTINUE", []>;
-+ def DEFAULT : ILFormat< (outs), (ins),
-+ "DEFAULT", []>;
-+ def ELSE : ILFormat< (outs), (ins),
-+ "ELSE", []>;
-+ def ENDSWITCH : ILFormat< (outs), (ins),
-+ "ENDSWITCH", []>;
-+ def ENDMAIN : ILFormat< (outs), (ins),
-+ "ENDMAIN", []>;
-+ def END : ILFormat< (outs), (ins),
-+ "END", []>;
-+ def ENDFUNC : ILFormat< (outs), (ins),
-+ "ENDFUNC", []>;
-+ def ENDIF : ILFormat< (outs), (ins),
-+ "ENDIF", []>;
-+ def WHILELOOP : ILFormat< (outs), (ins),
-+ "WHILE", []>;
-+ def ENDLOOP : ILFormat< (outs), (ins),
-+ "ENDLOOP", []>;
-+ def FUNC : ILFormat< (outs), (ins),
-+ "FUNC", []>;
-+ def RETDYN : ILFormat< (outs), (ins),
-+ "RET_DYN", []>;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
-+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-+ defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
-+ defm IFC : BranchInstr2<"IFC">;
-+ defm BREAKC : BranchInstr2<"BREAKC">;
-+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// ISel Patterns
-+//===----------------------------------------------------------------------===//
-+
-+// CNDGE_INT extra pattern
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
-+ (i32 R600_Reg32:$src2), COND_GT),
-+ (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
-+>;
-+
-+// KIL Patterns
-+def KILP : Pat <
-+ (int_AMDGPU_kilp),
-+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
-+>;
-+
-+def KIL : Pat <
-+ (int_AMDGPU_kill R600_Reg32:$src0),
-+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
-+>;
-+
-+// SGT Reverse args
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
-+ (SGT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SGE Reverse args
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
-+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGT_INT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
-+ (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGE_INT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
-+ (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGT_UINT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
-+ (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// SETGE_UINT reverse args
-+def : Pat <
-+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
-+ (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
-+>;
-+
-+// The next two patterns are special cases for handling 'true if ordered' and
-+// 'true if unordered' conditionals. The assumption here is that the behavior of
-+// SETE and SNE conforms to the Direct3D 10 rules for floating point values
-+// described here:
-+// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
-+// We assume that SETE returns false when one of the operands is NAN and
-+// SNE returns true when one of the operands is NAN.
-+
-+//SETE - 'true if ordered'
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
-+ (SETE R600_Reg32:$src0, R600_Reg32:$src1)
-+>;
-+
-+//SNE - 'true if unordered'
-+def : Pat <
-+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
-+ (SNE R600_Reg32:$src0, R600_Reg32:$src1)
-+>;
-+
-+def : Extract_Element ;
-+def : Extract_Element ;
-+def : Extract_Element ;
-+def : Extract_Element ;
-+
-+def : Insert_Element ;
-+def : Insert_Element ;
-+def : Insert_Element ;
-+def : Insert_Element ;
-+
-+def : Extract_Element ;
-+def : Extract_Element ;
-+def : Extract_Element ;
-+def : Extract_Element ;
-+
-+def : Insert_Element ;
-+def : Insert_Element ;
-+def : Insert_Element ;
-+def : Insert_Element ;
-+
-+def : Vector_Build ;
-+def : Vector_Build ;
-+
-+// bitconvert patterns
-+
-+def : BitConvert ;
-+def : BitConvert ;
-+def : BitConvert ;
-+def : BitConvert ;
-+
-+// DWORDADDR pattern
-+def : DwordAddrPat ;
-+
-+} // End isR600toCayman Predicate
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Intrinsics.td llvm-r600/lib/Target/R600/R600Intrinsics.td
---- llvm-3.2.src/lib/Target/R600/R600Intrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600Intrinsics.td 2013-01-25 19:43:57.466716387 +0100
-@@ -0,0 +1,34 @@
-+//===-- R600Intrinsics.td - R600 Intrinsic defs -------*- tablegen -*-----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+// -+//===----------------------------------------------------------------------===// -+// -+// R600 Intrinsic Definitions -+// -+//===----------------------------------------------------------------------===// -+ -+let TargetPrefix = "R600", isTarget = 1 in { -+ def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; -+ def int_R600_load_input_perspective : -+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; -+ def int_R600_load_input_constant : -+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; -+ def int_R600_load_input_linear : -+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; -+ def int_R600_store_swizzle : -+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; -+ def int_R600_store_stream_output : -+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -+ def int_R600_store_pixel_color : -+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; -+ def int_R600_store_pixel_depth : -+ Intrinsic<[], [llvm_float_ty], []>; -+ def int_R600_store_pixel_stencil : -+ Intrinsic<[], [llvm_float_ty], []>; -+ def int_R600_store_pixel_dummy : -+ Intrinsic<[], [], []>; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600ISelLowering.cpp llvm-r600/lib/Target/R600/R600ISelLowering.cpp ---- llvm-3.2.src/lib/Target/R600/R600ISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600ISelLowering.cpp 2013-01-25 19:43:57.463383054 +0100 -@@ -0,0 +1,997 @@ -+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Custom DAG lowering for R600 -+// -+//===----------------------------------------------------------------------===// -+ -+#include "R600ISelLowering.h" -+#include "R600Defines.h" -+#include "R600InstrInfo.h" -+#include "R600MachineFunctionInfo.h" -+#include "llvm/Argument.h" -+#include "llvm/Function.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineRegisterInfo.h" -+#include "llvm/CodeGen/SelectionDAG.h" -+ -+using namespace llvm; -+ -+R600TargetLowering::R600TargetLowering(TargetMachine &TM) : -+ AMDGPUTargetLowering(TM), -+ TII(static_cast(TM.getInstrInfo())) { -+ setOperationAction(ISD::MUL, MVT::i64, Expand); -+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); -+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); -+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); -+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); -+ computeRegisterProperties(); -+ -+ setOperationAction(ISD::FADD, MVT::v4f32, Expand); -+ setOperationAction(ISD::FMUL, MVT::v4f32, Expand); -+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand); -+ setOperationAction(ISD::FSUB, MVT::v4f32, Expand); -+ -+ setOperationAction(ISD::ADD, MVT::v4i32, Expand); -+ setOperationAction(ISD::AND, MVT::v4i32, Expand); -+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); -+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); -+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); -+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); -+ setOperationAction(ISD::UDIV, MVT::v4i32, Expand); -+ setOperationAction(ISD::UREM, MVT::v4i32, Expand); -+ setOperationAction(ISD::SETCC, MVT::v4i32, Expand); -+ -+ setOperationAction(ISD::BR_CC, 
MVT::i32, Custom); -+ setOperationAction(ISD::BR_CC, MVT::f32, Custom); -+ -+ setOperationAction(ISD::FSUB, MVT::f32, Expand); -+ -+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); -+ setOperationAction(ISD::FPOW, MVT::f32, Custom); -+ -+ setOperationAction(ISD::ROTL, MVT::i32, Custom); -+ -+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); -+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -+ -+ setOperationAction(ISD::SETCC, MVT::i32, Custom); -+ setOperationAction(ISD::SETCC, MVT::f32, Custom); -+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); -+ -+ setOperationAction(ISD::SELECT, MVT::i32, Custom); -+ setOperationAction(ISD::SELECT, MVT::f32, Custom); -+ -+ setOperationAction(ISD::STORE, MVT::i32, Custom); -+ setOperationAction(ISD::STORE, MVT::v4i32, Custom); -+ -+ setOperationAction(ISD::LOAD, MVT::i32, Custom); -+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom); -+ setTargetDAGCombine(ISD::FP_ROUND); -+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); -+ -+ setSchedulingPreference(Sched::VLIW); -+} -+ -+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( -+ MachineInstr * MI, MachineBasicBlock * BB) const { -+ MachineFunction * MF = BB->getParent(); -+ MachineRegisterInfo &MRI = MF->getRegInfo(); -+ MachineBasicBlock::iterator I = *MI; -+ -+ switch (MI->getOpcode()) { -+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); -+ case AMDGPU::SHADER_TYPE: break; -+ case AMDGPU::CLAMP_R600: { -+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, -+ AMDGPU::MOV, -+ MI->getOperand(0).getReg(), -+ MI->getOperand(1).getReg()); -+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); -+ break; -+ } -+ -+ case AMDGPU::FABS_R600: { -+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, -+ AMDGPU::MOV, -+ MI->getOperand(0).getReg(), -+ MI->getOperand(1).getReg()); -+ TII->addFlag(NewMI, 0, MO_FLAG_ABS); -+ break; -+ } -+ -+ case AMDGPU::FNEG_R600: { -+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, -+ AMDGPU::MOV, -+ MI->getOperand(0).getReg(), -+ MI->getOperand(1).getReg()); -+ TII->addFlag(NewMI, 0, MO_FLAG_NEG); -+ break; -+ } -+ -+ case AMDGPU::MASK_WRITE: { -+ unsigned maskedRegister = MI->getOperand(0).getReg(); -+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); -+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); -+ TII->addFlag(defInstr, 0, MO_FLAG_MASK); -+ break; -+ } -+ -+ case AMDGPU::MOV_IMM_F32: -+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), -+ MI->getOperand(1).getFPImm()->getValueAPF() -+ .bitcastToAPInt().getZExtValue()); -+ break; -+ case AMDGPU::MOV_IMM_I32: -+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), -+ MI->getOperand(1).getImm()); -+ break; -+ -+ -+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg: -+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { -+ unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 
1 : 0; -+ -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) -+ .addOperand(MI->getOperand(0)) -+ .addOperand(MI->getOperand(1)) -+ .addImm(EOP); // Set End of program bit -+ break; -+ } -+ -+ case AMDGPU::RESERVE_REG: { -+ R600MachineFunctionInfo * MFI = MF->getInfo(); -+ int64_t ReservedIndex = MI->getOperand(0).getImm(); -+ unsigned ReservedReg = -+ AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); -+ MFI->ReservedRegs.push_back(ReservedReg); -+ unsigned SuperReg = -+ AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4); -+ MFI->ReservedRegs.push_back(SuperReg); -+ break; -+ } -+ -+ case AMDGPU::TXD: { -+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); -+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); -+ -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) -+ .addOperand(MI->getOperand(3)) -+ .addOperand(MI->getOperand(4)) -+ .addOperand(MI->getOperand(5)) -+ .addOperand(MI->getOperand(6)); -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) -+ .addOperand(MI->getOperand(2)) -+ .addOperand(MI->getOperand(4)) -+ .addOperand(MI->getOperand(5)) -+ .addOperand(MI->getOperand(6)); -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) -+ .addOperand(MI->getOperand(0)) -+ .addOperand(MI->getOperand(1)) -+ .addOperand(MI->getOperand(4)) -+ .addOperand(MI->getOperand(5)) -+ .addOperand(MI->getOperand(6)) -+ .addReg(T0, RegState::Implicit) -+ .addReg(T1, RegState::Implicit); -+ break; -+ } -+ -+ case AMDGPU::TXD_SHADOW: { -+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); -+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); -+ -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) -+ .addOperand(MI->getOperand(3)) -+ .addOperand(MI->getOperand(4)) -+ .addOperand(MI->getOperand(5)) -+ .addOperand(MI->getOperand(6)); -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) -+ .addOperand(MI->getOperand(2)) -+ .addOperand(MI->getOperand(4)) -+ .addOperand(MI->getOperand(5)) -+ .addOperand(MI->getOperand(6)); -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) -+ .addOperand(MI->getOperand(0)) -+ .addOperand(MI->getOperand(1)) -+ .addOperand(MI->getOperand(4)) -+ .addOperand(MI->getOperand(5)) -+ .addOperand(MI->getOperand(6)) -+ .addReg(T0, RegState::Implicit) -+ .addReg(T1, RegState::Implicit); -+ break; -+ } -+ -+ case AMDGPU::BRANCH: -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) -+ .addOperand(MI->getOperand(0)) -+ .addReg(0); -+ break; -+ -+ case AMDGPU::BRANCH_COND_f32: { -+ MachineInstr *NewMI = -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), -+ AMDGPU::PREDICATE_BIT) -+ .addOperand(MI->getOperand(1)) -+ .addImm(OPCODE_IS_NOT_ZERO) -+ .addImm(0); // Flags -+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH); -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) -+ .addOperand(MI->getOperand(0)) -+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); -+ break; -+ } -+ -+ case AMDGPU::BRANCH_COND_i32: { -+ MachineInstr *NewMI = -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), -+ AMDGPU::PREDICATE_BIT) -+ .addOperand(MI->getOperand(1)) -+ .addImm(OPCODE_IS_NOT_ZERO_INT) -+ .addImm(0); // Flags -+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH); -+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) -+ .addOperand(MI->getOperand(0)) -+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); -+ 
break;
-+ }
-+
-+ case AMDGPU::input_perspective: {
-+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
-+
-+ // XXX Be more precise about register reservation
-+ for (unsigned i = 0; i < 4; i++) {
-+ unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
-+ MFI->ReservedRegs.push_back(ReservedReg);
-+ }
-+
-+ switch (MI->getOperand(1).getImm()) {
-+ case 0: // Perspective
-+ MFI->HasPerspectiveInterpolation = true;
-+ break;
-+ case 1: // Linear
-+ MFI->HasLinearInterpolation = true;
-+ break;
-+ default:
-+ assert(0 && "Unknown ij index");
-+ }
-+
-+ return BB;
-+ }
-+
-+ case AMDGPU::EG_ExportSwz:
-+ case AMDGPU::R600_ExportSwz: {
-+ // Instruction is left unmodified if it's not the last one of its type
-+ bool isLastInstructionOfItsType = true;
-+ unsigned InstExportType = MI->getOperand(1).getImm();
-+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
-+ EndBlock = BB->end(); NextExportInst != EndBlock;
-+ NextExportInst = llvm::next(NextExportInst)) {
-+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
-+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
-+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
-+ .getImm();
-+ if (CurrentInstExportType == InstExportType) {
-+ isLastInstructionOfItsType = false;
-+ break;
-+ }
-+ }
-+ }
-+ bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
-+ if (!EOP && !isLastInstructionOfItsType)
-+ return BB;
-+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
-+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
-+ .addOperand(MI->getOperand(0))
-+ .addOperand(MI->getOperand(1))
-+ .addOperand(MI->getOperand(2))
-+ .addOperand(MI->getOperand(3))
-+ .addOperand(MI->getOperand(4))
-+ .addOperand(MI->getOperand(5))
-+ .addOperand(MI->getOperand(6))
-+ .addImm(CfInst)
-+ .addImm(EOP);
-+ break;
-+ }
-+ }
-+
-+ MI->eraseFromParent();
-+ return BB;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom DAG Lowering Operations
-+//===----------------------------------------------------------------------===//
-+
-+using namespace llvm::Intrinsic;
-+using namespace llvm::AMDGPUIntrinsic;
-+
-+static SDValue
-+InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
-+ unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
-+ SDValue Scalar, SDValue Chain) {
-+ if (!ExportMap[Slot]) {
-+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
-+ DL, MVT::v4f32,
-+ DAG.getUNDEF(MVT::v4f32),
-+ Scalar,
-+ DAG.getConstant(Channel, MVT::i32));
-+
-+ unsigned Mask = 1 << Channel;
-+
-+ const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
-+ DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
-+ DAG.getConstant(Mask, MVT::i32)};
-+
-+ SDValue Res = DAG.getNode(
-+ AMDGPUISD::EXPORT,
-+ DL,
-+ MVT::Other,
-+ Ops, 6);
-+ ExportMap[Slot] = Res.getNode();
-+ return Res;
-+ }
-+
-+ SDNode *ExportInstruction = (SDNode *) ExportMap[Slot];
-+ SDValue PreviousVector = ExportInstruction->getOperand(1);
-+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
-+ DL, MVT::v4f32,
-+ PreviousVector,
-+ Scalar,
-+ DAG.getConstant(Channel, MVT::i32));
-+
-+ unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
-+ ->getZExtValue();
-+ Mask |= (1 << Channel);
-+
-+ const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
-+ DAG.getConstant(Inst, MVT::i32),
-+ DAG.getConstant(Type, MVT::i32),
-+ DAG.getConstant(Slot, MVT::i32),
-+ DAG.getConstant(Mask, MVT::i32)};
-+
-+ 
DAG.UpdateNodeOperands(ExportInstruction, -+ Ops, 6); -+ -+ return Chain; -+ -+} -+ -+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { -+ switch (Op.getOpcode()) { -+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); -+ case ISD::BR_CC: return LowerBR_CC(Op, DAG); -+ case ISD::ROTL: return LowerROTL(Op, DAG); -+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); -+ case ISD::SELECT: return LowerSELECT(Op, DAG); -+ case ISD::SETCC: return LowerSETCC(Op, DAG); -+ case ISD::STORE: return LowerSTORE(Op, DAG); -+ case ISD::LOAD: return LowerLOAD(Op, DAG); -+ case ISD::FPOW: return LowerFPOW(Op, DAG); -+ case ISD::INTRINSIC_VOID: { -+ SDValue Chain = Op.getOperand(0); -+ unsigned IntrinsicID = -+ cast(Op.getOperand(1))->getZExtValue(); -+ switch (IntrinsicID) { -+ case AMDGPUIntrinsic::AMDGPU_store_output: { -+ MachineFunction &MF = DAG.getMachineFunction(); -+ MachineRegisterInfo &MRI = MF.getRegInfo(); -+ int64_t RegIndex = cast(Op.getOperand(3))->getZExtValue(); -+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); -+ if (!MRI.isLiveOut(Reg)) { -+ MRI.addLiveOut(Reg); -+ } -+ return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); -+ } -+ case AMDGPUIntrinsic::R600_store_pixel_color: { -+ MachineFunction &MF = DAG.getMachineFunction(); -+ R600MachineFunctionInfo *MFI = MF.getInfo(); -+ int64_t RegIndex = cast(Op.getOperand(3))->getZExtValue(); -+ -+ SDNode **OutputsMap = MFI->Outputs; -+ return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, -+ RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), -+ Chain); -+ -+ } -+ -+ // default for switch(IntrinsicID) -+ default: break; -+ } -+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) -+ break; -+ } -+ case ISD::INTRINSIC_WO_CHAIN: { -+ unsigned IntrinsicID = -+ cast(Op.getOperand(0))->getZExtValue(); -+ EVT VT = Op.getValueType(); -+ DebugLoc DL = Op.getDebugLoc(); -+ switch(IntrinsicID) { -+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); -+ case AMDGPUIntrinsic::R600_load_input: { -+ int64_t RegIndex = cast(Op.getOperand(1))->getZExtValue(); -+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); -+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); -+ } -+ case AMDGPUIntrinsic::R600_load_input_perspective: { -+ int slot = cast(Op.getOperand(1))->getZExtValue(); -+ if (slot < 0) -+ return DAG.getUNDEF(MVT::f32); -+ SDValue FullVector = DAG.getNode( -+ AMDGPUISD::INTERP, -+ DL, MVT::v4f32, -+ DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); -+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, -+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); -+ } -+ case AMDGPUIntrinsic::R600_load_input_linear: { -+ int slot = cast(Op.getOperand(1))->getZExtValue(); -+ if (slot < 0) -+ return DAG.getUNDEF(MVT::f32); -+ SDValue FullVector = DAG.getNode( -+ AMDGPUISD::INTERP, -+ DL, MVT::v4f32, -+ DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); -+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, -+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); -+ } -+ case AMDGPUIntrinsic::R600_load_input_constant: { -+ int slot = cast(Op.getOperand(1))->getZExtValue(); -+ if (slot < 0) -+ return DAG.getUNDEF(MVT::f32); -+ SDValue FullVector = DAG.getNode( -+ AMDGPUISD::INTERP_P0, -+ DL, MVT::v4f32, -+ DAG.getConstant(slot / 4 , MVT::i32)); -+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, -+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); -+ } -+ -+ case 
r600_read_ngroups_x: -+ return LowerImplicitParameter(DAG, VT, DL, 0); -+ case r600_read_ngroups_y: -+ return LowerImplicitParameter(DAG, VT, DL, 1); -+ case r600_read_ngroups_z: -+ return LowerImplicitParameter(DAG, VT, DL, 2); -+ case r600_read_global_size_x: -+ return LowerImplicitParameter(DAG, VT, DL, 3); -+ case r600_read_global_size_y: -+ return LowerImplicitParameter(DAG, VT, DL, 4); -+ case r600_read_global_size_z: -+ return LowerImplicitParameter(DAG, VT, DL, 5); -+ case r600_read_local_size_x: -+ return LowerImplicitParameter(DAG, VT, DL, 6); -+ case r600_read_local_size_y: -+ return LowerImplicitParameter(DAG, VT, DL, 7); -+ case r600_read_local_size_z: -+ return LowerImplicitParameter(DAG, VT, DL, 8); -+ -+ case r600_read_tgid_x: -+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, -+ AMDGPU::T1_X, VT); -+ case r600_read_tgid_y: -+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, -+ AMDGPU::T1_Y, VT); -+ case r600_read_tgid_z: -+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, -+ AMDGPU::T1_Z, VT); -+ case r600_read_tidig_x: -+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, -+ AMDGPU::T0_X, VT); -+ case r600_read_tidig_y: -+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, -+ AMDGPU::T0_Y, VT); -+ case r600_read_tidig_z: -+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, -+ AMDGPU::T0_Z, VT); -+ } -+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) -+ break; -+ } -+ } // end switch(Op.getOpcode()) -+ return SDValue(); -+} -+ -+void R600TargetLowering::ReplaceNodeResults(SDNode *N, -+ SmallVectorImpl &Results, -+ SelectionDAG &DAG) const { -+ switch (N->getOpcode()) { -+ default: return; -+ case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); -+ return; -+ case ISD::LOAD: { -+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); -+ Results.push_back(SDValue(Node, 0)); -+ Results.push_back(SDValue(Node, 1)); -+ // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode -+ // function -+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); -+ return; -+ } -+ } -+} -+ -+SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { -+ return DAG.getNode( -+ ISD::SETCC, -+ Op.getDebugLoc(), -+ MVT::i1, -+ Op, DAG.getConstantFP(0.0f, MVT::f32), -+ DAG.getCondCode(ISD::SETNE) -+ ); -+} -+ -+SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { -+ SDValue Chain = Op.getOperand(0); -+ SDValue CC = Op.getOperand(1); -+ SDValue LHS = Op.getOperand(2); -+ SDValue RHS = Op.getOperand(3); -+ SDValue JumpT = Op.getOperand(4); -+ SDValue CmpValue; -+ SDValue Result; -+ -+ if (LHS.getValueType() == MVT::i32) { -+ CmpValue = DAG.getNode( -+ ISD::SELECT_CC, -+ Op.getDebugLoc(), -+ MVT::i32, -+ LHS, RHS, -+ DAG.getConstant(-1, MVT::i32), -+ DAG.getConstant(0, MVT::i32), -+ CC); -+ } else if (LHS.getValueType() == MVT::f32) { -+ CmpValue = DAG.getNode( -+ ISD::SELECT_CC, -+ Op.getDebugLoc(), -+ MVT::f32, -+ LHS, RHS, -+ DAG.getConstantFP(1.0f, MVT::f32), -+ DAG.getConstantFP(0.0f, MVT::f32), -+ CC); -+ } else { -+ assert(0 && "Not valid type for br_cc"); -+ } -+ Result = DAG.getNode( -+ AMDGPUISD::BRANCH_COND, -+ CmpValue.getDebugLoc(), -+ MVT::Other, Chain, -+ JumpT, CmpValue); -+ return Result; -+} -+ -+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, -+ DebugLoc DL, -+ unsigned DwordOffset) const { -+ unsigned ByteOffset = DwordOffset * 4; -+ PointerType * PtrType 
= PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-+ AMDGPUAS::PARAM_I_ADDRESS);
-+
-+ // We shouldn't be using an offset wider than 16-bits for implicit parameters.
-+ assert(isInt<16>(ByteOffset));
-+
-+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
-+ DAG.getConstant(ByteOffset, MVT::i32), // PTR
-+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
-+ false, false, false, 0);
-+}
-+
-+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
-+ Op.getOperand(0),
-+ Op.getOperand(0),
-+ DAG.getNode(ISD::SUB, DL, VT,
-+ DAG.getConstant(32, MVT::i32),
-+ Op.getOperand(1)));
-+}
-+
-+bool R600TargetLowering::isZero(SDValue Op) const {
-+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
-+ return Cst->isNullValue();
-+ } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+ return CstFP->isZero();
-+ } else {
-+ return false;
-+ }
-+}
-+
-+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ SDValue True = Op.getOperand(2);
-+ SDValue False = Op.getOperand(3);
-+ SDValue CC = Op.getOperand(4);
-+ SDValue Temp;
-+
-+ // LHS and RHS are guaranteed to be the same value type
-+ EVT CompareVT = LHS.getValueType();
-+
-+ // Check if we can lower this to a native operation.
-+
-+ // Try to lower to a CND* instruction:
-+ // CND* instructions require RHS to be zero. Some SELECT_CC nodes that
-+ // can be lowered to CND* instructions can also be lowered to SET*
-+ // instructions. CND* instructions are cheaper, because they don't
-+ // require additional instructions to convert their result to the correct
-+ // value type, so this check should be first.
-+ if (isZero(LHS) || isZero(RHS)) {
-+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
-+ SDValue Zero = (isZero(LHS) ? LHS : RHS);
-+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+ if (CompareVT != VT) {
-+ // Bitcast True / False to the correct types. This will end up being
-+ // a nop, but it allows us to define only a single pattern in the
-+ // .TD files for each CND* instruction rather than having to have
-+ // one pattern for integer True/False and one for fp True/False
-+ True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
-+ False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
-+ }
-+ if (isZero(LHS)) {
-+ CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
-+ }
-+
-+ switch (CCOpcode) {
-+ case ISD::SETONE:
-+ case ISD::SETUNE:
-+ case ISD::SETNE:
-+ case ISD::SETULE:
-+ case ISD::SETULT:
-+ case ISD::SETOLE:
-+ case ISD::SETOLT:
-+ case ISD::SETLE:
-+ case ISD::SETLT:
-+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
-+ Temp = True;
-+ True = False;
-+ False = Temp;
-+ break;
-+ default:
-+ break;
-+ }
-+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-+ Cond, Zero,
-+ True, False,
-+ DAG.getCondCode(CCOpcode));
-+ return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
-+ }
-+
-+ // Try to lower to a SET* instruction:
-+ // We need all the operands of SELECT_CC to have the same value type, so if
-+ // necessary we need to change True and False to be the same type as LHS and
-+ // RHS, and then convert the result of the select_cc back to the correct type.
-+
-+ // Move hardware True/False values to the correct operand.
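-+ // (If False currently holds the hardware true value and True the hardware
-+ // false value, the swap below plus an inverted condition code restores the
-+ // canonical operand order.)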
-+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
-+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+ std::swap(False, True);
-+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
-+ }
-+
-+ if (isHWTrueValue(True) && isHWFalseValue(False)) {
-+ if (CompareVT != VT) {
-+ if (VT == MVT::f32 && CompareVT == MVT::i32) {
-+ SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-+ LHS, RHS,
-+ DAG.getConstant(-1, MVT::i32),
-+ DAG.getConstant(0, MVT::i32),
-+ CC);
-+ // Convert integer values of true (-1) and false (0) to fp values of
-+ // true (1.0f) and false (0.0f).
-+ SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
-+ DAG.getConstant(1, MVT::i32));
-+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
-+ } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
-+ SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-+ LHS, RHS,
-+ DAG.getConstantFP(1.0f, MVT::f32),
-+ DAG.getConstantFP(0.0f, MVT::f32),
-+ CC);
-+ // Convert fp values of true (1.0f) and false (0.0f) to integer values
-+ // of true (-1) and false (0).
-+ SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
-+ return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
-+ } else {
-+ // I don't think there will be any other type pairings.
-+ assert(!"Unhandled operand type pairings in SELECT_CC");
-+ }
-+ } else {
-+ // This SELECT_CC is already legal.
-+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
-+ }
-+ }
-+
-+ // Possible Min/Max pattern
-+ SDValue MinMax = LowerMinMax(Op, DAG);
-+ if (MinMax.getNode()) {
-+ return MinMax;
-+ }
-+
-+ // If we make it this far it means we have no native instructions to handle
-+ // this SELECT_CC, so we must lower it.
-+ SDValue HWTrue, HWFalse;
-+
-+ if (CompareVT == MVT::f32) {
-+ HWTrue = DAG.getConstantFP(1.0f, CompareVT);
-+ HWFalse = DAG.getConstantFP(0.0f, CompareVT);
-+ } else if (CompareVT == MVT::i32) {
-+ HWTrue = DAG.getConstant(-1, CompareVT);
-+ HWFalse = DAG.getConstant(0, CompareVT);
-+ }
-+ else {
-+ assert(!"Unhandled value type in LowerSELECT_CC");
-+ }
-+
-+ // Lower this unsupported SELECT_CC into a combination of two supported
-+ // SELECT_CC operations.
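-+ // i.e. select_cc LHS, RHS, True, False, CC -->
-+ // select_cc (select_cc LHS, RHS, HWTrue, HWFalse, CC),
-+ // HWFalse, True, False, setne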
-+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); -+ -+ return DAG.getNode(ISD::SELECT_CC, DL, VT, -+ Cond, HWFalse, -+ True, False, -+ DAG.getCondCode(ISD::SETNE)); -+} -+ -+SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { -+ return DAG.getNode(ISD::SELECT_CC, -+ Op.getDebugLoc(), -+ Op.getValueType(), -+ Op.getOperand(0), -+ DAG.getConstant(0, MVT::i32), -+ Op.getOperand(1), -+ Op.getOperand(2), -+ DAG.getCondCode(ISD::SETNE)); -+} -+ -+SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { -+ SDValue Cond; -+ SDValue LHS = Op.getOperand(0); -+ SDValue RHS = Op.getOperand(1); -+ SDValue CC = Op.getOperand(2); -+ DebugLoc DL = Op.getDebugLoc(); -+ assert(Op.getValueType() == MVT::i32); -+ if (LHS.getValueType() == MVT::i32) { -+ Cond = DAG.getNode( -+ ISD::SELECT_CC, -+ Op.getDebugLoc(), -+ MVT::i32, -+ LHS, RHS, -+ DAG.getConstant(-1, MVT::i32), -+ DAG.getConstant(0, MVT::i32), -+ CC); -+ } else if (LHS.getValueType() == MVT::f32) { -+ Cond = DAG.getNode( -+ ISD::SELECT_CC, -+ Op.getDebugLoc(), -+ MVT::f32, -+ LHS, RHS, -+ DAG.getConstantFP(1.0f, MVT::f32), -+ DAG.getConstantFP(0.0f, MVT::f32), -+ CC); -+ Cond = DAG.getNode( -+ ISD::FP_TO_SINT, -+ DL, -+ MVT::i32, -+ Cond); -+ } else { -+ assert(0 && "Not valid type for set_cc"); -+ } -+ Cond = DAG.getNode( -+ ISD::AND, -+ DL, -+ MVT::i32, -+ DAG.getConstant(1, MVT::i32), -+ Cond); -+ return Cond; -+} -+ -+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { -+ DebugLoc DL = Op.getDebugLoc(); -+ StoreSDNode *StoreNode = cast(Op); -+ SDValue Chain = Op.getOperand(0); -+ SDValue Value = Op.getOperand(1); -+ SDValue Ptr = Op.getOperand(2); -+ -+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && -+ Ptr->getOpcode() != AMDGPUISD::DWORDADDR) { -+ // Convert pointer from byte address to dword address. 
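-+ // (The dword address is simply the byte address shifted right by two,
-+ // e.g. byte offset 16 becomes dword offset 4.)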
-+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
-+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
-+ Ptr, DAG.getConstant(2, MVT::i32)));
-+
-+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
-+ assert(!"Truncated and indexed stores not supported yet");
-+ } else {
-+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
-+ }
-+ return Chain;
-+ }
-+ return SDValue();
-+}
-+
-+// return (512 + (kc_bank << 12))
-+static int
-+ConstantAddressBlock(unsigned AddressSpace) {
-+ switch (AddressSpace) {
-+ case AMDGPUAS::CONSTANT_BUFFER_0:
-+ return 512;
-+ case AMDGPUAS::CONSTANT_BUFFER_1:
-+ return 512 + 4096;
-+ case AMDGPUAS::CONSTANT_BUFFER_2:
-+ return 512 + 4096 * 2;
-+ case AMDGPUAS::CONSTANT_BUFFER_3:
-+ return 512 + 4096 * 3;
-+ case AMDGPUAS::CONSTANT_BUFFER_4:
-+ return 512 + 4096 * 4;
-+ case AMDGPUAS::CONSTANT_BUFFER_5:
-+ return 512 + 4096 * 5;
-+ case AMDGPUAS::CONSTANT_BUFFER_6:
-+ return 512 + 4096 * 6;
-+ case AMDGPUAS::CONSTANT_BUFFER_7:
-+ return 512 + 4096 * 7;
-+ case AMDGPUAS::CONSTANT_BUFFER_8:
-+ return 512 + 4096 * 8;
-+ case AMDGPUAS::CONSTANT_BUFFER_9:
-+ return 512 + 4096 * 9;
-+ case AMDGPUAS::CONSTANT_BUFFER_10:
-+ return 512 + 4096 * 10;
-+ case AMDGPUAS::CONSTANT_BUFFER_11:
-+ return 512 + 4096 * 11;
-+ case AMDGPUAS::CONSTANT_BUFFER_12:
-+ return 512 + 4096 * 12;
-+ case AMDGPUAS::CONSTANT_BUFFER_13:
-+ return 512 + 4096 * 13;
-+ case AMDGPUAS::CONSTANT_BUFFER_14:
-+ return 512 + 4096 * 14;
-+ case AMDGPUAS::CONSTANT_BUFFER_15:
-+ return 512 + 4096 * 15;
-+ default:
-+ return -1;
-+ }
-+}
-+
-+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
-+ EVT VT = Op.getValueType();
-+ DebugLoc DL = Op.getDebugLoc();
-+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
-+ SDValue Chain = Op.getOperand(0);
-+ SDValue Ptr = Op.getOperand(1);
-+ SDValue LoweredLoad;
-+
-+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
-+ if (ConstantBlock > -1) {
-+ SDValue Result;
-+ if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
-+ dyn_cast<Constant>(LoadNode->getSrcValue())) {
-+ SDValue Slots[4];
-+ for (unsigned i = 0; i < 4; i++) {
-+ // We want Const position encoded with the following formula:
-+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
-+ // const_index is Ptr computed by llvm using an alignment of 16.
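-+ // For example kc_bank = 1, const_index = 3 and chan = 2 encode to
-+ // ((512 + 4096 + 3) << 2) + 2 = 18446.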
-+ // Thus we add (((512 + (kc_bank << 12)) + chan) * 4 here and
-+ // then div by 4 at the ISel step
-+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
-+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
-+ }
-+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
-+ } else {
-+ // A non-constant Ptr can't be folded, so keep it as a v4i32 load
-+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
-+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
-+ );
-+ }
-+
-+ if (!VT.isVector()) {
-+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
-+ DAG.getConstant(0, MVT::i32));
-+ }
-+
-+ SDValue MergedValues[2] = {
-+ Result,
-+ Chain
-+ };
-+ return DAG.getMergeValues(MergedValues, 2, DL);
-+ }
-+
-+ return SDValue();
-+}
-+
-+SDValue R600TargetLowering::LowerFPOW(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+ SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
-+ SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
-+ return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
-+}
-+
-+/// XXX Only kernel functions are supported, so we can assume for now that
-+/// every function is a kernel function, but in the future we should use
-+/// separate calling conventions for kernel and non-kernel functions.
-+SDValue R600TargetLowering::LowerFormalArguments(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const {
-+ unsigned ParamOffsetBytes = 36;
-+ Function::const_arg_iterator FuncArg =
-+ DAG.getMachineFunction().getFunction()->arg_begin();
-+ for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
-+ EVT VT = Ins[i].VT;
-+ Type *ArgType = FuncArg->getType();
-+ unsigned ArgSizeInBits = ArgType->isPointerTy() ?
-+ 32 : ArgType->getPrimitiveSizeInBits(); -+ unsigned ArgBytes = ArgSizeInBits >> 3; -+ EVT ArgVT; -+ if (ArgSizeInBits < VT.getSizeInBits()) { -+ assert(!ArgType->isFloatTy() && -+ "Extending floating point arguments not supported yet"); -+ ArgVT = MVT::getIntegerVT(ArgSizeInBits); -+ } else { -+ ArgVT = VT; -+ } -+ PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), -+ AMDGPUAS::PARAM_I_ADDRESS); -+ SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), -+ DAG.getConstant(ParamOffsetBytes, MVT::i32), -+ MachinePointerInfo(new Argument(PtrTy)), -+ ArgVT, false, false, ArgBytes); -+ InVals.push_back(Arg); -+ ParamOffsetBytes += ArgBytes; -+ } -+ return Chain; -+} -+ -+EVT R600TargetLowering::getSetCCResultType(EVT VT) const { -+ if (!VT.isVector()) return MVT::i32; -+ return VT.changeVectorElementTypeToInteger(); -+} -+ -+//===----------------------------------------------------------------------===// -+// Custom DAG Optimizations -+//===----------------------------------------------------------------------===// -+ -+SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, -+ DAGCombinerInfo &DCI) const { -+ SelectionDAG &DAG = DCI.DAG; -+ -+ switch (N->getOpcode()) { -+ // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) -+ case ISD::FP_ROUND: { -+ SDValue Arg = N->getOperand(0); -+ if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { -+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0), -+ Arg.getOperand(0)); -+ } -+ break; -+ } -+ // Extract_vec (Build_vector) generated by custom lowering -+ // also needs to be customly combined -+ case ISD::EXTRACT_VECTOR_ELT: { -+ SDValue Arg = N->getOperand(0); -+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) { -+ if (ConstantSDNode *Const = dyn_cast(N->getOperand(1))) { -+ unsigned Element = Const->getZExtValue(); -+ return Arg->getOperand(Element); -+ } -+ } -+ } -+ } -+ return SDValue(); -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600ISelLowering.h llvm-r600/lib/Target/R600/R600ISelLowering.h ---- llvm-3.2.src/lib/Target/R600/R600ISelLowering.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600ISelLowering.h 2013-01-25 19:43:57.463383054 +0100 -@@ -0,0 +1,73 @@ -+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief R600 DAG Lowering interface definition
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef R600ISELLOWERING_H
-+#define R600ISELLOWERING_H
-+
-+#include "AMDGPUISelLowering.h"
-+
-+namespace llvm {
-+
-+class R600InstrInfo;
-+
-+class R600TargetLowering : public AMDGPUTargetLowering {
-+public:
-+ R600TargetLowering(TargetMachine &TM);
-+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
-+ MachineBasicBlock * BB) const;
-+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-+ void ReplaceNodeResults(SDNode *N,
-+ SmallVectorImpl<SDValue> &Results,
-+ SelectionDAG &DAG) const;
-+ virtual SDValue LowerFormalArguments(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const;
-+ virtual EVT getSetCCResultType(EVT VT) const;
-+private:
-+ const R600InstrInfo * TII;
-+
-+ /// Each OpenCL kernel has nine implicit parameters that are stored in the
-+ /// first nine dwords of a Vertex Buffer. These implicit parameters are
-+ /// lowered to load instructions which retrieve the values from the Vertex
-+ /// Buffer.
-+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
-+ DebugLoc DL, unsigned DwordOffset) const;
-+
-+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
-+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
-+
-+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
-+
-+ /// \brief Lower ROTL opcode to BITALIGN
-+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
-+
-+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-+
-+ bool isZero(SDValue Op) const;
-+};
-+
-+} // End namespace llvm;
-+
-+#endif // R600ISELLOWERING_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600LowerConstCopy.cpp llvm-r600/lib/Target/R600/R600LowerConstCopy.cpp
---- llvm-3.2.src/lib/Target/R600/R600LowerConstCopy.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/R600LowerConstCopy.cpp 2013-01-25 19:43:57.466716387 +0100
-@@ -0,0 +1,74 @@
-+//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
-+/// ISel will fold each Const Buffer read inside scalar ALU instructions.
-+/// However it cannot fold them inside vector instructions like DOT4 or Cube;
-+/// ISel emits ConstCopy instead. This pass (executed after
-+/// ExpandingSpecialInstr) will try to fold them if possible or replace them
-+/// by MOV otherwise.
-+/// TODO: Implement the folding part, using Copy Propagation algorithm.
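-+/// e.g. a CONST_COPY of constant-buffer entry 7 is rewritten below as a MOV
-+/// from ALU_CONST whose const address selector operand is set to 7.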
-+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPU.h" -+#include "llvm/CodeGen/MachineFunction.h" -+#include "llvm/CodeGen/MachineFunctionPass.h" -+#include "R600InstrInfo.h" -+#include "llvm/GlobalValue.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+ -+namespace llvm { -+ -+class R600LowerConstCopy : public MachineFunctionPass { -+private: -+ static char ID; -+ const R600InstrInfo *TII; -+public: -+ R600LowerConstCopy(TargetMachine &tm); -+ virtual bool runOnMachineFunction(MachineFunction &MF); -+ -+ const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; } -+}; -+ -+char R600LowerConstCopy::ID = 0; -+ -+ -+R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) : -+ MachineFunctionPass(ID), -+ TII (static_cast(tm.getInstrInfo())) -+{ -+} -+ -+bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) { -+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); -+ BB != BB_E; ++BB) { -+ MachineBasicBlock &MBB = *BB; -+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); -+ I != E;) { -+ MachineInstr &MI = *I; -+ I = llvm::next(I); -+ if (MI.getOpcode() != AMDGPU::CONST_COPY) -+ continue; -+ MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV, -+ MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); -+ NewMI->getOperand(9).setImm(MI.getOperand(1).getImm()); -+ MI.eraseFromParent(); -+ } -+ } -+ return false; -+} -+ -+FunctionPass *createR600LowerConstCopy(TargetMachine &tm) { -+ return new R600LowerConstCopy(tm); -+} -+ -+} -+ -+ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.cpp llvm-r600/lib/Target/R600/R600MachineFunctionInfo.cpp ---- llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600MachineFunctionInfo.cpp 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,33 @@ -+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+/// \file -+//===----------------------------------------------------------------------===// -+ -+#include "R600MachineFunctionInfo.h" -+ -+using namespace llvm; -+ -+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) -+ : MachineFunctionInfo(), -+ HasLinearInterpolation(false), -+ HasPerspectiveInterpolation(false) { -+ memset(Outputs, 0, sizeof(Outputs)); -+ } -+ -+unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { -+ assert(HasPerspectiveInterpolation); -+ return 0; -+} -+ -+unsigned R600MachineFunctionInfo::GetIJLinearIndex() const { -+ assert(HasLinearInterpolation); -+ if (HasPerspectiveInterpolation) -+ return 1; -+ else -+ return 0; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.h llvm-r600/lib/Target/R600/R600MachineFunctionInfo.h ---- llvm-3.2.src/lib/Target/R600/R600MachineFunctionInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600MachineFunctionInfo.h 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,38 @@ -+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+//===----------------------------------------------------------------------===// -+ -+#ifndef R600MACHINEFUNCTIONINFO_H -+#define R600MACHINEFUNCTIONINFO_H -+ -+#include "llvm/CodeGen/MachineFunction.h" -+#include "llvm/CodeGen/SelectionDAG.h" -+#include -+ -+namespace llvm { -+ -+class R600MachineFunctionInfo : public MachineFunctionInfo { -+ -+public: -+ R600MachineFunctionInfo(const MachineFunction &MF); -+ std::vector ReservedRegs; -+ SDNode *Outputs[16]; -+ bool HasLinearInterpolation; -+ bool HasPerspectiveInterpolation; -+ -+ unsigned GetIJLinearIndex() const; -+ unsigned GetIJPerspectiveIndex() const; -+ -+}; -+ -+} // End llvm namespace -+ -+#endif //R600MACHINEFUNCTIONINFO_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600RegisterInfo.cpp llvm-r600/lib/Target/R600/R600RegisterInfo.cpp ---- llvm-3.2.src/lib/Target/R600/R600RegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600RegisterInfo.cpp 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,85 @@ -+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief R600 implementation of the TargetRegisterInfo class. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "R600RegisterInfo.h" -+#include "AMDGPUTargetMachine.h" -+#include "R600Defines.h" -+#include "R600MachineFunctionInfo.h" -+ -+using namespace llvm; -+ -+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, -+ const TargetInstrInfo &tii) -+: AMDGPURegisterInfo(tm, tii), -+ TM(tm), -+ TII(tii) -+ { } -+ -+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { -+ BitVector Reserved(getNumRegs()); -+ const R600MachineFunctionInfo * MFI = MF.getInfo(); -+ -+ Reserved.set(AMDGPU::ZERO); -+ Reserved.set(AMDGPU::HALF); -+ Reserved.set(AMDGPU::ONE); -+ Reserved.set(AMDGPU::ONE_INT); -+ Reserved.set(AMDGPU::NEG_HALF); -+ Reserved.set(AMDGPU::NEG_ONE); -+ Reserved.set(AMDGPU::PV_X); -+ Reserved.set(AMDGPU::ALU_LITERAL_X); -+ Reserved.set(AMDGPU::ALU_CONST); -+ Reserved.set(AMDGPU::PREDICATE_BIT); -+ Reserved.set(AMDGPU::PRED_SEL_OFF); -+ Reserved.set(AMDGPU::PRED_SEL_ZERO); -+ Reserved.set(AMDGPU::PRED_SEL_ONE); -+ -+ for (std::vector::const_iterator I = MFI->ReservedRegs.begin(), -+ E = MFI->ReservedRegs.end(); I != E; ++I) { -+ Reserved.set(*I); -+ } -+ -+ return Reserved; -+} -+ -+const TargetRegisterClass * -+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { -+ switch (rc->getID()) { -+ case AMDGPU::GPRF32RegClassID: -+ case AMDGPU::GPRI32RegClassID: -+ return &AMDGPU::R600_Reg32RegClass; -+ default: return rc; -+ } -+} -+ -+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { -+ return this->getEncodingValue(reg) >> HW_CHAN_SHIFT; -+} -+ -+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( -+ MVT VT) const { -+ switch(VT.SimpleTy) { -+ default: -+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass; -+ } -+} -+ -+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const { -+ switch (Channel) { -+ default: assert(!"Invalid channel index"); return 0; -+ case 0: return AMDGPU::sel_x; -+ case 1: 
return AMDGPU::sel_y; -+ case 2: return AMDGPU::sel_z; -+ case 3: return AMDGPU::sel_w; -+ } -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600RegisterInfo.h llvm-r600/lib/Target/R600/R600RegisterInfo.h ---- llvm-3.2.src/lib/Target/R600/R600RegisterInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600RegisterInfo.h 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,55 @@ -+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Interface definition for R600RegisterInfo -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef R600REGISTERINFO_H_ -+#define R600REGISTERINFO_H_ -+ -+#include "AMDGPUTargetMachine.h" -+#include "AMDGPURegisterInfo.h" -+ -+namespace llvm { -+ -+class R600TargetMachine; -+class TargetInstrInfo; -+ -+struct R600RegisterInfo : public AMDGPURegisterInfo { -+ AMDGPUTargetMachine &TM; -+ const TargetInstrInfo &TII; -+ -+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); -+ -+ virtual BitVector getReservedRegs(const MachineFunction &MF) const; -+ -+ /// \param RC is an AMDIL reg class. -+ /// -+ /// \returns the R600 reg class that is equivalent to \p RC. -+ virtual const TargetRegisterClass *getISARegClass( -+ const TargetRegisterClass *RC) const; -+ -+ /// \brief get the HW encoding for a register's channel. -+ unsigned getHWRegChan(unsigned reg) const; -+ -+ /// \brief get the register class of the specified type to use in the -+ /// CFGStructurizer -+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; -+ -+ /// \returns the sub reg enum value for the given \p Channel -+ /// (e.g. 
getSubRegFromChannel(0) -> AMDGPU::sel_x) -+ unsigned getSubRegFromChannel(unsigned Channel) const; -+ -+}; -+ -+} // End namespace llvm -+ -+#endif // AMDIDSAREGISTERINFO_H_ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600RegisterInfo.td llvm-r600/lib/Target/R600/R600RegisterInfo.td ---- llvm-3.2.src/lib/Target/R600/R600RegisterInfo.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600RegisterInfo.td 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,101 @@ -+ -+class R600Reg encoding> : Register { -+ let Namespace = "AMDGPU"; -+ let HWEncoding = encoding; -+} -+ -+class R600RegWithChan sel, string chan> : -+ Register { -+ -+ field bits<2> chan_encoding = !if(!eq(chan, "X"), 0, -+ !if(!eq(chan, "Y"), 1, -+ !if(!eq(chan, "Z"), 2, -+ !if(!eq(chan, "W"), 3, 0)))); -+ let HWEncoding{8-0} = sel; -+ let HWEncoding{10-9} = chan_encoding; -+ let Namespace = "AMDGPU"; -+} -+ -+class R600Reg_128 subregs, bits<16> encoding> : -+ RegisterWithSubRegs { -+ let Namespace = "AMDGPU"; -+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; -+ let HWEncoding = encoding; -+} -+ -+foreach Index = 0-127 in { -+ foreach Chan = [ "X", "Y", "Z", "W" ] in { -+ // 32-bit Temporary Registers -+ def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>; -+ } -+ // 128-bit Temporary Registers -+ def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", -+ [!cast("T"#Index#"_X"), -+ !cast("T"#Index#"_Y"), -+ !cast("T"#Index#"_Z"), -+ !cast("T"#Index#"_W")], -+ Index>; -+} -+ -+// Array Base Register holding input in FS -+foreach Index = 448-464 in { -+ def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; -+} -+ -+ -+// Special Registers -+ -+def ZERO : R600Reg<"0.0", 248>; -+def ONE : R600Reg<"1.0", 249>; -+def NEG_ONE : R600Reg<"-1.0", 249>; -+def ONE_INT : R600Reg<"1", 250>; -+def HALF : R600Reg<"0.5", 252>; -+def NEG_HALF : R600Reg<"-0.5", 252>; -+def ALU_LITERAL_X : R600Reg<"literal.x", 253>; -+def PV_X : R600Reg<"pv.x", 254>; -+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; -+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; -+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; -+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; -+ -+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, -+ (add (sequence "ArrayBase%u", 448, 464))>; -+// special registers for ALU src operands -+// const buffer reference, SRCx_SEL contains index -+def ALU_CONST : R600Reg<"CBuf", 0>; -+// interpolation param reference, SRCx_SEL contains index -+def ALU_PARAM : R600Reg<"Param", 0>; -+ -+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, -+ (add (sequence "T%u_X", 0, 127))>; -+ -+def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32, -+ (add (sequence "T%u_Y", 0, 127))>; -+ -+def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32, -+ (add (sequence "T%u_Z", 0, 127))>; -+ -+def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32, -+ (add (sequence "T%u_W", 0, 127))>; -+ -+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, -+ (interleave R600_TReg32_X, R600_TReg32_Y, -+ R600_TReg32_Z, R600_TReg32_W)>; -+ -+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add -+ R600_TReg32, -+ R600_ArrayBase, -+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF, -+ ALU_CONST, ALU_PARAM -+ )>; -+ -+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add -+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>; -+ -+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add -+ PREDICATE_BIT)>; -+ -+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, -+ (add 
(sequence "T%u_XYZW", 0, 127))> { -+ let CopyCost = -1; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/R600Schedule.td llvm-r600/lib/Target/R600/R600Schedule.td ---- llvm-3.2.src/lib/Target/R600/R600Schedule.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/R600Schedule.td 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,36 @@ -+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction -+// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS -+// slot has been removed. -+// -+//===----------------------------------------------------------------------===// -+ -+ -+def ALU_X : FuncUnit; -+def ALU_Y : FuncUnit; -+def ALU_Z : FuncUnit; -+def ALU_W : FuncUnit; -+def TRANS : FuncUnit; -+ -+def AnyALU : InstrItinClass; -+def VecALU : InstrItinClass; -+def TransALU : InstrItinClass; -+ -+def R600_EG_Itin : ProcessorItineraries < -+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], -+ [], -+ [ -+ InstrItinData]>, -+ InstrItinData]>, -+ InstrItinData]>, -+ InstrItinData]> -+ ] -+>; -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIAnnotateControlFlow.cpp llvm-r600/lib/Target/R600/SIAnnotateControlFlow.cpp ---- llvm-3.2.src/lib/Target/R600/SIAnnotateControlFlow.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIAnnotateControlFlow.cpp 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,330 @@ -+//===-- SIAnnotateControlFlow.cpp - ------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// Annotates the control flow with hardware specific intrinsics. 
-+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPU.h" -+ -+#include "llvm/Pass.h" -+#include "llvm/Module.h" -+#include "llvm/Analysis/Dominators.h" -+#include "llvm/Transforms/Utils/BasicBlockUtils.h" -+#include "llvm/ADT/DepthFirstIterator.h" -+#include "llvm/Transforms/Utils/SSAUpdater.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+// Complex types used in this pass -+typedef std::pair StackEntry; -+typedef SmallVector StackVector; -+ -+// Intrinsic names the control flow is annotated with -+static const char *IfIntrinsic = "llvm.SI.if"; -+static const char *ElseIntrinsic = "llvm.SI.else"; -+static const char *BreakIntrinsic = "llvm.SI.break"; -+static const char *IfBreakIntrinsic = "llvm.SI.if.break"; -+static const char *ElseBreakIntrinsic = "llvm.SI.else.break"; -+static const char *LoopIntrinsic = "llvm.SI.loop"; -+static const char *EndCfIntrinsic = "llvm.SI.end.cf"; -+ -+class SIAnnotateControlFlow : public FunctionPass { -+ -+ static char ID; -+ -+ Type *Boolean; -+ Type *Void; -+ Type *Int64; -+ Type *ReturnStruct; -+ -+ ConstantInt *BoolTrue; -+ ConstantInt *BoolFalse; -+ UndefValue *BoolUndef; -+ Constant *Int64Zero; -+ -+ Constant *If; -+ Constant *Else; -+ Constant *Break; -+ Constant *IfBreak; -+ Constant *ElseBreak; -+ Constant *Loop; -+ Constant *EndCf; -+ -+ DominatorTree *DT; -+ StackVector Stack; -+ SSAUpdater PhiInserter; -+ -+ bool isTopOfStack(BasicBlock *BB); -+ -+ Value *popSaved(); -+ -+ void push(BasicBlock *BB, Value *Saved); -+ -+ bool isElse(PHINode *Phi); -+ -+ void eraseIfUnused(PHINode *Phi); -+ -+ void openIf(BranchInst *Term); -+ -+ void insertElse(BranchInst *Term); -+ -+ void handleLoopCondition(Value *Cond); -+ -+ void handleLoop(BranchInst *Term); -+ -+ void closeControlFlow(BasicBlock *BB); -+ -+public: -+ SIAnnotateControlFlow(): -+ FunctionPass(ID) { } -+ -+ virtual bool doInitialization(Module &M); -+ -+ virtual bool runOnFunction(Function &F); -+ -+ virtual const char *getPassName() const { -+ return "SI annotate control flow"; -+ } -+ -+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { -+ AU.addRequired(); -+ AU.addPreserved(); -+ FunctionPass::getAnalysisUsage(AU); -+ } -+ -+}; -+ -+} // end anonymous namespace -+ -+char SIAnnotateControlFlow::ID = 0; -+ -+/// \brief Initialize all the types and constants used in the pass -+bool SIAnnotateControlFlow::doInitialization(Module &M) { -+ LLVMContext &Context = M.getContext(); -+ -+ Void = Type::getVoidTy(Context); -+ Boolean = Type::getInt1Ty(Context); -+ Int64 = Type::getInt64Ty(Context); -+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)0); -+ -+ BoolTrue = ConstantInt::getTrue(Context); -+ BoolFalse = ConstantInt::getFalse(Context); -+ BoolUndef = UndefValue::get(Boolean); -+ Int64Zero = ConstantInt::get(Int64, 0); -+ -+ If = M.getOrInsertFunction( -+ IfIntrinsic, ReturnStruct, Boolean, (Type *)0); -+ -+ Else = M.getOrInsertFunction( -+ ElseIntrinsic, ReturnStruct, Int64, (Type *)0); -+ -+ Break = M.getOrInsertFunction( -+ BreakIntrinsic, Int64, Int64, (Type *)0); -+ -+ IfBreak = M.getOrInsertFunction( -+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0); -+ -+ ElseBreak = M.getOrInsertFunction( -+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0); -+ -+ Loop = M.getOrInsertFunction( -+ LoopIntrinsic, Boolean, Int64, (Type *)0); -+ -+ EndCf = M.getOrInsertFunction( -+ EndCfIntrinsic, Void, Int64, (Type *)0); -+ -+ return false; -+} -+ -+/// \brief Is BB the last block saved on the stack ? 
-+bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) { -+ return Stack.back().first == BB; -+} -+ -+/// \brief Pop the last saved value from the control flow stack -+Value *SIAnnotateControlFlow::popSaved() { -+ return Stack.pop_back_val().second; -+} -+ -+/// \brief Push a BB and saved value to the control flow stack -+void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) { -+ Stack.push_back(std::make_pair(BB, Saved)); -+} -+ -+/// \brief Can the condition represented by this PHI node treated like -+/// an "Else" block? -+bool SIAnnotateControlFlow::isElse(PHINode *Phi) { -+ BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock(); -+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { -+ if (Phi->getIncomingBlock(i) == IDom) { -+ -+ if (Phi->getIncomingValue(i) != BoolTrue) -+ return false; -+ -+ } else { -+ if (Phi->getIncomingValue(i) != BoolFalse) -+ return false; -+ -+ } -+ } -+ return true; -+} -+ -+// \brief Erase "Phi" if it is not used any more -+void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { -+ if (!Phi->hasNUsesOrMore(1)) -+ Phi->eraseFromParent(); -+} -+ -+/// \brief Open a new "If" block -+void SIAnnotateControlFlow::openIf(BranchInst *Term) { -+ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term); -+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term)); -+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term)); -+} -+ -+/// \brief Close the last "If" block and open a new "Else" block -+void SIAnnotateControlFlow::insertElse(BranchInst *Term) { -+ Value *Ret = CallInst::Create(Else, popSaved(), "", Term); -+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term)); -+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term)); -+} -+ -+/// \brief Recursively handle the condition leading to a loop -+void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { -+ if (PHINode *Phi = dyn_cast(Cond)) { -+ -+ // Handle all non constant incoming values first -+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { -+ Value *Incoming = Phi->getIncomingValue(i); -+ if (isa(Incoming)) -+ continue; -+ -+ Phi->setIncomingValue(i, BoolFalse); -+ handleLoopCondition(Incoming); -+ } -+ -+ BasicBlock *Parent = Phi->getParent(); -+ BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock(); -+ -+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { -+ -+ Value *Incoming = Phi->getIncomingValue(i); -+ if (Incoming != BoolTrue) -+ continue; -+ -+ BasicBlock *From = Phi->getIncomingBlock(i); -+ if (From == IDom) { -+ CallInst *OldEnd = dyn_cast(Parent->getFirstInsertionPt()); -+ if (OldEnd && OldEnd->getCalledFunction() == EndCf) { -+ Value *Args[] = { -+ OldEnd->getArgOperand(0), -+ PhiInserter.GetValueAtEndOfBlock(Parent) -+ }; -+ Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd); -+ PhiInserter.AddAvailableValue(Parent, Ret); -+ continue; -+ } -+ } -+ -+ TerminatorInst *Insert = From->getTerminator(); -+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(From); -+ Value *Ret = CallInst::Create(Break, Arg, "", Insert); -+ PhiInserter.AddAvailableValue(From, Ret); -+ } -+ eraseIfUnused(Phi); -+ -+ } else if (Instruction *Inst = dyn_cast(Cond)) { -+ BasicBlock *Parent = Inst->getParent(); -+ TerminatorInst *Insert = Parent->getTerminator(); -+ Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) }; -+ Value *Ret = CallInst::Create(IfBreak, Args, "", Insert); -+ PhiInserter.AddAvailableValue(Parent, Ret); -+ -+ } else { -+ 
assert(0 && "Unhandled loop condition!"); -+ } -+} -+ -+/// \brief Handle a back edge (loop) -+void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { -+ BasicBlock *Target = Term->getSuccessor(1); -+ PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); -+ -+ PhiInserter.Initialize(Int64, ""); -+ PhiInserter.AddAvailableValue(Target, Broken); -+ -+ Value *Cond = Term->getCondition(); -+ Term->setCondition(BoolTrue); -+ handleLoopCondition(Cond); -+ -+ BasicBlock *BB = Term->getParent(); -+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB); -+ for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); -+ PI != PE; ++PI) { -+ -+ Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI); -+ } -+ -+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term)); -+ push(Term->getSuccessor(0), Arg); -+} -+ -+/// \brief Close the last opened control flow -+void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { -+ CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt()); -+} -+ -+/// \brief Annotate the control flow with intrinsics so the backend can -+/// recognize if/then/else and loops. -+bool SIAnnotateControlFlow::runOnFunction(Function &F) { -+ DT = &getAnalysis(); -+ -+ for (df_iterator I = df_begin(&F.getEntryBlock()), -+ E = df_end(&F.getEntryBlock()); I != E; ++I) { -+ -+ BranchInst *Term = dyn_cast((*I)->getTerminator()); -+ -+ if (!Term || Term->isUnconditional()) { -+ if (isTopOfStack(*I)) -+ closeControlFlow(*I); -+ continue; -+ } -+ -+ if (I.nodeVisited(Term->getSuccessor(1))) { -+ if (isTopOfStack(*I)) -+ closeControlFlow(*I); -+ handleLoop(Term); -+ continue; -+ } -+ -+ if (isTopOfStack(*I)) { -+ PHINode *Phi = dyn_cast(Term->getCondition()); -+ if (Phi && Phi->getParent() == *I && isElse(Phi)) { -+ insertElse(Term); -+ eraseIfUnused(Phi); -+ continue; -+ } -+ closeControlFlow(*I); -+ } -+ openIf(Term); -+ } -+ -+ assert(Stack.empty()); -+ return true; -+} -+ -+/// \brief Create the annotation pass -+FunctionPass *llvm::createSIAnnotateControlFlowPass() { -+ return new SIAnnotateControlFlow(); -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIAssignInterpRegs.cpp llvm-r600/lib/Target/R600/SIAssignInterpRegs.cpp ---- llvm-3.2.src/lib/Target/R600/SIAssignInterpRegs.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIAssignInterpRegs.cpp 2013-01-25 19:43:57.470049720 +0100 -@@ -0,0 +1,152 @@ -+//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief This pass maps the pseudo interpolation registers to the correct physical -+/// registers. -+// -+/// Prior to executing a fragment shader, the GPU loads interpolation -+/// parameters into physical registers. The specific physical register that each -+/// interpolation parameter ends up in depends on the type of the interpolation -+/// parameter as well as how many interpolation parameters are used by the -+/// shader. 
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+
-+#include "AMDGPU.h"
-+#include "AMDIL.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class SIAssignInterpRegsPass : public MachineFunctionPass {
-+
-+private:
-+ static char ID;
-+ TargetMachine &TM;
-+
-+ void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
-+ unsigned physReg, unsigned virtReg);
-+
-+public:
-+ SIAssignInterpRegsPass(TargetMachine &tm) :
-+ MachineFunctionPass(ID), TM(tm) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ const char *getPassName() const { return "SI Assign interpolation registers"; }
-+};
-+
-+} // End anonymous namespace
-+
-+char SIAssignInterpRegsPass::ID = 0;
-+
-+#define INTERP_VALUES 16
-+#define REQUIRED_VALUE_MAX_INDEX 7
-+
-+struct InterpInfo {
-+ bool Enabled;
-+ unsigned Regs[3];
-+ unsigned RegCount;
-+};
-+
-+
-+FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
-+ return new SIAssignInterpRegsPass(tm);
-+}
-+
-+bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) {
-+
-+ struct InterpInfo InterpUse[INTERP_VALUES] = {
-+ {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
-+ {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
-+ {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
-+ {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
-+ {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
-+ {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
-+ {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
-+ {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
-+ {false, {AMDGPU::POS_X_FLOAT}, 1},
-+ {false, {AMDGPU::POS_Y_FLOAT}, 1},
-+ {false, {AMDGPU::POS_Z_FLOAT}, 1},
-+ {false, {AMDGPU::POS_W_FLOAT}, 1},
-+ {false, {AMDGPU::FRONT_FACE}, 1},
-+ {false, {AMDGPU::ANCILLARY}, 1},
-+ {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
-+ {false, {AMDGPU::POS_FIXED_PT}, 1}
-+ };
-+
-+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-+ // This pass is only needed for pixel shaders.
-+ if (MFI->ShaderType != ShaderType::PIXEL) {
-+ return false;
-+ }
-+ MachineRegisterInfo &MRI = MF.getRegInfo();
-+ bool ForceEnable = true;
-+
-+ // First pass, mark the interpolation values that are used.
-+ for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
-+ for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
-+ RegIdx++) {
-+ InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
-+ !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
-+ if (InterpUse[InterpIdx].Enabled &&
-+ InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
-+ ForceEnable = false;
-+ }
-+ }
-+ }
-+
-+ // At least one interpolation mode must be enabled or else the GPU will hang.
-+ if (ForceEnable) {
-+ InterpUse[0].Enabled = true;
-+ }
-+
-+ unsigned UsedVgprs = 0;
-+
-+ // Second pass, replace with VGPRs.
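-+ // Enabled values are packed into consecutive VGPRs in table order, e.g. if
-+ // only PERSP_CENTER and POS_X_FLOAT are enabled, the I/J pair lands in the
-+ // first two VGPRs and the X position in the third.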
-+ for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { -+ if (!InterpUse[InterpIdx].Enabled) { -+ continue; -+ } -+ MFI->SPIPSInputAddr |= (1 << InterpIdx); -+ -+ for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; -+ RegIdx++, UsedVgprs++) { -+ unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs); -+ unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); -+ MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg); -+ addLiveIn(&MF, MRI, NewReg, VirtReg); -+ } -+ } -+ -+ return false; -+} -+ -+void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF, -+ MachineRegisterInfo & MRI, -+ unsigned physReg, unsigned virtReg) { -+ const TargetInstrInfo * TII = TM.getInstrInfo(); -+ if (!MRI.isLiveIn(physReg)) { -+ MRI.addLiveIn(physReg, virtReg); -+ MF->front().addLiveIn(physReg); -+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(), -+ TII->get(TargetOpcode::COPY), virtReg) -+ .addReg(physReg); -+ } else { -+ MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); -+ } -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInsertWaits.cpp llvm-r600/lib/Target/R600/SIInsertWaits.cpp ---- llvm-3.2.src/lib/Target/R600/SIInsertWaits.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIInsertWaits.cpp 2013-01-25 19:43:57.473383054 +0100 -@@ -0,0 +1,353 @@ -+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Insert wait instructions for memory reads and writes. -+/// -+/// Memory reads and writes are issued asynchronously, so we need to insert -+/// S_WAITCNT instructions when we want to access any of their results or -+/// overwrite any register that's used asynchronously. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPU.h" -+#include "SIInstrInfo.h" -+#include "SIMachineFunctionInfo.h" -+#include "llvm/CodeGen/MachineFunction.h" -+#include "llvm/CodeGen/MachineFunctionPass.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineRegisterInfo.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+/// \brief One variable for each of the hardware counters -+typedef union { -+ struct { -+ unsigned VM; -+ unsigned EXP; -+ unsigned LGKM; -+ } Named; -+ unsigned Array[3]; -+ -+} Counters; -+ -+typedef Counters RegCounters[512]; -+typedef std::pair RegInterval; -+ -+class SIInsertWaits : public MachineFunctionPass { -+ -+private: -+ static char ID; -+ const SIInstrInfo *TII; -+ const SIRegisterInfo &TRI; -+ const MachineRegisterInfo *MRI; -+ -+ /// \brief Constant hardware limits -+ static const Counters WaitCounts; -+ -+ /// \brief Constant zero value -+ static const Counters ZeroCounts; -+ -+ /// \brief Counter values we have already waited on. -+ Counters WaitedOn; -+ -+ /// \brief Counter values for last instruction issued. -+ Counters LastIssued; -+ -+ /// \brief Registers used by async instructions. -+ RegCounters UsedRegs; -+ -+ /// \brief Registers defined by async instructions. -+ RegCounters DefinedRegs; -+ -+ /// \brief Different export instruction types seen since last wait. -+ unsigned ExpInstrTypesSeen; -+ -+ /// \brief Get increment/decrement amount for this instruction. 
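-+ /// (VM tracks vector memory accesses, EXP export-related events and LGKM
-+ /// the remaining asynchronous accesses; the LGKM increment below grows with
-+ /// the width of the register being loaded.)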
-+ Counters getHwCounts(MachineInstr &MI); -+ -+ /// \brief Is operand relevant for async execution? -+ bool isOpRelevant(MachineOperand &Op); -+ -+ /// \brief Get register interval an operand affects. -+ RegInterval getRegInterval(MachineOperand &Op); -+ -+ /// \brief Handle instructions async components -+ void pushInstruction(MachineInstr &MI); -+ -+ /// \brief Insert the actual wait instruction -+ bool insertWait(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator I, -+ const Counters &Counts); -+ -+ /// \brief Resolve all operand dependencies to counter requirements -+ Counters handleOperands(MachineInstr &MI); -+ -+public: -+ SIInsertWaits(TargetMachine &tm) : -+ MachineFunctionPass(ID), -+ TII(static_cast(tm.getInstrInfo())), -+ TRI(TII->getRegisterInfo()) { } -+ -+ virtual bool runOnMachineFunction(MachineFunction &MF); -+ -+ const char *getPassName() const { -+ return "SI insert wait instructions"; -+ } -+ -+}; -+ -+} // End anonymous namespace -+ -+char SIInsertWaits::ID = 0; -+ -+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } }; -+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; -+ -+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { -+ return new SIInsertWaits(tm); -+} -+ -+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { -+ -+ uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; -+ Counters Result; -+ -+ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); -+ -+ // Only consider stores or EXP for EXP_CNT -+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && -+ (MI.getOpcode() == AMDGPU::EXP || !MI.getDesc().mayStore())); -+ -+ // LGKM may uses larger values -+ if (TSFlags & SIInstrFlags::LGKM_CNT) { -+ -+ MachineOperand &Op = MI.getOperand(0); -+ assert(Op.isReg() && "First LGKM operand must be a register!"); -+ -+ unsigned Reg = Op.getReg(); -+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize(); -+ Result.Named.LGKM = Size > 4 ? 
2 : 1; -+ -+ } else { -+ Result.Named.LGKM = 0; -+ } -+ -+ return Result; -+} -+ -+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { -+ -+ // Constants are always irrelevant -+ if (!Op.isReg()) -+ return false; -+ -+ // Defines are always relevant -+ if (Op.isDef()) -+ return true; -+ -+ // For exports all registers are relevant -+ MachineInstr &MI = *Op.getParent(); -+ if (MI.getOpcode() == AMDGPU::EXP) -+ return true; -+ -+ // For stores the stored value is also relevant -+ if (!MI.getDesc().mayStore()) -+ return false; -+ -+ for (MachineInstr::mop_iterator I = MI.operands_begin(), -+ E = MI.operands_end(); I != E; ++I) { -+ -+ if (I->isReg() && I->isUse()) -+ return Op.isIdenticalTo(*I); -+ } -+ -+ return false; -+} -+ -+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { -+ -+ if (!Op.isReg()) -+ return std::make_pair(0, 0); -+ -+ unsigned Reg = Op.getReg(); -+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize(); -+ -+ assert(Size >= 4); -+ -+ RegInterval Result; -+ Result.first = TRI.getEncodingValue(Reg); -+ Result.second = Result.first + Size / 4; -+ -+ return Result; -+} -+ -+void SIInsertWaits::pushInstruction(MachineInstr &MI) { -+ -+ // Get the hardware counter increments and sum them up -+ Counters Increment = getHwCounts(MI); -+ unsigned Sum = 0; -+ -+ for (unsigned i = 0; i < 3; ++i) { -+ LastIssued.Array[i] += Increment.Array[i]; -+ Sum += Increment.Array[i]; -+ } -+ -+ // If we don't increase anything then that's it -+ if (Sum == 0) -+ return; -+ -+ // Remember which export instructions we have seen -+ if (Increment.Named.EXP) { -+ ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2; -+ } -+ -+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { -+ -+ MachineOperand &Op = MI.getOperand(i); -+ if (!isOpRelevant(Op)) -+ continue; -+ -+ RegInterval Interval = getRegInterval(Op); -+ for (unsigned j = Interval.first; j < Interval.second; ++j) { -+ -+ // Remember which registers we define -+ if (Op.isDef()) -+ DefinedRegs[j] = LastIssued; -+ -+ // and which one we are using -+ if (Op.isUse()) -+ UsedRegs[j] = LastIssued; -+ } -+ } -+} -+ -+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator I, -+ const Counters &Required) { -+ -+ // End of program? No need to wait on anything -+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) -+ return false; -+ -+ // Figure out if the async instructions execute in order -+ bool Ordered[3]; -+ -+ // VM_CNT is always ordered -+ Ordered[0] = true; -+ -+ // EXP_CNT is unordered if we have both EXP & VM-writes -+ Ordered[1] = ExpInstrTypesSeen == 3; -+ -+ // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS -+ Ordered[2] = false; -+ -+ // The values we are going to put into the S_WAITCNT instruction -+ Counters Counts = WaitCounts; -+ -+ // Do we really need to wait? 
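-+  // Worked example for an in-order counter: if LastIssued.VM is 5 and an
-+  // operand requires Required.VM = 3, it is enough to wait until only
-+  // LastIssued - Required = 2 loads are still outstanding, so the VM field
-+  // of the S_WAITCNT immediate becomes 2. Unordered counters instead have
-+  // to drain to zero. The immediate packs VM into bits 3:0, EXP into bits
-+  // 6:4 and LGKM into bits 10:8.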
-+  bool NeedWait = false;
-+
-+  for (unsigned i = 0; i < 3; ++i) {
-+
-+    if (Required.Array[i] <= WaitedOn.Array[i])
-+      continue;
-+
-+    NeedWait = true;
-+
-+    if (Ordered[i]) {
-+      unsigned Value = LastIssued.Array[i] - Required.Array[i];
-+
-+      // Adjust the value to the real hardware possibilities
-+      Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
-+
-+    } else
-+      Counts.Array[i] = 0;
-+
-+    // Remember what we have waited on
-+    WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
-+  }
-+
-+  if (!NeedWait)
-+    return false;
-+
-+  // Reset EXP_CNT instruction types
-+  if (Counts.Named.EXP == 0)
-+    ExpInstrTypesSeen = 0;
-+
-+  // Build the wait instruction
-+  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
-+          .addImm((Counts.Named.VM & 0xF) |
-+                  ((Counts.Named.EXP & 0x7) << 4) |
-+                  ((Counts.Named.LGKM & 0x7) << 8));
-+
-+  return true;
-+}
-+
-+/// \brief Helper function for handleOperands
-+static void increaseCounters(Counters &Dst, const Counters &Src) {
-+
-+  for (unsigned i = 0; i < 3; ++i)
-+    Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
-+}
-+
-+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-+
-+  Counters Result = ZeroCounts;
-+
-+  // For each register affected by this instruction,
-+  // merge in the counter values it depends on.
-+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-+
-+    MachineOperand &Op = MI.getOperand(i);
-+    RegInterval Interval = getRegInterval(Op);
-+    for (unsigned j = Interval.first; j < Interval.second; ++j) {
-+
-+      if (Op.isDef())
-+        increaseCounters(Result, UsedRegs[j]);
-+
-+      if (Op.isUse())
-+        increaseCounters(Result, DefinedRegs[j]);
-+    }
-+  }
-+
-+  return Result;
-+}
-+
-+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
-+
-+  bool Changes = false;
-+
-+  MRI = &MF.getRegInfo();
-+
-+  WaitedOn = ZeroCounts;
-+  LastIssued = ZeroCounts;
-+
-+  memset(&UsedRegs, 0, sizeof(UsedRegs));
-+  memset(&DefinedRegs, 0, sizeof(DefinedRegs));
-+
-+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
-+       BI != BE; ++BI) {
-+
-+    MachineBasicBlock &MBB = *BI;
-+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+         I != E; ++I) {
-+
-+      Changes |= insertWait(MBB, I, handleOperands(*I));
-+      pushInstruction(*I);
-+    }
-+
-+    // Wait for everything at the end of the MBB
-+    Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
-+  }
-+
-+  return Changes;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrFormats.td llvm-r600/lib/Target/R600/SIInstrFormats.td
---- llvm-3.2.src/lib/Target/R600/SIInstrFormats.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIInstrFormats.td	2013-01-25 19:43:57.473383054 +0100
-@@ -0,0 +1,146 @@
-+//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// SI Instruction format definitions.
-+//
-+// Instructions with _32 take 32-bit operands.
-+// Instructions with _64 take 64-bit operands.
-+//
-+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
-+// encoding is the standard encoding, but instructions that make use of
-+// any of the instruction modifiers must use the 64-bit encoding.
-+//
-+// Instructions with _e32 use the 32-bit encoding.
-+// Instructions with _e64 use the 64-bit encoding.
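-+//
-+// For example, the 32-bit float add V_ADD_F32 is named V_ADD_F32_e32 in its
-+// 32-bit encoding and V_ADD_F32_e64 in its 64-bit encoding.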
-+// -+//===----------------------------------------------------------------------===// -+ -+class VOP3b_2IN op, string opName, RegisterClass dstClass, -+ RegisterClass src0Class, RegisterClass src1Class, -+ list pattern> -+ : VOP3b ; -+ -+ -+class VOP3_1_32 op, string opName, list pattern> -+ : VOP3b_2IN ; -+ -+class VOP3_32 op, string opName, list pattern> -+ : VOP3 ; -+ -+class VOP3_64 op, string opName, list pattern> -+ : VOP3 ; -+ -+ -+class SOP1_32 op, string opName, list pattern> -+ : SOP1 ; -+ -+class SOP1_64 op, string opName, list pattern> -+ : SOP1 ; -+ -+class SOP2_32 op, string opName, list pattern> -+ : SOP2 ; -+ -+class SOP2_64 op, string opName, list pattern> -+ : SOP2 ; -+ -+class SOP2_VCC op, string opName, list pattern> -+ : SOP2 ; -+ -+class VOP1_Helper op, RegisterClass vrc, RegisterClass arc, -+ string opName, list pattern> : -+ VOP1 < -+ op, (outs vrc:$dst), (ins arc:$src0), opName, pattern -+ >; -+ -+multiclass VOP1_32 op, string opName, list pattern> { -+ def _e32: VOP1_Helper ; -+ def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, -+ opName, [] -+ >; -+} -+ -+multiclass VOP1_64 op, string opName, list pattern> { -+ -+ def _e32 : VOP1_Helper ; -+ -+ def _e64 : VOP3_64 < -+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, -+ opName, [] -+ >; -+} -+ -+class VOP2_Helper op, RegisterClass vrc, RegisterClass arc, -+ string opName, list pattern> : -+ VOP2 < -+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern -+ >; -+ -+multiclass VOP2_32 op, string opName, list pattern> { -+ -+ def _e32 : VOP2_Helper ; -+ -+ def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, -+ opName, [] -+ >; -+} -+ -+multiclass VOP2_64 op, string opName, list pattern> { -+ def _e32: VOP2_Helper ; -+ -+ def _e64 : VOP3_64 < -+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, -+ opName, [] -+ >; -+} -+ -+class SOPK_32 op, string opName, list pattern> -+ : SOPK ; -+ -+class SOPK_64 op, string opName, list pattern> -+ : SOPK ; -+ -+class VOPC_Helper op, RegisterClass vrc, RegisterClass arc, -+ string opName, list pattern> : -+ VOPC < -+ op, (ins arc:$src0, vrc:$src1), opName, pattern -+ >; -+ -+multiclass VOPC_32 op, string opName, list pattern> { -+ -+ def _e32 : VOPC_Helper < -+ {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, -+ VReg_32, AllReg_32, opName, pattern -+ >; -+ -+ def _e64 : VOP3_1_32 < -+ op, -+ opName, pattern -+ >; -+} -+ -+multiclass VOPC_64 op, string opName, list pattern> { -+ -+ def _e32 : VOPC_Helper ; -+ -+ def _e64 : VOP3_64 < -+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, -+ opName, [] -+ >; -+} -+ -+class SOPC_32 op, string opName, list pattern> -+ : SOPC ; -+ -+class SOPC_64 op, string opName, list pattern> -+ : SOPC ; -+ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrInfo.cpp llvm-r600/lib/Target/R600/SIInstrInfo.cpp ---- llvm-3.2.src/lib/Target/R600/SIInstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIInstrInfo.cpp 2013-01-25 19:43:57.473383054 +0100 -@@ -0,0 +1,89 @@ -+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief SI Implementation of TargetInstrInfo. 
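-+///
-+/// Currently this provides physical register copies between the SGPR and
-+/// VGPR register classes and a helper for materializing immediate moves.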
-+// -+//===----------------------------------------------------------------------===// -+ -+ -+#include "SIInstrInfo.h" -+#include "AMDGPUTargetMachine.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineRegisterInfo.h" -+#include "llvm/MC/MCInstrDesc.h" -+ -+#include -+ -+using namespace llvm; -+ -+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm) -+ : AMDGPUInstrInfo(tm), -+ RI(tm, *this) -+ { } -+ -+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const { -+ return RI; -+} -+ -+void -+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MI, DebugLoc DL, -+ unsigned DestReg, unsigned SrcReg, -+ bool KillSrc) const { -+ // If we are trying to copy to or from SCC, there is a bug somewhere else in -+ // the backend. While it may be theoretically possible to do this, it should -+ // never be necessary. -+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); -+ -+ if (AMDGPU::SReg_64RegClass.contains(DestReg)) { -+ assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); -+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) -+ .addReg(SrcReg, getKillRegState(KillSrc)); -+ } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) { -+ assert(AMDGPU::VReg_32RegClass.contains(SrcReg) || -+ AMDGPU::SReg_32RegClass.contains(SrcReg)); -+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) -+ .addReg(SrcReg, getKillRegState(KillSrc)); -+ } else { -+ assert(AMDGPU::SReg_32RegClass.contains(DestReg)); -+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); -+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) -+ .addReg(SrcReg, getKillRegState(KillSrc)); -+ } -+} -+ -+MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, -+ int64_t Imm) const { -+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc()); -+ MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); -+ MachineInstrBuilder(MI).addImm(Imm); -+ -+ return MI; -+ -+} -+ -+bool SIInstrInfo::isMov(unsigned Opcode) const { -+ switch(Opcode) { -+ default: return false; -+ case AMDGPU::S_MOV_B32: -+ case AMDGPU::S_MOV_B64: -+ case AMDGPU::V_MOV_B32_e32: -+ case AMDGPU::V_MOV_B32_e64: -+ case AMDGPU::V_MOV_IMM_F32: -+ case AMDGPU::V_MOV_IMM_I32: -+ case AMDGPU::S_MOV_IMM_I32: -+ return true; -+ } -+} -+ -+bool -+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { -+ return RC != &AMDGPU::EXECRegRegClass; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrInfo.h llvm-r600/lib/Target/R600/SIInstrInfo.h ---- llvm-3.2.src/lib/Target/R600/SIInstrInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIInstrInfo.h 2013-01-25 19:43:57.476716387 +0100 -@@ -0,0 +1,64 @@ -+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Interface definition for SIInstrInfo. 
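-+///
-+/// Also declares the SIInstrFlags bits that mark instructions counted by
-+/// the VM_CNT, EXP_CNT and LGKM_CNT hardware counters.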
-+// -+//===----------------------------------------------------------------------===// -+ -+ -+#ifndef SIINSTRINFO_H -+#define SIINSTRINFO_H -+ -+#include "AMDGPUInstrInfo.h" -+#include "SIRegisterInfo.h" -+ -+namespace llvm { -+ -+class SIInstrInfo : public AMDGPUInstrInfo { -+private: -+ const SIRegisterInfo RI; -+ -+public: -+ explicit SIInstrInfo(AMDGPUTargetMachine &tm); -+ -+ const SIRegisterInfo &getRegisterInfo() const; -+ -+ virtual void copyPhysReg(MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator MI, DebugLoc DL, -+ unsigned DestReg, unsigned SrcReg, -+ bool KillSrc) const; -+ -+ /// \returns the encoding type of this instruction. -+ unsigned getEncodingType(const MachineInstr &MI) const; -+ -+ /// \returns the size of this instructions encoding in number of bytes. -+ unsigned getEncodingBytes(const MachineInstr &MI) const; -+ -+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, -+ int64_t Imm) const; -+ -+ virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;} -+ virtual bool isMov(unsigned Opcode) const; -+ -+ virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; -+ }; -+ -+} // End namespace llvm -+ -+namespace SIInstrFlags { -+ enum Flags { -+ // First 4 bits are the instruction encoding -+ VM_CNT = 1 << 4, -+ EXP_CNT = 1 << 5, -+ LGKM_CNT = 1 << 6 -+ }; -+} -+ -+#endif //SIINSTRINFO_H -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstrInfo.td llvm-r600/lib/Target/R600/SIInstrInfo.td ---- llvm-3.2.src/lib/Target/R600/SIInstrInfo.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIInstrInfo.td 2013-01-25 19:43:57.476716387 +0100 -@@ -0,0 +1,591 @@ -+//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
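-+//
-+// Encoding classes for the SI instruction formats (EXP, MIMG, MTBUF, MUBUF,
-+// SMRD, SOP*, VINTRP and VOP*) and the operand helpers they share.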
-+// -+//===----------------------------------------------------------------------===// -+ -+//===----------------------------------------------------------------------===// -+// SI DAG Profiles -+//===----------------------------------------------------------------------===// -+def SDTVCCBinaryOp : SDTypeProfile<1, 2, [ -+ SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2> -+]>; -+ -+//===----------------------------------------------------------------------===// -+// SI DAG Nodes -+//===----------------------------------------------------------------------===// -+ -+// and operation on 64-bit wide vcc -+def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, -+ [SDNPCommutative, SDNPAssociative] -+>; -+ -+// Special bitcast node for sharing VCC register between VALU and SALU -+def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST", -+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> -+>; -+ -+// and operation on 64-bit wide vcc -+def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, -+ [SDNPCommutative, SDNPAssociative] -+>; -+ -+// Special bitcast node for sharing VCC register between VALU and SALU -+def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", -+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> -+>; -+ -+class InstSI pattern> : -+ AMDGPUInst { -+ -+ field bits<4> EncodingType = 0; -+ field bits<1> VM_CNT = 0; -+ field bits<1> EXP_CNT = 0; -+ field bits<1> LGKM_CNT = 0; -+ -+ let TSFlags{3-0} = EncodingType; -+ let TSFlags{4} = VM_CNT; -+ let TSFlags{5} = EXP_CNT; -+ let TSFlags{6} = LGKM_CNT; -+} -+ -+class Enc32 pattern> : -+ InstSI { -+ -+ field bits<32> Inst; -+} -+ -+class Enc64 pattern> : -+ InstSI { -+ -+ field bits<64> Inst; -+} -+ -+class SIOperand : Operand { -+ let EncoderMethod = "encodeOperand"; -+ let MIOperandInfo = opInfo; -+} -+ -+def IMM16bit : ImmLeaf < -+ i16, -+ [{return isInt<16>(Imm);}] -+>; -+ -+def IMM8bit : ImmLeaf < -+ i32, -+ [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] -+>; -+ -+def IMM12bit : ImmLeaf < -+ i16, -+ [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] -+>; -+ -+def IMM32bitIn64bit : ImmLeaf < -+ i64, -+ [{return isInt<32>(Imm);}] -+>; -+ -+class GPR4Align : Operand { -+ let EncoderMethod = "GPR4AlignEncode"; -+ let MIOperandInfo = (ops rc:$reg); -+} -+ -+class GPR2Align : Operand { -+ let EncoderMethod = "GPR2AlignEncode"; -+ let MIOperandInfo = (ops rc:$reg); -+} -+ -+def SMRDmemrr : Operand { -+ let MIOperandInfo = (ops SReg_64, SReg_32); -+ let EncoderMethod = "GPR2AlignEncode"; -+} -+ -+def SMRDmemri : Operand { -+ let MIOperandInfo = (ops SReg_64, i32imm); -+ let EncoderMethod = "SMRDmemriEncode"; -+} -+ -+def ADDR_Reg : ComplexPattern; -+def ADDR_Offset8 : ComplexPattern; -+ -+let Uses = [EXEC] in { -+ -+def EXP : Enc64< -+ (outs), -+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, -+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), -+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", -+ [] > { -+ -+ bits<4> EN; -+ bits<6> TGT; -+ bits<1> COMPR; -+ bits<1> DONE; -+ bits<1> VM; -+ bits<8> VSRC0; -+ bits<8> VSRC1; -+ bits<8> VSRC2; -+ bits<8> VSRC3; -+ -+ let Inst{3-0} = EN; -+ let Inst{9-4} = TGT; -+ let Inst{10} = COMPR; -+ let Inst{11} = DONE; -+ let Inst{12} = VM; -+ let Inst{31-26} = 0x3e; -+ let Inst{39-32} = VSRC0; -+ let Inst{47-40} = VSRC1; -+ let Inst{55-48} = VSRC2; -+ let Inst{63-56} = VSRC3; -+ let EncodingType = 0; //SIInstrEncodingType::EXP -+ -+ let EXP_CNT = 1; -+} -+ -+class MIMG op, dag outs, dag ins, string asm, list pattern> : -+ Enc64 { -+ -+ bits<8> 
VDATA; -+ bits<4> DMASK; -+ bits<1> UNORM; -+ bits<1> GLC; -+ bits<1> DA; -+ bits<1> R128; -+ bits<1> TFE; -+ bits<1> LWE; -+ bits<1> SLC; -+ bits<8> VADDR; -+ bits<5> SRSRC; -+ bits<5> SSAMP; -+ -+ let Inst{11-8} = DMASK; -+ let Inst{12} = UNORM; -+ let Inst{13} = GLC; -+ let Inst{14} = DA; -+ let Inst{15} = R128; -+ let Inst{16} = TFE; -+ let Inst{17} = LWE; -+ let Inst{24-18} = op; -+ let Inst{25} = SLC; -+ let Inst{31-26} = 0x3c; -+ let Inst{39-32} = VADDR; -+ let Inst{47-40} = VDATA; -+ let Inst{52-48} = SRSRC; -+ let Inst{57-53} = SSAMP; -+ let EncodingType = 2; //SIInstrEncodingType::MIMG -+ -+ let VM_CNT = 1; -+ let EXP_CNT = 1; -+} -+ -+class MTBUF op, dag outs, dag ins, string asm, list pattern> : -+ Enc64 { -+ -+ bits<8> VDATA; -+ bits<12> OFFSET; -+ bits<1> OFFEN; -+ bits<1> IDXEN; -+ bits<1> GLC; -+ bits<1> ADDR64; -+ bits<4> DFMT; -+ bits<3> NFMT; -+ bits<8> VADDR; -+ bits<5> SRSRC; -+ bits<1> SLC; -+ bits<1> TFE; -+ bits<8> SOFFSET; -+ -+ let Inst{11-0} = OFFSET; -+ let Inst{12} = OFFEN; -+ let Inst{13} = IDXEN; -+ let Inst{14} = GLC; -+ let Inst{15} = ADDR64; -+ let Inst{18-16} = op; -+ let Inst{22-19} = DFMT; -+ let Inst{25-23} = NFMT; -+ let Inst{31-26} = 0x3a; //encoding -+ let Inst{39-32} = VADDR; -+ let Inst{47-40} = VDATA; -+ let Inst{52-48} = SRSRC; -+ let Inst{54} = SLC; -+ let Inst{55} = TFE; -+ let Inst{63-56} = SOFFSET; -+ let EncodingType = 3; //SIInstrEncodingType::MTBUF -+ -+ let VM_CNT = 1; -+ let EXP_CNT = 1; -+ -+ let neverHasSideEffects = 1; -+} -+ -+class MUBUF op, dag outs, dag ins, string asm, list pattern> : -+ Enc64 { -+ -+ bits<8> VDATA; -+ bits<12> OFFSET; -+ bits<1> OFFEN; -+ bits<1> IDXEN; -+ bits<1> GLC; -+ bits<1> ADDR64; -+ bits<1> LDS; -+ bits<8> VADDR; -+ bits<5> SRSRC; -+ bits<1> SLC; -+ bits<1> TFE; -+ bits<8> SOFFSET; -+ -+ let Inst{11-0} = OFFSET; -+ let Inst{12} = OFFEN; -+ let Inst{13} = IDXEN; -+ let Inst{14} = GLC; -+ let Inst{15} = ADDR64; -+ let Inst{16} = LDS; -+ let Inst{24-18} = op; -+ let Inst{31-26} = 0x38; //encoding -+ let Inst{39-32} = VADDR; -+ let Inst{47-40} = VDATA; -+ let Inst{52-48} = SRSRC; -+ let Inst{54} = SLC; -+ let Inst{55} = TFE; -+ let Inst{63-56} = SOFFSET; -+ let EncodingType = 4; //SIInstrEncodingType::MUBUF -+ -+ let VM_CNT = 1; -+ let EXP_CNT = 1; -+ -+ let neverHasSideEffects = 1; -+} -+ -+} // End Uses = [EXEC] -+ -+class SMRD op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits<7> SDST; -+ bits<15> PTR; -+ bits<8> OFFSET = PTR{7-0}; -+ bits<1> IMM = PTR{8}; -+ bits<6> SBASE = PTR{14-9}; -+ -+ let Inst{7-0} = OFFSET; -+ let Inst{8} = IMM; -+ let Inst{14-9} = SBASE; -+ let Inst{21-15} = SDST; -+ let Inst{26-22} = op; -+ let Inst{31-27} = 0x18; //encoding -+ let EncodingType = 5; //SIInstrEncodingType::SMRD -+ -+ let LGKM_CNT = 1; -+} -+ -+class SOP1 op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits<7> SDST; -+ bits<8> SSRC0; -+ -+ let Inst{7-0} = SSRC0; -+ let Inst{15-8} = op; -+ let Inst{22-16} = SDST; -+ let Inst{31-23} = 0x17d; //encoding; -+ let EncodingType = 6; //SIInstrEncodingType::SOP1 -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+class SOP2 op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits<7> SDST; -+ bits<8> SSRC0; -+ bits<8> SSRC1; -+ -+ let Inst{7-0} = SSRC0; -+ let Inst{15-8} = SSRC1; -+ let Inst{22-16} = SDST; -+ let Inst{29-23} = op; -+ let Inst{31-30} = 0x2; // encoding -+ let EncodingType = 7; // SIInstrEncodingType::SOP2 -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 
0; -+} -+ -+class SOPC op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits<8> SSRC0; -+ bits<8> SSRC1; -+ -+ let Inst{7-0} = SSRC0; -+ let Inst{15-8} = SSRC1; -+ let Inst{22-16} = op; -+ let Inst{31-23} = 0x17e; -+ let EncodingType = 8; // SIInstrEncodingType::SOPC -+ -+ let DisableEncoding = "$dst"; -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+class SOPK op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits <7> SDST; -+ bits <16> SIMM16; -+ -+ let Inst{15-0} = SIMM16; -+ let Inst{22-16} = SDST; -+ let Inst{27-23} = op; -+ let Inst{31-28} = 0xb; //encoding -+ let EncodingType = 9; // SIInstrEncodingType::SOPK -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+class SOPP op, dag ins, string asm, list pattern> : Enc32 < -+ (outs), -+ ins, -+ asm, -+ pattern > { -+ -+ bits <16> SIMM16; -+ -+ let Inst{15-0} = SIMM16; -+ let Inst{22-16} = op; -+ let Inst{31-23} = 0x17f; // encoding -+ let EncodingType = 10; // SIInstrEncodingType::SOPP -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+let Uses = [EXEC] in { -+ -+class VINTRP op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits<8> VDST; -+ bits<8> VSRC; -+ bits<2> ATTRCHAN; -+ bits<6> ATTR; -+ -+ let Inst{7-0} = VSRC; -+ let Inst{9-8} = ATTRCHAN; -+ let Inst{15-10} = ATTR; -+ let Inst{17-16} = op; -+ let Inst{25-18} = VDST; -+ let Inst{31-26} = 0x32; // encoding -+ let EncodingType = 11; // SIInstrEncodingType::VINTRP -+ -+ let neverHasSideEffects = 1; -+ let mayLoad = 1; -+ let mayStore = 0; -+} -+ -+class VOP1 op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits<8> VDST; -+ bits<9> SRC0; -+ -+ let Inst{8-0} = SRC0; -+ let Inst{16-9} = op; -+ let Inst{24-17} = VDST; -+ let Inst{31-25} = 0x3f; //encoding -+ -+ let EncodingType = 12; // SIInstrEncodingType::VOP1 -+ let PostEncoderMethod = "VOPPostEncode"; -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+class VOP2 op, dag outs, dag ins, string asm, list pattern> : -+ Enc32 { -+ -+ bits<8> VDST; -+ bits<9> SRC0; -+ bits<8> VSRC1; -+ -+ let Inst{8-0} = SRC0; -+ let Inst{16-9} = VSRC1; -+ let Inst{24-17} = VDST; -+ let Inst{30-25} = op; -+ let Inst{31} = 0x0; //encoding -+ -+ let EncodingType = 13; // SIInstrEncodingType::VOP2 -+ let PostEncoderMethod = "VOPPostEncode"; -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+class VOP3 op, dag outs, dag ins, string asm, list pattern> : -+ Enc64 { -+ -+ bits<8> VDST; -+ bits<9> SRC0; -+ bits<9> SRC1; -+ bits<9> SRC2; -+ bits<3> ABS; -+ bits<1> CLAMP; -+ bits<2> OMOD; -+ bits<3> NEG; -+ -+ let Inst{7-0} = VDST; -+ let Inst{10-8} = ABS; -+ let Inst{11} = CLAMP; -+ let Inst{25-17} = op; -+ let Inst{31-26} = 0x34; //encoding -+ let Inst{40-32} = SRC0; -+ let Inst{49-41} = SRC1; -+ let Inst{58-50} = SRC2; -+ let Inst{60-59} = OMOD; -+ let Inst{63-61} = NEG; -+ -+ let EncodingType = 14; // SIInstrEncodingType::VOP3 -+ let PostEncoderMethod = "VOPPostEncode"; -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+class VOP3b op, dag outs, dag ins, string asm, list pattern> : -+ Enc64 { -+ -+ bits<8> VDST; -+ bits<9> SRC0; -+ bits<9> SRC1; -+ bits<9> SRC2; -+ bits<7> SDST; -+ bits<2> OMOD; -+ bits<3> NEG; -+ -+ let Inst{7-0} = VDST; -+ let Inst{14-8} = SDST; -+ let Inst{25-17} = op; -+ let Inst{31-26} = 0x34; //encoding -+ let Inst{40-32} = SRC0; -+ let Inst{49-41} = SRC1; -+ let Inst{58-50} = SRC2; -+ let 
Inst{60-59} = OMOD; -+ let Inst{63-61} = NEG; -+ -+ let EncodingType = 14; // SIInstrEncodingType::VOP3 -+ let PostEncoderMethod = "VOPPostEncode"; -+ -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+class VOPC op, dag ins, string asm, list pattern> : -+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { -+ -+ bits<9> SRC0; -+ bits<8> VSRC1; -+ -+ let Inst{8-0} = SRC0; -+ let Inst{16-9} = VSRC1; -+ let Inst{24-17} = op; -+ let Inst{31-25} = 0x3e; -+ -+ let EncodingType = 15; //SIInstrEncodingType::VOPC -+ let PostEncoderMethod = "VOPPostEncode"; -+ let DisableEncoding = "$dst"; -+ let mayLoad = 0; -+ let mayStore = 0; -+ let hasSideEffects = 0; -+} -+ -+} // End Uses = [EXEC] -+ -+class MIMG_Load_Helper op, string asm> : MIMG < -+ op, -+ (outs VReg_128:$vdata), -+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, -+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr, -+ GPR4Align:$srsrc, GPR4Align:$ssamp), -+ asm, -+ []> { -+ let mayLoad = 1; -+ let mayStore = 0; -+} -+ -+class MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF < -+ op, -+ (outs regClass:$dst), -+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, -+ i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, -+ i1imm:$tfe, SReg_32:$soffset), -+ asm, -+ []> { -+ let mayLoad = 1; -+ let mayStore = 0; -+} -+ -+class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF < -+ op, -+ (outs regClass:$dst), -+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, -+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, -+ i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), -+ asm, -+ []> { -+ let mayLoad = 1; -+ let mayStore = 0; -+} -+ -+class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < -+ op, -+ (outs), -+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, -+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, -+ GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), -+ asm, -+ []> { -+ let mayStore = 1; -+ let mayLoad = 0; -+} -+ -+multiclass SMRD_Helper op, string asm, RegisterClass dstClass, -+ ValueType vt> { -+ def _IMM : SMRD < -+ op, -+ (outs dstClass:$dst), -+ (ins SMRDmemri:$src0), -+ asm, -+ [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))] -+ >; -+ -+ def _SGPR : SMRD < -+ op, -+ (outs dstClass:$dst), -+ (ins SMRDmemrr:$src0), -+ asm, -+ [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))] -+ >; -+} -+ -+multiclass SMRD_32 op, string asm, RegisterClass dstClass> { -+ defm _F32 : SMRD_Helper ; -+ defm _I32 : SMRD_Helper ; -+} -+ -+include "SIInstrFormats.td" -+include "SIInstructions.td" -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIInstructions.td llvm-r600/lib/Target/R600/SIInstructions.td ---- llvm-3.2.src/lib/Target/R600/SIInstructions.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIInstructions.td 2013-01-25 19:43:57.480049720 +0100 -@@ -0,0 +1,1357 @@ -+//===-- SIInstructions.td - SI Instruction Defintions ---------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// This file was originally auto-generated from a GPU register header file and -+// all the instruction definitions were originally commented out. 
Instructions -+// that are not yet supported remain commented out. -+//===----------------------------------------------------------------------===// -+ -+def isSI : Predicate<"Subtarget.device()" -+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">; -+ -+let Predicates = [isSI] in { -+ -+let neverHasSideEffects = 1 in { -+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>; -+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>; -+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>; -+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>; -+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>; -+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; -+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; -+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; -+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; -+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; -+} // End neverHasSideEffects = 1 -+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>; -+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>; -+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>; -+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>; -+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; -+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; -+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; -+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; -+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>; -+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; -+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; -+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; -+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>; -+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>; -+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; -+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; -+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; -+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>; -+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>; -+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>; -+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>; -+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>; -+ -+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in { -+ -+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>; -+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>; -+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>; -+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>; -+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>; -+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>; -+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>; -+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>; -+ -+} // End hasSideEffects = 1 -+ -+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>; -+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>; -+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>; -+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>; -+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>; -+def S_MOVRELD_B64 : SOP1_64 <0x00000031, 
"S_MOVRELD_B64", []>; -+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>; -+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>; -+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>; -+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>; -+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>; -+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>; -+ -+/* -+This instruction is disabled for now until we can figure out how to teach -+the instruction selector to correctly use the S_CMP* vs V_CMP* -+instructions. -+ -+When this instruction is enabled the code generator sometimes produces this -+invalid sequence: -+ -+SCC = S_CMPK_EQ_I32 SGPR0, imm -+VCC = COPY SCC -+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1 -+ -+def S_CMPK_EQ_I32 : SOPK < -+ 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1), -+ "S_CMPK_EQ_I32", -+ [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))] -+>; -+*/ -+ -+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; -+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; -+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; -+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>; -+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>; -+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>; -+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>; -+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; -+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; -+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; -+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; -+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; -+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; -+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>; -+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>; -+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>; -+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; -+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; -+//def EXP : EXP_ <0x00000000, "EXP", []>; -+ -+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>; -+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>; -+def : Pat < -+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), -+ (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>; -+def : Pat < -+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), -+ (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>; -+def : Pat < -+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), -+ (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>; -+def : Pat < -+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), -+ (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>; -+def : Pat < -+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), -+ (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>; -+def : Pat < -+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)), -+ (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>; -+defm V_CMP_U_F32 : VOPC_32 <0x00000008, 
"V_CMP_U_F32", []>; -+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>; -+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>; -+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>; -+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>; -+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>; -+def : Pat < -+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), -+ (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>; -+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>; -+ -+//Side effect is writing to EXEC -+let hasSideEffects = 1 in { -+ -+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>; -+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>; -+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>; -+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>; -+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>; -+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>; -+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>; -+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>; -+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>; -+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>; -+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>; -+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>; -+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>; -+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>; -+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>; -+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>; -+ -+} // End hasSideEffects = 1 -+ -+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>; -+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>; -+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>; -+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>; -+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>; -+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>; -+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>; -+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>; -+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>; -+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>; -+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>; -+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>; -+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>; -+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>; -+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>; -+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>; -+ -+//Side effect is writing to EXEC -+let hasSideEffects = 1 in { -+ -+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>; -+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>; -+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>; -+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>; -+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>; -+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>; -+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>; -+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>; -+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>; -+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>; -+defm 
V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>; -+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>; -+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>; -+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>; -+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>; -+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>; -+ -+} // End hasSideEffects = 1 -+ -+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>; -+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>; -+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>; -+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>; -+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>; -+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>; -+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>; -+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>; -+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>; -+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>; -+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>; -+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>; -+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>; -+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>; -+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>; -+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>; -+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>; -+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>; -+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>; -+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>; -+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>; -+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>; -+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>; -+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>; -+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>; -+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>; -+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>; -+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>; -+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>; -+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>; -+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>; -+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>; -+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>; -+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>; -+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>; -+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>; -+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>; -+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>; -+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>; -+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>; -+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>; -+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>; -+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>; -+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>; -+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>; -+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>; -+defm V_CMPS_NLT_F64 : 
VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>; -+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>; -+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>; -+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>; -+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>; -+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>; -+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>; -+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>; -+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>; -+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>; -+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>; -+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>; -+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>; -+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>; -+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>; -+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>; -+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>; -+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>; -+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>; -+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>; -+def : Pat < -+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), -+ (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>; -+def : Pat < -+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), -+ (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>; -+def : Pat < -+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), -+ (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>; -+def : Pat < -+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), -+ (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>; -+def : Pat < -+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), -+ (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>; -+def : Pat < -+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GE)), -+ (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>; -+ -+let hasSideEffects = 1 in { -+ -+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>; -+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>; -+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>; -+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>; -+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>; -+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>; -+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>; -+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>; -+ -+} // End hasSideEffects -+ -+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>; -+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>; -+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>; -+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>; -+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>; -+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>; -+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, 
"V_CMP_GE_I64", []>; -+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>; -+ -+let hasSideEffects = 1 in { -+ -+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>; -+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>; -+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>; -+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>; -+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>; -+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>; -+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>; -+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>; -+ -+} // End hasSideEffects -+ -+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>; -+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>; -+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>; -+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>; -+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>; -+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>; -+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>; -+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>; -+ -+let hasSideEffects = 1 in { -+ -+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>; -+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>; -+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>; -+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>; -+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>; -+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>; -+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>; -+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>; -+ -+} // End hasSideEffects -+ -+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>; -+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>; -+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>; -+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>; -+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>; -+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>; -+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>; -+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>; -+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>; -+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>; -+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>; -+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>; -+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>; -+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>; -+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>; -+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>; -+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>; -+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>; -+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>; -+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>; -+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; -+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; -+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; -+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>; -+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>; -+//def BUFFER_STORE_FORMAT_XY : 
MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>; -+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>; -+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>; -+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>; -+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; -+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; -+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -+//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; -+//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; -+//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; -+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; -+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; -+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; -+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>; -+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; -+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; -+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; -+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>; -+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>; -+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>; -+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>; -+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>; -+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>; -+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>; -+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>; -+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>; -+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>; -+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>; -+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>; -+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>; -+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>; -+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>; -+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>; -+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>; -+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>; -+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>; -+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>; -+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>; -+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>; -+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>; -+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>; -+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>; -+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>; -+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>; -+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>; -+//def 
BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>; -+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>; -+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>; -+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>; -+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>; -+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>; -+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>; -+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; -+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; -+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; -+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>; -+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>; -+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; -+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; -+ -+defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>; -+ -+//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -+defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>; -+defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>; -+//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; -+//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; -+//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; -+//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; -+//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; -+//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; -+ -+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; -+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; -+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; -+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>; -+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; -+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; -+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; -+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>; -+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>; -+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>; -+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; -+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; -+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>; -+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; -+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; -+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; -+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>; -+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>; -+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>; -+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>; -+//def 
IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>; -+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>; -+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>; -+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>; -+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>; -+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>; -+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>; -+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; -+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; -+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; -+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">; -+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; -+def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">; -+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; -+def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">; -+def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">; -+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; -+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -+//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>; -+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; -+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; -+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -+//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>; -+//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>; -+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; -+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; -+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; -+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>; -+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>; -+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>; -+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>; -+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>; -+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>; -+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>; -+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>; -+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>; -+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>; -+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>; -+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>; -+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; -+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; -+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; -+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; -+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; -+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; -+//def IMAGE_GATHER4_B 
: MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; -+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; -+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; -+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; -+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; -+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; -+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; -+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; -+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; -+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; -+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; -+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; -+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; -+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; -+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; -+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; -+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; -+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; -+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; -+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; -+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; -+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; -+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; -+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; -+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; -+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>; -+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>; -+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>; -+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>; -+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>; -+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>; -+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>; -+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; -+ -+let neverHasSideEffects = 1 in { -+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; -+} // End neverHasSideEffects -+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; -+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; -+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; -+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", -+ [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))] -+>; -+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; -+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; -+defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", -+ [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))] -+>; -+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; -+////def V_CVT_F16_F32 : 
VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; -+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>; -+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; -+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; -+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; -+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>; -+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>; -+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>; -+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; -+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; -+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>; -+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; -+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; -+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", -+ [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))] -+>; -+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; -+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>; -+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", -+ [(set VReg_32:$dst, (frint AllReg_32:$src0))] -+>; -+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", -+ [(set VReg_32:$dst, (ffloor AllReg_32:$src0))] -+>; -+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", -+ [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))] -+>; -+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; -+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>; -+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; -+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; -+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", -+ [(set VReg_32:$dst, (fdiv FP_ONE, AllReg_32:$src0))] -+>; -+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; -+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; -+defm V_RSQ_LEGACY_F32 : VOP1_32 < -+ 0x0000002d, "V_RSQ_LEGACY_F32", -+ [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))] -+>; -+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; -+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; -+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; -+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; -+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; -+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>; -+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>; -+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; -+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; -+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; -+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>; -+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>; -+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>; -+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>; -+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>; -+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>; -+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>; -+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>; -+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>; -+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>; -+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>; -+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>; -+defm 
V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; -+ -+def V_INTERP_P1_F32 : VINTRP < -+ 0x00000000, -+ (outs VReg_32:$dst), -+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), -+ "V_INTERP_P1_F32", -+ []> { -+ let DisableEncoding = "$m0"; -+} -+ -+def V_INTERP_P2_F32 : VINTRP < -+ 0x00000001, -+ (outs VReg_32:$dst), -+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), -+ "V_INTERP_P2_F32", -+ []> { -+ -+ let Constraints = "$src0 = $dst"; -+ let DisableEncoding = "$src0,$m0"; -+ -+} -+ -+def V_INTERP_MOV_F32 : VINTRP < -+ 0x00000002, -+ (outs VReg_32:$dst), -+ (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), -+ "V_INTERP_MOV_F32", -+ []> { -+ let VSRC = 0; -+ let DisableEncoding = "$m0"; -+} -+ -+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>; -+ -+let isTerminator = 1 in { -+ -+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", -+ [(IL_retflag)]> { -+ let SIMM16 = 0; -+ let isBarrier = 1; -+ let hasCtrlDep = 1; -+} -+ -+let isBranch = 1 in { -+def S_BRANCH : SOPP < -+ 0x00000002, (ins brtarget:$target), "S_BRANCH", -+ [(br bb:$target)]> { -+ let isBarrier = 1; -+} -+ -+let DisableEncoding = "$scc" in { -+def S_CBRANCH_SCC0 : SOPP < -+ 0x00000004, (ins brtarget:$target, SCCReg:$scc), -+ "S_CBRANCH_SCC0", [] -+>; -+def S_CBRANCH_SCC1 : SOPP < -+ 0x00000005, (ins brtarget:$target, SCCReg:$scc), -+ "S_CBRANCH_SCC1", -+ [] -+>; -+} // End DisableEncoding = "$scc" -+ -+def S_CBRANCH_VCCZ : SOPP < -+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc), -+ "S_CBRANCH_VCCZ", -+ [] -+>; -+def S_CBRANCH_VCCNZ : SOPP < -+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc), -+ "S_CBRANCH_VCCNZ", -+ [] -+>; -+ -+let DisableEncoding = "$exec" in { -+def S_CBRANCH_EXECZ : SOPP < -+ 0x00000008, (ins brtarget:$target, EXECReg:$exec), -+ "S_CBRANCH_EXECZ", -+ [] -+>; -+def S_CBRANCH_EXECNZ : SOPP < -+ 0x00000009, (ins brtarget:$target, EXECReg:$exec), -+ "S_CBRANCH_EXECNZ", -+ [] -+>; -+} // End DisableEncoding = "$exec" -+ -+ -+} // End isBranch = 1 -+} // End isTerminator = 1 -+ -+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>; -+let hasSideEffects = 1 in { -+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", -+ [] -+>; -+} // End hasSideEffects -+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; -+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; -+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; -+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; -+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; -+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; -+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; -+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; -+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; -+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; -+ -+def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), -+ (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32", -+ [] -+>{ -+ let DisableEncoding = "$vcc"; -+} -+ -+def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), -+ (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), -+ "V_CNDMASK_B32_e64", -+ [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))] -+>; -+ -+//f32 pattern for V_CNDMASK_B32_e64 -+def : Pat < -+ (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)), -+ (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2) -+>; -+ -+defm V_READLANE_B32 : 
VOP2_32 <0x00000001, "V_READLANE_B32", []>; -+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; -+ -+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>; -+def : Pat < -+ (f32 (fadd AllReg_32:$src0, VReg_32:$src1)), -+ (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1) -+>; -+ -+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>; -+def : Pat < -+ (f32 (fsub AllReg_32:$src0, VReg_32:$src1)), -+ (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1) -+>; -+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; -+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; -+defm V_MUL_LEGACY_F32 : VOP2_32 < -+ 0x00000007, "V_MUL_LEGACY_F32", -+ [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))] -+>; -+ -+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", -+ [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))] -+>; -+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; -+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; -+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; -+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; -+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", -+ [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))] -+>; -+ -+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", -+ [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))] -+>; -+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; -+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; -+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; -+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; -+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; -+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; -+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; -+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; -+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; -+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; -+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; -+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; -+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", -+ [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))] -+>; -+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", -+ [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))] -+>; -+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", -+ [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))] -+>; -+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>; -+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; -+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; -+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; -+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; -+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; -+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; -+let Defs = [VCC] in { // Carry-out goes to VCC -+defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", -+ [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] -+>; -+defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", -+ [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] -+>; -+} // End Defs = [VCC] -+defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>; -+defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>; -+defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>; -+defm V_SUBBREV_U32 : VOP2_32 
<0x0000002a, "V_SUBBREV_U32", []>; -+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; -+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; -+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; -+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; -+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", -+ [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] -+>; -+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; -+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; -+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>; -+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>; -+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>; -+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>; -+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>; -+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>; -+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>; -+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>; -+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>; -+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>; -+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>; -+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>; -+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>; -+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>; -+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>; -+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>; -+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>; -+ -+let neverHasSideEffects = 1 in { -+ -+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; -+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>; -+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>; -+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>; -+ -+} // End neverHasSideEffects -+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; -+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; -+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; -+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; -+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; -+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; -+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; -+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; -+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; -+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; -+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; -+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; -+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; -+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; -+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>; -+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>; -+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>; -+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>; -+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>; -+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>; -+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>; -+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>; -+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>; -+//def 
V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>; -+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; -+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; -+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; -+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; -+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; -+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>; -+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>; -+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>; -+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; -+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; -+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; -+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; -+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; -+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; -+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; -+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; -+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; -+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; -+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; -+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; -+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; -+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; -+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; -+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; -+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; -+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>; -+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>; -+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>; -+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>; -+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>; -+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>; -+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>; -+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; -+ -+def S_CSELECT_B32 : SOP2 < -+ 0x0000000a, (outs SReg_32:$dst), -+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", -+ [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))] -+>; -+ -+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; -+ -+// f32 pattern for S_CSELECT_B32 -+def : Pat < -+ (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)), -+ (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) -+>; -+ -+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; -+ -+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", -+ [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))] -+>; -+def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", -+ [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))] -+>; -+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; -+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; -+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; -+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; -+def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; -+def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>; -+def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>; -+def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>; -+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>; -+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>; 
-+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
-+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
-+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
-+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
-+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
-+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
-+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
-+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
-+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
-+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
-+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
-+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
-+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
-+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
-+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
-+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
-+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
-+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
-+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
-+
-+class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
-+  (outs VReg_32:$dst),
-+  (ins immType:$src0),
-+  "V_MOV_IMM",
-+  [(set VReg_32:$dst, (immNode:$src0))]
-+>;
-+
-+let isCodeGenOnly = 1, isPseudo = 1 in {
-+
-+def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
-+def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
-+
-+def S_MOV_IMM_I32 : InstSI <
-+  (outs SReg_32:$dst),
-+  (ins i32imm:$src0),
-+  "S_MOV_IMM_I32",
-+  [(set SReg_32:$dst, (imm:$src0))]
-+>;
-+
-+// i64 immediates aren't really supported in hardware, but LLVM will use the i64
-+// type for indices on load and store instructions.  The pattern for
-+// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
-+// which the hardware can handle.
-+def S_MOV_IMM_I64 : InstSI <
-+  (outs SReg_64:$dst),
-+  (ins i64imm:$src0),
-+  "S_MOV_IMM_I64 $dst, $src0",
-+  [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
-+>;
-+
-+} // End isCodeGenOnly, isPseudo = 1
-+
-+class SI_LOAD_LITERAL<Operand ImmType> :
-+  Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
-+
-+  bits<32> imm;
-+  let Inst{31-0} = imm;
-+}
-+
-+def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
-+def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
-+
-+let isCodeGenOnly = 1, isPseudo = 1 in {
-+
-+def SET_M0 : InstSI <
-+  (outs SReg_32:$dst),
-+  (ins i32imm:$src0),
-+  "SET_M0",
-+  [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
-+>;
-+
-+def LOAD_CONST : AMDGPUShaderInst <
-+  (outs GPRF32:$dst),
-+  (ins i32imm:$src),
-+  "LOAD_CONST $dst, $src",
-+  [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
-+>;
-+
-+let usesCustomInserter = 1 in {
-+
-+def SI_V_CNDLT : InstSI <
-+  (outs VReg_32:$dst),
-+  (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
-+  "SI_V_CNDLT $dst, $src0, $src1, $src2",
-+  [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
-+>;
-+
-+def SI_INTERP : InstSI <
-+  (outs VReg_32:$dst),
-+  (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
-+  "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
-+  []
-+>;
-+
-+def SI_INTERP_CONST : InstSI <
-+  (outs VReg_32:$dst),
-+  (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
-+  "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
-+  [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
-+                                                 imm:$attr, SReg_32:$params))]
-+>;
-+
-+def SI_WQM : InstSI <
-+  (outs),
-+  (ins),
-+  "SI_WQM",
-+  [(int_SI_wqm)]
-+>;
-+
-+} // end usesCustomInserter
-+
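The IMM32bitIn64bit predicate referenced by the S_MOV_IMM_I64 pattern above is defined elsewhere in the patch. As a hedge, here is a plausible C++ sketch of the check it would perform (a hypothetical model, not the actual PatLeaf; the helper name is ours):

    #include <cstdint>
    #include "llvm/Support/MathExtras.h"

    // Hypothetical model of IMM32bitIn64bit: S_MOV_IMM_I64 can only
    // materialize an i64 immediate whose value round-trips through 32 bits.
    static bool fitsIn32Bits(int64_t Imm) {
      return llvm::isInt<32>(Imm) || llvm::isUInt<32>(Imm);
    }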
-+// SI Pseudo instructions.  These are used by the CFG structurizer pass
-+// and should be lowered to ISA instructions prior to codegen.
-+
-+let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
-+    Uses = [EXEC], Defs = [EXEC] in {
-+
-+let isBranch = 1, isTerminator = 1 in {
-+
-+def SI_IF : InstSI <
-+  (outs SReg_64:$dst),
-+  (ins SReg_1:$vcc, brtarget:$target),
-+  "SI_IF",
-+  [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))]
-+>;
-+
-+def SI_ELSE : InstSI <
-+  (outs SReg_64:$dst),
-+  (ins SReg_64:$src, brtarget:$target),
-+  "SI_ELSE",
-+  [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
-+
-+  let Constraints = "$src = $dst";
-+}
-+
-+def SI_LOOP : InstSI <
-+  (outs),
-+  (ins SReg_64:$saved, brtarget:$target),
-+  "SI_LOOP",
-+  [(int_SI_loop SReg_64:$saved, bb:$target)]
-+>;
-+
-+} // end isBranch = 1, isTerminator = 1
-+
-+def SI_BREAK : InstSI <
-+  (outs SReg_64:$dst),
-+  (ins SReg_64:$src),
-+  "SI_BREAK",
-+  [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
-+>;
-+
-+def SI_IF_BREAK : InstSI <
-+  (outs SReg_64:$dst),
-+  (ins SReg_1:$vcc, SReg_64:$src),
-+  "SI_IF_BREAK",
-+  [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))]
-+>;
-+
-+def SI_ELSE_BREAK : InstSI <
-+  (outs SReg_64:$dst),
-+  (ins SReg_64:$src0, SReg_64:$src1),
-+  "SI_ELSE_BREAK",
-+  [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
-+>;
-+
-+def SI_END_CF : InstSI <
-+  (outs),
-+  (ins SReg_64:$saved),
-+  "SI_END_CF",
-+  [(int_SI_end_cf SReg_64:$saved)]
-+>;
-+
-+def SI_KILL : InstSI <
-+  (outs),
-+  (ins VReg_32:$src),
-+  "SI_KIL $src",
-+  [(int_AMDGPU_kill VReg_32:$src)]
-+>;
-+
-+} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
-+  // Uses = [EXEC], Defs = [EXEC]
-+
-+} // end IsCodeGenOnly, isPseudo
-+
-+def : Pat <
-+  (int_AMDGPU_kilp),
-+  (SI_KILL (V_MOV_IMM_I32 0xbf800000))
-+>;
-+
-+/* int_SI_vs_load_input */
-+def : Pat<
-+  (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
-+                        VReg_32:$buf_idx_vgpr),
-+  (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
-+                           VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
-+                           0, 0, (i32 SREG_LIT_0))
-+>;
-+
-+/* int_SI_export */
-+def : Pat <
-+  (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
-+                 VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
-+  (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
-+       VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
-+>;
-+
-+/* int_SI_sample */
-+def : Pat <
-+  (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm),
-+  (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+                SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+def : Pat <
-+  (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
-+  (IMAGE_SAMPLE imm:$writemask, 1, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+                SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+/* int_SI_sample_lod */
-+def : Pat <
-+  (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm),
-+  (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+                  SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+/* int_SI_sample_bias */
-+def : Pat <
-+  (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm),
-+  (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
-+                  SReg_256:$rsrc, SReg_128:$sampler)
-+>;
-+
-+def CLAMP_SI : CLAMP<VReg_32>;
-+def FABS_SI : FABS<VReg_32>;
-+def FNEG_SI : FNEG<VReg_32>;
-+
-+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 0, sel_x>;
-+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 1, sel_y>;
-+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 2, sel_z>;
-+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 3, sel_w>;
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
-+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
-+
-+def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-+def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
-+
-+def : BitConvert <i32, f32, SReg_32>;
-+def : BitConvert <i32, f32, VReg_32>;
-+
-+def : BitConvert <f32, i32, SReg_32>;
-+def : BitConvert <f32, i32, VReg_32>;
-+
-+def : Pat <
-+  (i64 (SIsreg1_bitcast SReg_1:$vcc)),
-+  (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64))
-+>;
-+
-+def : Pat <
-+  (i1 (SIsreg1_bitcast SReg_64:$vcc)),
-+  (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1)
-+>;
-+
-+def : Pat <
-+  (i64 (SIvcc_bitcast VCCReg:$vcc)),
-+  (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
-+>;
-+
-+def : Pat <
-+  (i1 (SIvcc_bitcast SReg_64:$vcc)),
-+  (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
-+>;
-+
-+/********** ===================== **********/
-+/********** Interpolation Patterns **********/
-+/********** ===================== **********/
-+
-+def : Pat <
-+  (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
-+  (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
-+             imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
-+  (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
-+             imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
-+  (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
-+             imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
-+  (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
-+             imm:$attr, SReg_32:$params)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_read_face),
-+  (f32 FRONT_FACE)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_read_pos 0),
-+  (f32 POS_X_FLOAT)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_read_pos 1),
-+  (f32 POS_Y_FLOAT)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_read_pos 2),
-+  (f32 POS_Z_FLOAT)
-+>;
-+
-+def : Pat <
-+  (int_SI_fs_read_pos 3),
-+  (f32 POS_W_FLOAT)
-+>;
-+
-+/********** ================== **********/
-+/********** Intrinsic Patterns **********/
-+/********** ================== **********/
-+
-+/* llvm.AMDGPU.pow */
-+/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
-+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
-+
-+def : Pat <
-+  (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
-+  (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
-+>;
-+
-+def : Pat<
-+  (fdiv AllReg_32:$src0, AllReg_32:$src1),
-+  (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
-+>;
-+
-+def : Pat <
-+  (int_AMDGPU_cube VReg_128:$src),
-+  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
-+    (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+                  0, 0, 0, 0), sel_x),
-+    (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+                  0, 0, 0, 0), sel_y),
-+    (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+                  0, 0, 0, 0), sel_z),
-+    (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_y),
-+                  (EXTRACT_SUBREG VReg_128:$src, sel_z),
-+                  0, 0, 0, 0), sel_w)
-+>;
-+
-+/********** ================== **********/
-+/********** VOP3 Patterns **********/
-+/********** ================== **********/
-+
-+def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)),
-+           (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2,
-+            0, 0, 0, 0)>;
-+
-+} // End isSI predicate
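Note that the fdiv pattern above lowers floating-point division to a reciprocal followed by a multiply. A minimal C++ model of what that computes, to make the rounding caveat concrete: the result is rounded twice, so it is not an exactly rounded IEEE divide (the 1.0f/b below merely stands in for V_RCP_F32, which is itself a hardware approximation).

    // Model of the (fdiv a, b) -> V_MUL_F32(a, V_RCP_F32(b)) pattern above.
    float fdivViaRcp(float a, float b) {
      float rcp = 1.0f / b; // stands in for V_RCP_F32 (an approximation)
      return a * rcp;       // V_MUL_F32; two roundings instead of one
    }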
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIIntrinsics.td llvm-r600/lib/Target/R600/SIIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/SIIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIIntrinsics.td 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,54 @@
-+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// SI Intrinsic Definitions
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+let TargetPrefix = "SI", isTarget = 1 in {
-+
-+  def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
-+  /* XXX: We may need a separate intrinsic here for loading integer values */
-+  def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-+  def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
-+  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]>;
-+  def int_SI_wqm : Intrinsic <[], [], []>;
-+
-+  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>;
-+
-+  def int_SI_sample : Sample;
-+  def int_SI_sample_bias : Sample;
-+  def int_SI_sample_lod : Sample;
-+
-+  /* Interpolation Intrinsics */
-+
-+  def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
-+  class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
-+
-+  def int_SI_fs_interp_linear_center : Interp;
-+  def int_SI_fs_interp_linear_centroid : Interp;
-+  def int_SI_fs_interp_persp_center : Interp;
-+  def int_SI_fs_interp_persp_centroid : Interp;
-+  def int_SI_fs_interp_constant : Interp;
-+
-+  def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
-+  def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-+
-+  /* Control flow Intrinsics */
-+
-+  def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
-+  def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
-+  def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
-+  def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
-+  def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
-+  def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
-+  def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
-+}
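The i64 values threaded through the control flow intrinsics above (int_SI_if, int_SI_else, int_SI_loop, int_SI_end_cf) are 64-lane execution masks, one bit per thread of a wavefront. A small self-contained C++ illustration of that representation (helper names are ours, not part of the patch):

    #include <bit>
    #include <cstdint>

    // One bit per vector lane; 64 lanes per wavefront.
    int activeLanes(uint64_t Mask) { return std::popcount(Mask); } // 0..64
    bool allLanesDead(uint64_t Mask) { return Mask == 0; }         // the S_CBRANCH_EXECZ case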
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIISelLowering.cpp llvm-r600/lib/Target/R600/SIISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/SIISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIISelLowering.cpp 2013-01-25 19:43:57.470049720 +0100
-@@ -0,0 +1,486 @@
-+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Custom DAG lowering for SI
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "SIISelLowering.h"
-+#include "AMDIL.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "SIInstrInfo.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "SIRegisterInfo.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+
-+using namespace llvm;
-+
-+SITargetLowering::SITargetLowering(TargetMachine &TM) :
-+    AMDGPUTargetLowering(TM),
-+    TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) {
-+  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
-+  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
-+  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
-+  addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
-+  addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
-+  addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
-+
-+  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
-+  addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
-+
-+  computeRegisterProperties();
-+
-+  setOperationAction(ISD::AND, MVT::i1, Custom);
-+
-+  setOperationAction(ISD::ADD, MVT::i64, Legal);
-+  setOperationAction(ISD::ADD, MVT::i32, Legal);
-+
-+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-+
-+  // We need to custom lower loads from the USER_SGPR address space, so we can
-+  // add the SGPRs as livein registers.
-+  setOperationAction(ISD::LOAD, MVT::i32, Custom);
-+  setOperationAction(ISD::LOAD, MVT::i64, Custom);
-+
-+  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
-+  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-+
-+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-+  setTargetDAGCombine(ISD::SELECT_CC);
-+
-+  setTargetDAGCombine(ISD::SETCC);
-+}
-+
-+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
-+    MachineInstr * MI, MachineBasicBlock * BB) const {
-+  const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
-+  MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
-+  MachineBasicBlock::iterator I = MI;
-+
-+  switch (MI->getOpcode()) {
-+  default:
-+    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
-+  case AMDGPU::BRANCH: return BB;
-+  case AMDGPU::CLAMP_SI:
-+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
-+           .addOperand(MI->getOperand(0))
-+           .addOperand(MI->getOperand(1))
-+           // VSRC1-2 are unused, but we still need to fill all the
-+           // operand slots, so we just reuse the VSRC0 operand
-+           .addOperand(MI->getOperand(1))
-+           .addOperand(MI->getOperand(1))
-+           .addImm(0) // ABS
-+           .addImm(1) // CLAMP
-+           .addImm(0) // OMOD
-+           .addImm(0); // NEG
-+    MI->eraseFromParent();
-+    break;
-+
-+  case AMDGPU::FABS_SI:
-+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
-+           .addOperand(MI->getOperand(0))
-+           .addOperand(MI->getOperand(1))
-+           // VSRC1-2 are unused, but we still need to fill all the
-+           // operand slots, so we just reuse the VSRC0 operand
-+           .addOperand(MI->getOperand(1))
-+           .addOperand(MI->getOperand(1))
-+           .addImm(1) // ABS
-+           .addImm(0) // CLAMP
-+           .addImm(0) // OMOD
-+           .addImm(0); // NEG
-+    MI->eraseFromParent();
-+    break;
-+
-+  case AMDGPU::FNEG_SI:
-+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
-+           .addOperand(MI->getOperand(0))
-+           .addOperand(MI->getOperand(1))
-+           // VSRC1-2 are unused, but we still need to fill all the
-+           // operand slots, so we just reuse the VSRC0 operand
-+           .addOperand(MI->getOperand(1))
-+           .addOperand(MI->getOperand(1))
-+           .addImm(0) // ABS
-+           .addImm(0) // CLAMP
-+           .addImm(0) // OMOD
-+           .addImm(1); // NEG
-+    MI->eraseFromParent();
-+    break;
-+  case AMDGPU::SHADER_TYPE:
-+    BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
-+        MI->getOperand(0).getImm();
-+    MI->eraseFromParent();
-+    break;
-+
-+  case AMDGPU::SI_INTERP:
-+    LowerSI_INTERP(MI, *BB, I, MRI);
-+    break;
-+  case AMDGPU::SI_INTERP_CONST:
-+    LowerSI_INTERP_CONST(MI, *BB, I, MRI);
-+    break;
-+  case AMDGPU::SI_WQM:
-+    LowerSI_WQM(MI, *BB, I, MRI);
-+    break;
-+  case AMDGPU::SI_V_CNDLT:
-+    LowerSI_V_CNDLT(MI, *BB, I, MRI);
-+    break;
-+  }
-+  return BB;
-+}
-+
-+void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-+    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
-+          .addReg(AMDGPU::EXEC);
-+
-+  MI->eraseFromParent();
-+}
-+
-+void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
-+    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-+  unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-+  unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
-+  MachineOperand dst = MI->getOperand(0);
-+  MachineOperand iReg = MI->getOperand(1);
-+  MachineOperand jReg = MI->getOperand(2);
-+  MachineOperand attr_chan = MI->getOperand(3);
-+  MachineOperand attr = MI->getOperand(4);
-+  MachineOperand params = MI->getOperand(5);
-+
-+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
-+          .addOperand(params);
-+
-+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
-+          .addOperand(iReg)
-+          .addOperand(attr_chan)
-+          .addOperand(attr)
-+          .addReg(M0);
-+
-+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
-+          .addOperand(dst)
-+          .addReg(tmp)
-+          .addOperand(jReg)
-+          .addOperand(attr_chan)
-+          .addOperand(attr)
-+          .addReg(M0);
-+
-+  MI->eraseFromParent();
-+}
-+
-+void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
-+    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
-+    MachineRegisterInfo &MRI) const {
-+  MachineOperand dst = MI->getOperand(0);
-+  MachineOperand attr_chan = MI->getOperand(1);
-+  MachineOperand attr = MI->getOperand(2);
-+  MachineOperand params = MI->getOperand(3);
-+  unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
-+
-+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
-+          .addOperand(params);
-+
-+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
-+          .addOperand(dst)
-+          .addOperand(attr_chan)
-+          .addOperand(attr)
-+          .addReg(M0);
-+
-+  MI->eraseFromParent();
-+}
-+
-+void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
-+    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-+  unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-+
-+  BuildMI(BB, I, BB.findDebugLoc(I),
-+          TII->get(AMDGPU::V_CMP_GT_F32_e32),
-+          VCC)
-+          .addReg(AMDGPU::SREG_LIT_0)
-+          .addOperand(MI->getOperand(1));
-+
-+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32))
-+          .addOperand(MI->getOperand(0))
-+          .addOperand(MI->getOperand(3))
-+          .addOperand(MI->getOperand(2))
-+          .addReg(VCC);
-+
-+  MI->eraseFromParent();
-+}
-+
-+EVT SITargetLowering::getSetCCResultType(EVT VT) const {
-+  return MVT::i1;
-+}
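Per lane, LowerSI_V_CNDLT above emits a compare against the literal-zero register followed by a conditional mask move: V_CMP_GT_F32 0, c sets the lane's VCC bit where c < 0, and V_CNDMASK_B32 selects its second source where VCC is set, which is why operand 3 is added before operand 2. A one-line C++ model of the resulting semantics:

    // What int_AMDGPU_cndlt computes per lane after the lowering above.
    float cndlt(float c, float ifNeg, float ifNotNeg) {
      return (0.0f > c) ? ifNeg : ifNotNeg; // VCC set iff c < 0
    }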
-+
-+//===----------------------------------------------------------------------===//
-+// Custom DAG Lowering Operations
-+//===----------------------------------------------------------------------===//
-+
-+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
-+  switch (Op.getOpcode()) {
-+  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-+  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
-+  case ISD::LOAD: return LowerLOAD(Op, DAG);
-+  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
-+  case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
-+  case ISD::INTRINSIC_WO_CHAIN: {
-+    unsigned IntrinsicID =
-+        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+    EVT VT = Op.getValueType();
-+    switch (IntrinsicID) {
-+    case AMDGPUIntrinsic::SI_vs_load_buffer_index:
-+      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
-+                                  AMDGPU::VGPR0, VT);
-+    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-+    }
-+    break;
-+  }
-+  }
-+  return SDValue();
-+}
-+
-+/// \brief The function is for lowering i1 operations on the
-+/// VCC register.
-+///
-+/// In the VALU context, VCC is a one bit register, but in the
-+/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
-+/// the SALU can perform operations on the VCC register, we need to promote
-+/// the operand types from i1 to i64 in order for tablegen to be able to match
-+/// this operation to the correct SALU instruction.  We do this promotion by
-+/// wrapping the operands in a CopyToReg node.
-+///
-+SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
-+                                               SelectionDAG &DAG,
-+                                               unsigned VCCNode) const {
-+  DebugLoc DL = Op.getDebugLoc();
-+
-+  SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
-+                               DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
-+                                           Op.getOperand(0)),
-+                               DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
-+                                           Op.getOperand(1)));
-+
-+  return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
-+}
-+
-+/// \brief Helper function for LowerBRCOND
-+static SDNode *findUser(SDValue Value, unsigned Opcode) {
-+
-+  SDNode *Parent = Value.getNode();
-+  for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
-+       I != E; ++I) {
-+
-+    if (I.getUse().get() != Value)
-+      continue;
-+
-+    if (I->getOpcode() == Opcode)
-+      return *I;
-+  }
-+  return 0;
-+}
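A sketch of Loweri1ContextSwitch's effect on the DAG, using the SIISD node names from this file; the shapes are illustrative rather than dumped from a real compile, and per wavefront the promoted operation is just a 64-lane mask AND:

    #include <cstdint>

    // Before:  i1 %r    = and i1 %a, i1 %b         (no SALU pattern matches this)
    // After:   i64 %a64 = VCC_BITCAST %a
    //          i64 %b64 = VCC_BITCAST %b
    //          i64 %r64 = and %a64, %b64           (matched by S_AND_B64 / S_AND_VCC)
    //          i1  %r   = VCC_BITCAST %r64
    uint64_t saluAnd(uint64_t A, uint64_t B) { return A & B; } // models S_AND_B64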
-+
-+/// This transforms the control flow intrinsics to get the branch destination
-+/// as the last parameter, and also switches the branch target with BR if
-+/// the need arises.
-+SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
-+                                      SelectionDAG &DAG) const {
-+
-+  DebugLoc DL = BRCOND.getDebugLoc();
-+
-+  SDNode *Intr = BRCOND.getOperand(1).getNode();
-+  SDValue Target = BRCOND.getOperand(2);
-+  SDNode *BR = 0;
-+
-+  if (Intr->getOpcode() == ISD::SETCC) {
-+    // As long as we negate the condition everything is fine
-+    SDNode *SetCC = Intr;
-+    assert(SetCC->getConstantOperandVal(1) == 1);
-+
-+    CondCodeSDNode *CC = cast<CondCodeSDNode>(SetCC->getOperand(2).getNode());
-+    assert(CC->get() == ISD::SETNE);
-+    Intr = SetCC->getOperand(0).getNode();
-+
-+  } else {
-+    // Get the target from BR if we don't negate the condition
-+    BR = findUser(BRCOND, ISD::BR);
-+    Target = BR->getOperand(1);
-+  }
-+
-+  assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
-+
-+  // Build the result value types of the new intrinsic call
-+  SmallVector<EVT, 4> Res;
-+  for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
-+    Res.push_back(Intr->getValueType(i));
-+
-+  // operands of the new intrinsic call
-+  SmallVector<SDValue, 8> Ops;
-+  Ops.push_back(BRCOND.getOperand(0));
-+  for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
-+    Ops.push_back(Intr->getOperand(i));
-+  Ops.push_back(Target);
-+
-+  // build the new intrinsic call
-+  SDNode *Result = DAG.getNode(
-+    Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
-+    DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
-+
-+  if (BR) {
-+    // Give the branch instruction our target
-+    SDValue Ops[] = {
-+      BR->getOperand(0),
-+      BRCOND.getOperand(2)
-+    };
-+    DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
-+  }
-+
-+  SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
-+
-+  // Copy the intrinsic results to registers
-+  for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
-+    SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
-+    if (!CopyToReg)
-+      continue;
-+
-+    Chain = DAG.getCopyToReg(
-+      Chain, DL,
-+      CopyToReg->getOperand(1),
-+      SDValue(Result, i - 1),
-+      SDValue());
-+
-+    DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
-+  }
-+
-+  // Remove the old intrinsic from the chain
-+  DAG.ReplaceAllUsesOfValueWith(
-+    SDValue(Intr, Intr->getNumValues() - 1),
-+    Intr->getOperand(0));
-+
-+  return Chain;
-+}
-+
-+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
-+  EVT VT = Op.getValueType();
-+  LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
-+
-+  assert(Ptr);
-+
-+  unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
-+
-+  // We only need to lower USER_SGPR address space loads
-+  if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
-+    return SDValue();
-+  }
-+
-+  // Loads from the USER_SGPR address space can only have constant value
-+  // pointers.
-+  ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
-+  assert(BasePtr);
-+
-+  unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
-+  const TargetRegisterClass * dstClass;
-+  switch (TypeDwordWidth) {
-+  default:
-+    assert(!"USER_SGPR value size not implemented");
-+    return SDValue();
-+  case 1:
-+    dstClass = &AMDGPU::SReg_32RegClass;
-+    break;
-+  case 2:
-+    dstClass = &AMDGPU::SReg_64RegClass;
-+    break;
-+  }
-+  uint64_t Index = BasePtr->getZExtValue();
-+  assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
-+  unsigned SGPRIndex = Index / TypeDwordWidth;
-+  unsigned Reg = dstClass->getRegister(SGPRIndex);
-+
-+  DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
-+                                                         VT));
-+  return SDValue();
-+}
-+
-+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-+  SDValue LHS = Op.getOperand(0);
-+  SDValue RHS = Op.getOperand(1);
-+  SDValue True = Op.getOperand(2);
-+  SDValue False = Op.getOperand(3);
-+  SDValue CC = Op.getOperand(4);
-+  EVT VT = Op.getValueType();
-+  DebugLoc DL = Op.getDebugLoc();
-+
-+  // Possible Min/Max pattern
-+  SDValue MinMax = LowerMinMax(Op, DAG);
-+  if (MinMax.getNode()) {
-+    return MinMax;
-+  }
-+
-+  SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
-+  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Custom DAG optimizations
-+//===----------------------------------------------------------------------===//
-+
-+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
-+                                            DAGCombinerInfo &DCI) const {
-+  SelectionDAG &DAG = DCI.DAG;
-+  DebugLoc DL = N->getDebugLoc();
-+  EVT VT = N->getValueType(0);
-+
-+  switch (N->getOpcode()) {
-+  default: break;
-+  case ISD::SELECT_CC: {
-+    N->dump();
-+    ConstantSDNode *True, *False;
-+    // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
-+    if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
-+        && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
-+        && True->isAllOnesValue()
-+        && False->isNullValue()
-+        && VT == MVT::i1) {
-+      return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
-+                         N->getOperand(1), N->getOperand(4));
-+
-+    }
-+    break;
-+  }
-+  case ISD::SETCC: {
-+    SDValue Arg0 = N->getOperand(0);
-+    SDValue Arg1 = N->getOperand(1);
-+    SDValue CC = N->getOperand(2);
-+    ConstantSDNode * C = NULL;
-+    ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
-+
-+    // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
-+    if (VT == MVT::i1
-+        && Arg0.getOpcode() == ISD::SIGN_EXTEND
-+        && Arg0.getOperand(0).getValueType() == MVT::i1
-+        && (C = dyn_cast<ConstantSDNode>(Arg1))
-+        && C->isNullValue()
-+        && CCOp == ISD::SETNE) {
-+      return SimplifySetCC(VT, Arg0.getOperand(0),
-+                           DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
-+    }
-+    break;
-+  }
-+  }
-+  return SDValue();
-+}
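Both combines above are lane-wise identities. For the SETCC case, comparing sext(i1 x) against zero with setne yields x itself, since sext maps true to all-ones and false to zero; the SELECT_CC case is the same fold with -1/0 as the select arms. A self-checking C++ sketch of the SETCC identity:

    #include <cstdint>

    // setcc (sext i1 x), 0, setne  ==  x, modeled on a single lane.
    constexpr bool sextNeZero(bool x) { return (x ? int32_t(-1) : int32_t(0)) != 0; }
    static_assert(sextNeZero(true), "sext(1) = -1, and -1 != 0");
    static_assert(!sextNeZero(false), "sext(0) = 0, and 0 != 0 is false");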
-+
-+#define NODE_NAME_CASE(node) case SIISD::node: return #node;
-+
-+const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const {
-+  switch (Opcode) {
-+  default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
-+  NODE_NAME_CASE(VCC_AND)
-+  NODE_NAME_CASE(VCC_BITCAST)
-+  }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIISelLowering.h llvm-r600/lib/Target/R600/SIISelLowering.h
---- llvm-3.2.src/lib/Target/R600/SIISelLowering.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIISelLowering.h 2013-01-25 19:43:57.473383054 +0100
-@@ -0,0 +1,55 @@
-+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief SI DAG Lowering interface definition
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef SIISELLOWERING_H
-+#define SIISELLOWERING_H
-+
-+#include "AMDGPUISelLowering.h"
-+#include "SIInstrInfo.h"
-+
-+namespace llvm {
-+
-+class SITargetLowering : public AMDGPUTargetLowering {
-+  const SIInstrInfo * TII;
-+
-+  void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
-+      MachineBasicBlock::iterator I, unsigned Opcode) const;
-+  void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
-+      MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-+  void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
-+      MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
-+  void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-+      MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-+  void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
-+      MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-+
-+  SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
-+                               unsigned VCCNode) const;
-+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-+
-+public:
-+  SITargetLowering(TargetMachine &tm);
-+  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
-+      MachineBasicBlock * BB) const;
-+  virtual EVT getSetCCResultType(EVT VT) const;
-+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-+  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-+  virtual const char* getTargetNodeName(unsigned Opcode) const;
-+};
-+
-+} // End namespace llvm
-+
-+#endif //SIISELLOWERING_H
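The next hunk adds the pass that lowers the SI_IF/SI_ELSE pseudos defined earlier. As a rough single-wavefront model of what its If() lowering does to the exec mask (assuming the semantics documented in the pass comment below; names are ours):

    #include <cstdint>

    struct IfMasks {
      uint64_t Exec;    // new EXEC: lanes that run the "then" block
      uint64_t Pending; // lanes parked until SI_ELSE re-enables them
    };

    // Models S_AND_SAVEEXEC_B64 followed by S_XOR_B64, as emitted by If().
    IfMasks lowerIf(uint64_t Exec, uint64_t Vcc) {
      uint64_t Saved = Exec;           // S_AND_SAVEEXEC_B64: save old EXEC...
      Exec &= Vcc;                     // ...and narrow it to the taken lanes
      uint64_t Pending = Exec ^ Saved; // S_XOR_B64: the not-taken lanes
      return {Exec, Pending};
    }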
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SILowerControlFlow.cpp llvm-r600/lib/Target/R600/SILowerControlFlow.cpp
---- llvm-3.2.src/lib/Target/R600/SILowerControlFlow.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SILowerControlFlow.cpp 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,372 @@
-+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass lowers the pseudo control flow instructions to real
-+/// machine instructions.
-+///
-+/// All control flow is handled using predicated instructions and
-+/// a predicate stack.  Each Scalar ALU controls the operations of 64 Vector
-+/// ALUs.  The Scalar ALU can update the predicate for any of the Vector ALUs
-+/// by writing to the 64-bit EXEC register (each bit corresponds to a
-+/// single vector ALU).  Typically, for predicates, a vector ALU will write
-+/// to its bit of the VCC register (like EXEC, VCC is 64-bits, one for each
-+/// Vector ALU) and then the Scalar ALU will AND the VCC register with the
-+/// EXEC to update the predicates.
-+///
-+/// For example:
-+/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
-+/// %SGPR0 = SI_IF %VCC
-+///   %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
-+/// %SGPR0 = SI_ELSE %SGPR0
-+///   %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
-+/// SI_END_CF %SGPR0
-+///
-+/// becomes:
-+///
-+/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC  // Save and update the exec mask
-+/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC  // Clear live bits from saved exec mask
-+/// S_CBRANCH_EXECZ label0            // This instruction is an optional
-+///                                   // optimization which allows us to
-+///                                   // branch if all the bits of
-+///                                   // EXEC are zero.
-+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
-+///
-+/// label0:
-+/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC  // Restore the exec mask for the Then block
-+/// %EXEC = S_XOR_B64 %SGPR0, %EXEC   // Clear live bits from saved exec mask
-+/// S_BRANCH_EXECZ label1             // Use our branch optimization
-+///                                   // instruction again.
-+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block -+/// label1: -+/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPU.h" -+#include "SIInstrInfo.h" -+#include "SIMachineFunctionInfo.h" -+#include "llvm/CodeGen/MachineFunction.h" -+#include "llvm/CodeGen/MachineFunctionPass.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineRegisterInfo.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+class SILowerControlFlowPass : public MachineFunctionPass { -+ -+private: -+ static const unsigned SkipThreshold = 12; -+ -+ static char ID; -+ const TargetInstrInfo *TII; -+ -+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); -+ -+ void Skip(MachineInstr &From, MachineOperand &To); -+ void SkipIfDead(MachineInstr &MI); -+ -+ void If(MachineInstr &MI); -+ void Else(MachineInstr &MI); -+ void Break(MachineInstr &MI); -+ void IfBreak(MachineInstr &MI); -+ void ElseBreak(MachineInstr &MI); -+ void Loop(MachineInstr &MI); -+ void EndCf(MachineInstr &MI); -+ -+ void Kill(MachineInstr &MI); -+ void Branch(MachineInstr &MI); -+ -+public: -+ SILowerControlFlowPass(TargetMachine &tm) : -+ MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } -+ -+ virtual bool runOnMachineFunction(MachineFunction &MF); -+ -+ const char *getPassName() const { -+ return "SI Lower control flow instructions"; -+ } -+ -+}; -+ -+} // End anonymous namespace -+ -+char SILowerControlFlowPass::ID = 0; -+ -+FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { -+ return new SILowerControlFlowPass(tm); -+} -+ -+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, -+ MachineBasicBlock *To) { -+ -+ unsigned NumInstr = 0; -+ -+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty(); -+ MBB = *MBB->succ_begin()) { -+ -+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); -+ NumInstr < SkipThreshold && I != E; ++I) { -+ -+ if (I->isBundle() || !I->isBundled()) -+ if (++NumInstr >= SkipThreshold) -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { -+ -+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB())) -+ return; -+ -+ DebugLoc DL = From.getDebugLoc(); -+ BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) -+ .addOperand(To) -+ .addReg(AMDGPU::EXEC); -+} -+ -+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { -+ -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ -+ if (!shouldSkip(&MBB, &MBB.getParent()->back())) -+ return; -+ -+ MachineBasicBlock::iterator Insert = &MI; -+ ++Insert; -+ -+ // If the exec mask is non-zero, skip the next two instructions -+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) -+ .addImm(3) -+ .addReg(AMDGPU::EXEC); -+ -+ // Exec mask is zero: Export to NULL target... -+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) -+ .addImm(0) -+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL -+ .addImm(0) -+ .addImm(1) -+ .addImm(1) -+ .addReg(AMDGPU::SREG_LIT_0) -+ .addReg(AMDGPU::SREG_LIT_0) -+ .addReg(AMDGPU::SREG_LIT_0) -+ .addReg(AMDGPU::SREG_LIT_0); -+ -+ // ... 
and terminate wavefront -+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); -+} -+ -+void SILowerControlFlowPass::If(MachineInstr &MI) { -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ unsigned Reg = MI.getOperand(0).getReg(); -+ unsigned Vcc = MI.getOperand(1).getReg(); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg) -+ .addReg(Vcc); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg) -+ .addReg(AMDGPU::EXEC) -+ .addReg(Reg); -+ -+ Skip(MI, MI.getOperand(2)); -+ -+ MI.eraseFromParent(); -+} -+ -+void SILowerControlFlowPass::Else(MachineInstr &MI) { -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ unsigned Dst = MI.getOperand(0).getReg(); -+ unsigned Src = MI.getOperand(1).getReg(); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) -+ .addReg(Src); // Saved EXEC -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) -+ .addReg(AMDGPU::EXEC) -+ .addReg(Dst); -+ -+ Skip(MI, MI.getOperand(2)); -+ -+ MI.eraseFromParent(); -+} -+ -+void SILowerControlFlowPass::Break(MachineInstr &MI) { -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ -+ unsigned Dst = MI.getOperand(0).getReg(); -+ unsigned Src = MI.getOperand(1).getReg(); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) -+ .addReg(AMDGPU::EXEC) -+ .addReg(Src); -+ -+ MI.eraseFromParent(); -+} -+ -+void SILowerControlFlowPass::IfBreak(MachineInstr &MI) { -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ -+ unsigned Dst = MI.getOperand(0).getReg(); -+ unsigned Vcc = MI.getOperand(1).getReg(); -+ unsigned Src = MI.getOperand(2).getReg(); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) -+ .addReg(Vcc) -+ .addReg(Src); -+ -+ MI.eraseFromParent(); -+} -+ -+void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) { -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ -+ unsigned Dst = MI.getOperand(0).getReg(); -+ unsigned Saved = MI.getOperand(1).getReg(); -+ unsigned Src = MI.getOperand(2).getReg(); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) -+ .addReg(Saved) -+ .addReg(Src); -+ -+ MI.eraseFromParent(); -+} -+ -+void SILowerControlFlowPass::Loop(MachineInstr &MI) { -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ unsigned Src = MI.getOperand(0).getReg(); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC) -+ .addReg(AMDGPU::EXEC) -+ .addReg(Src); -+ -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) -+ .addOperand(MI.getOperand(1)) -+ .addReg(AMDGPU::EXEC); -+ -+ MI.eraseFromParent(); -+} -+ -+void SILowerControlFlowPass::EndCf(MachineInstr &MI) { -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ unsigned Reg = MI.getOperand(0).getReg(); -+ -+ BuildMI(MBB, MBB.getFirstNonPHI(), DL, -+ TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC) -+ .addReg(AMDGPU::EXEC) -+ .addReg(Reg); -+ -+ MI.eraseFromParent(); -+} -+ -+void SILowerControlFlowPass::Branch(MachineInstr &MI) { -+ MachineBasicBlock *Next = MI.getParent()->getNextNode(); -+ MachineBasicBlock *Target = MI.getOperand(0).getMBB(); -+ if (Target == Next) -+ MI.eraseFromParent(); -+ else -+ assert(0); -+} -+ -+void SILowerControlFlowPass::Kill(MachineInstr &MI) { -+ -+ MachineBasicBlock &MBB = *MI.getParent(); -+ DebugLoc DL = MI.getDebugLoc(); -+ -+ // Kill is only allowed in pixel shaders -+ MachineFunction &MF = *MBB.getParent(); 
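-+ // A note on the accessor below: MachineFunction::getInfo<T>() lazily
-+ // constructs the target-specific MachineFunctionInfo; ShaderType is
-+ // expected to have been recorded from the llvm.AMDGPU.shader.type
-+ // intrinsic (cf. test/CodeGen/SI/sanity.ll).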
-+ SIMachineFunctionInfo *Info = MF.getInfo(); -+ assert(Info->ShaderType == ShaderType::PIXEL); -+ -+ // Clear this pixel from the exec mask if the operand is negative -+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) -+ .addReg(AMDGPU::SREG_LIT_0) -+ .addOperand(MI.getOperand(0)); -+ -+ MI.eraseFromParent(); -+} -+ -+bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { -+ -+ bool HaveKill = false; -+ unsigned Depth = 0; -+ -+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); -+ BI != BE; ++BI) { -+ -+ MachineBasicBlock &MBB = *BI; -+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); -+ I != MBB.end(); I = Next) { -+ -+ Next = llvm::next(I); -+ MachineInstr &MI = *I; -+ switch (MI.getOpcode()) { -+ default: break; -+ case AMDGPU::SI_IF: -+ ++Depth; -+ If(MI); -+ break; -+ -+ case AMDGPU::SI_ELSE: -+ Else(MI); -+ break; -+ -+ case AMDGPU::SI_BREAK: -+ Break(MI); -+ break; -+ -+ case AMDGPU::SI_IF_BREAK: -+ IfBreak(MI); -+ break; -+ -+ case AMDGPU::SI_ELSE_BREAK: -+ ElseBreak(MI); -+ break; -+ -+ case AMDGPU::SI_LOOP: -+ ++Depth; -+ Loop(MI); -+ break; -+ -+ case AMDGPU::SI_END_CF: -+ if (--Depth == 0 && HaveKill) { -+ SkipIfDead(MI); -+ HaveKill = false; -+ } -+ EndCf(MI); -+ break; -+ -+ case AMDGPU::SI_KILL: -+ if (Depth == 0) -+ SkipIfDead(MI); -+ else -+ HaveKill = true; -+ Kill(MI); -+ break; -+ -+ case AMDGPU::S_BRANCH: -+ Branch(MI); -+ break; -+ } -+ } -+ } -+ -+ return true; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SILowerLiteralConstants.cpp llvm-r600/lib/Target/R600/SILowerLiteralConstants.cpp ---- llvm-3.2.src/lib/Target/R600/SILowerLiteralConstants.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SILowerLiteralConstants.cpp 2013-01-25 19:43:57.480049720 +0100 -@@ -0,0 +1,108 @@ -+//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief This pass performs the following transformation on instructions with -+/// literal constants: -+/// -+/// %VGPR0 = V_MOV_IMM_I32 1 -+/// -+/// becomes: -+/// -+/// BUNDLE -+/// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT -+/// * SI_LOAD_LITERAL 1 -+/// -+/// The resulting sequence matches exactly how the hardware handles immediate -+/// operands, so this transformation greatly simplifies the code generator. -+/// -+/// Only the *_MOV_IMM_* support immediate operands at the moment, but when -+/// support for immediate operands is added to other instructions, they -+/// will be lowered here as well. 
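-+///
-+/// In outline, the rewrite below is built with MachineInstrBuilder and
-+/// finalizeBundle(); Dst and Imm stand in for the operands of the pseudo:
-+///
-+///   MachineInstr *First =
-+///       BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
-+///           .addReg(AMDGPU::SI_LITERAL_CONSTANT);
-+///   MachineInstr *Last =
-+///       BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_LOAD_LITERAL_I32))
-+///           .addImm(Imm);
-+///   Last->setIsInsideBundle();
-+///   llvm::finalizeBundle(MBB, First, Last);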
-+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPU.h" -+#include "llvm/CodeGen/MachineFunction.h" -+#include "llvm/CodeGen/MachineFunctionPass.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineInstrBundle.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+class SILowerLiteralConstantsPass : public MachineFunctionPass { -+ -+private: -+ static char ID; -+ const TargetInstrInfo *TII; -+ -+public: -+ SILowerLiteralConstantsPass(TargetMachine &tm) : -+ MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } -+ -+ virtual bool runOnMachineFunction(MachineFunction &MF); -+ -+ const char *getPassName() const { -+ return "SI Lower literal constants pass"; -+ } -+}; -+ -+} // End anonymous namespace -+ -+char SILowerLiteralConstantsPass::ID = 0; -+ -+FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) { -+ return new SILowerLiteralConstantsPass(tm); -+} -+ -+bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) { -+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); -+ BB != BB_E; ++BB) { -+ MachineBasicBlock &MBB = *BB; -+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); -+ I != MBB.end(); I = Next) { -+ Next = llvm::next(I); -+ MachineInstr &MI = *I; -+ switch (MI.getOpcode()) { -+ default: break; -+ case AMDGPU::S_MOV_IMM_I32: -+ case AMDGPU::S_MOV_IMM_I64: -+ case AMDGPU::V_MOV_IMM_F32: -+ case AMDGPU::V_MOV_IMM_I32: { -+ unsigned MovOpcode; -+ unsigned LoadLiteralOpcode; -+ MachineOperand LiteralOp = MI.getOperand(1); -+ if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) { -+ MovOpcode = AMDGPU::V_MOV_B32_e32; -+ } else { -+ MovOpcode = AMDGPU::S_MOV_B32; -+ } -+ if (LiteralOp.isImm()) { -+ LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32; -+ } else { -+ LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32; -+ } -+ MachineInstr *First = -+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode), -+ MI.getOperand(0).getReg()) -+ .addReg(AMDGPU::SI_LITERAL_CONSTANT); -+ MachineInstr *Last = -+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode)) -+ .addOperand(MI.getOperand(1)); -+ Last->setIsInsideBundle(); -+ llvm::finalizeBundle(MBB, First, Last); -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ } -+ } -+ return false; -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.cpp llvm-r600/lib/Target/R600/SIMachineFunctionInfo.cpp ---- llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIMachineFunctionInfo.cpp 2013-01-25 19:43:57.480049720 +0100 -@@ -0,0 +1,20 @@ -+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. 
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+
-+#include "SIMachineFunctionInfo.h"
-+
-+using namespace llvm;
-+
-+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
-+ : MachineFunctionInfo(),
-+ SPIPSInputAddr(0),
-+ ShaderType(0)
-+ { }
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.h llvm-r600/lib/Target/R600/SIMachineFunctionInfo.h
---- llvm-3.2.src/lib/Target/R600/SIMachineFunctionInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIMachineFunctionInfo.h 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,34 @@
-+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+//
-+//===----------------------------------------------------------------------===//
-+
-+
-+#ifndef SIMACHINEFUNCTIONINFO_H_
-+#define SIMACHINEFUNCTIONINFO_H_
-+
-+#include "llvm/CodeGen/MachineFunction.h"
-+
-+namespace llvm {
-+
-+/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
-+/// tells the hardware which interpolation parameters to load.
-+class SIMachineFunctionInfo : public MachineFunctionInfo {
-+public:
-+ SIMachineFunctionInfo(const MachineFunction &MF);
-+ unsigned SPIPSInputAddr;
-+ unsigned ShaderType;
-+};
-+
-+} // End namespace llvm
-+
-+
-+#endif // SIMACHINEFUNCTIONINFO_H_
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIRegisterInfo.cpp llvm-r600/lib/Target/R600/SIRegisterInfo.cpp
---- llvm-3.2.src/lib/Target/R600/SIRegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/SIRegisterInfo.cpp 2013-01-25 19:43:57.480049720 +0100
-@@ -0,0 +1,48 @@
-+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief SI implementation of the TargetRegisterInfo class.
-+// -+//===----------------------------------------------------------------------===// -+ -+ -+#include "SIRegisterInfo.h" -+#include "AMDGPUTargetMachine.h" -+ -+using namespace llvm; -+ -+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm, -+ const TargetInstrInfo &tii) -+: AMDGPURegisterInfo(tm, tii), -+ TM(tm), -+ TII(tii) -+ { } -+ -+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { -+ BitVector Reserved(getNumRegs()); -+ return Reserved; -+} -+ -+const TargetRegisterClass * -+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { -+ switch (rc->getID()) { -+ case AMDGPU::GPRF32RegClassID: -+ return &AMDGPU::VReg_32RegClass; -+ default: return rc; -+ } -+} -+ -+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( -+ MVT VT) const { -+ switch(VT.SimpleTy) { -+ default: -+ case MVT::i32: return &AMDGPU::VReg_32RegClass; -+ } -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIRegisterInfo.h llvm-r600/lib/Target/R600/SIRegisterInfo.h ---- llvm-3.2.src/lib/Target/R600/SIRegisterInfo.h 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIRegisterInfo.h 2013-01-25 19:43:57.483383054 +0100 -@@ -0,0 +1,47 @@ -+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// \brief Interface definition for SIRegisterInfo -+// -+//===----------------------------------------------------------------------===// -+ -+ -+#ifndef SIREGISTERINFO_H_ -+#define SIREGISTERINFO_H_ -+ -+#include "AMDGPURegisterInfo.h" -+ -+namespace llvm { -+ -+class AMDGPUTargetMachine; -+class TargetInstrInfo; -+ -+struct SIRegisterInfo : public AMDGPURegisterInfo { -+ AMDGPUTargetMachine &TM; -+ const TargetInstrInfo &TII; -+ -+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); -+ -+ virtual BitVector getReservedRegs(const MachineFunction &MF) const; -+ -+ /// \param RC is an AMDIL reg class. -+ /// -+ /// \returns the SI register class that is equivalent to \p RC. 
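-+ ///
-+ /// For example, per the implementation above, the AMDIL GPRF32 class maps
-+ /// onto SI's VReg_32; classes with no SI equivalent are returned unchanged.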
-+ virtual const TargetRegisterClass * -+ getISARegClass(const TargetRegisterClass *RC) const; -+ -+ /// \brief get the register class of the specified type to use in the -+ /// CFGStructurizer -+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; -+}; -+ -+} // End namespace llvm -+ -+#endif // SIREGISTERINFO_H_ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SIRegisterInfo.td llvm-r600/lib/Target/R600/SIRegisterInfo.td ---- llvm-3.2.src/lib/Target/R600/SIRegisterInfo.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SIRegisterInfo.td 2013-01-25 19:43:57.483383054 +0100 -@@ -0,0 +1,167 @@ -+ -+let Namespace = "AMDGPU" in { -+ def low : SubRegIndex; -+ def high : SubRegIndex; -+ -+ def sub0 : SubRegIndex; -+ def sub1 : SubRegIndex; -+ def sub2 : SubRegIndex; -+ def sub3 : SubRegIndex; -+ def sub4 : SubRegIndex; -+ def sub5 : SubRegIndex; -+ def sub6 : SubRegIndex; -+ def sub7 : SubRegIndex; -+} -+ -+class SIReg encoding = 0> : Register { -+ let Namespace = "AMDGPU"; -+ let HWEncoding = encoding; -+} -+ -+class SI_64 subregs, bits<16> encoding> : RegisterWithSubRegs { -+ let Namespace = "AMDGPU"; -+ let SubRegIndices = [low, high]; -+ let HWEncoding = encoding; -+} -+ -+class SGPR_32 num, string name> : SIReg; -+ -+class VGPR_32 num, string name> : SIReg; -+ -+// Special Registers -+def VCC : SIReg<"VCC", 106>; -+def EXEC_LO : SIReg <"EXEC LO", 126>; -+def EXEC_HI : SIReg <"EXEC HI", 127>; -+def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>; -+def SCC : SIReg<"SCC", 253>; -+def SREG_LIT_0 : SIReg <"S LIT 0", 128>; -+def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>; -+def M0 : SIReg <"M0", 124>; -+ -+//Interpolation registers -+def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; -+def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; -+def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; -+def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">; -+def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">; -+def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">; -+def PERSP_I_W : SIReg <"PERSP_I_W">; -+def PERSP_J_W : SIReg <"PERSP_J_W">; -+def PERSP_1_W : SIReg <"PERSP_1_W">; -+def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">; -+def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">; -+def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">; -+def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">; -+def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">; -+def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">; -+def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">; -+def POS_X_FLOAT : SIReg <"POS_X_FLOAT">; -+def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">; -+def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">; -+def POS_W_FLOAT : SIReg <"POS_W_FLOAT">; -+def FRONT_FACE : SIReg <"FRONT_FACE">; -+def ANCILLARY : SIReg <"ANCILLARY">; -+def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; -+def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; -+ -+// SGPR 32-bit registers -+foreach Index = 0-101 in { -+ def SGPR#Index : SGPR_32 ; -+} -+ -+def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, -+ (add (sequence "SGPR%u", 0, 101))>; -+ -+// SGPR 64-bit registers -+def SGPR_64 : RegisterTuples<[low, high], -+ [(add (decimate SGPR_32, 2)), -+ (add(decimate (rotl SGPR_32, 1), 2))]>; -+ -+// SGPR 128-bit registers -+def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], -+ [(add (decimate SGPR_32, 4)), -+ (add (decimate (rotl SGPR_32, 1), 4)), -+ (add (decimate (rotl SGPR_32, 2), 4)), -+ (add (decimate (rotl SGPR_32, 3), 4))]>; -+ -+// SGPR 256-bit registers -+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, 
sub7], -+ [(add (decimate SGPR_32, 8)), -+ (add (decimate (rotl SGPR_32, 1), 8)), -+ (add (decimate (rotl SGPR_32, 2), 8)), -+ (add (decimate (rotl SGPR_32, 3), 8)), -+ (add (decimate (rotl SGPR_32, 4), 8)), -+ (add (decimate (rotl SGPR_32, 5), 8)), -+ (add (decimate (rotl SGPR_32, 6), 8)), -+ (add (decimate (rotl SGPR_32, 7), 8))]>; -+ -+// VGPR 32-bit registers -+foreach Index = 0-255 in { -+ def VGPR#Index : VGPR_32 ; -+} -+ -+def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, -+ (add (sequence "VGPR%u", 0, 255))>; -+ -+// VGPR 64-bit registers -+def VGPR_64 : RegisterTuples<[low, high], -+ [(add VGPR_32), -+ (add (rotl VGPR_32, 1))]>; -+ -+// VGPR 128-bit registers -+def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], -+ [(add VGPR_32), -+ (add (rotl VGPR_32, 1)), -+ (add (rotl VGPR_32, 2)), -+ (add (rotl VGPR_32, 3))]>; -+ -+// Register class for all scalar registers (SGPRs + Special Registers) -+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, -+ (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI) -+>; -+ -+def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>; -+ -+def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>; -+ -+def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; -+ -+def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; -+ -+// Register class for all vector registers (VGPRs + Interploation Registers) -+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, -+ (add VGPR_32, -+ PERSP_SAMPLE_I, PERSP_SAMPLE_J, -+ PERSP_CENTER_I, PERSP_CENTER_J, -+ PERSP_CENTROID_I, PERSP_CENTROID_J, -+ PERSP_I_W, PERSP_J_W, PERSP_1_W, -+ LINEAR_SAMPLE_I, LINEAR_SAMPLE_J, -+ LINEAR_CENTER_I, LINEAR_CENTER_J, -+ LINEAR_CENTROID_I, LINEAR_CENTROID_J, -+ LINE_STIPPLE_TEX_COORD, -+ POS_X_FLOAT, -+ POS_Y_FLOAT, -+ POS_Z_FLOAT, -+ POS_W_FLOAT, -+ FRONT_FACE, -+ ANCILLARY, -+ SAMPLE_COVERAGE, -+ POS_FIXED_PT -+ ) -+>; -+ -+def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>; -+ -+def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>; -+ -+// AllReg_* - A set of all scalar and vector registers of a given width. -+def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>; -+ -+def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>; -+ -+// Special register classes for predicates and the M0 register -+def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; -+def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; -+def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; -+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; -+ -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/SISchedule.td llvm-r600/lib/Target/R600/SISchedule.td ---- llvm-3.2.src/lib/Target/R600/SISchedule.td 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/SISchedule.td 2013-01-25 19:43:57.483383054 +0100 -@@ -0,0 +1,15 @@ -+//===-- SISchedule.td - SI Scheduling definitons -------------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// TODO: This is just a place holder for now. 
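-+//
-+// A filled-in version would declare functional units and itinerary classes,
-+// along the lines of (hypothetical sketch; no SI units are modeled yet):
-+//
-+//   def ALU : FuncUnit;
-+//   def II_ALU : InstrItinClass;
-+//   def SI_Itin : ProcessorItineraries<[ALU], [], [
-+//     InstrItinData<II_ALU, [InstrStage<1, [ALU]>]>]>;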
-+// -+//===----------------------------------------------------------------------===// -+ -+ -+def SI_Itin : ProcessorItineraries <[], [], []>; -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp llvm-r600/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp ---- llvm-3.2.src/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp 2013-01-25 19:43:57.483383054 +0100 -@@ -0,0 +1,26 @@ -+//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+// -+//===----------------------------------------------------------------------===// -+ -+#include "AMDGPU.h" -+#include "llvm/Support/TargetRegistry.h" -+ -+using namespace llvm; -+ -+/// \brief The target for the AMDGPU backend -+Target llvm::TheAMDGPUTarget; -+ -+/// \brief Extern function to initialize the targets for the AMDGPU backend -+extern "C" void LLVMInitializeR600TargetInfo() { -+ RegisterTarget -+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX"); -+} -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/CMakeLists.txt llvm-r600/lib/Target/R600/TargetInfo/CMakeLists.txt ---- llvm-3.2.src/lib/Target/R600/TargetInfo/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/TargetInfo/CMakeLists.txt 2013-01-25 19:43:57.483383054 +0100 -@@ -0,0 +1,7 @@ -+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -+ -+add_llvm_library(LLVMR600Info -+ AMDGPUTargetInfo.cpp -+ ) -+ -+add_dependencies(LLVMR600Info AMDGPUCommonTableGen intrinsics_gen) -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/LLVMBuild.txt llvm-r600/lib/Target/R600/TargetInfo/LLVMBuild.txt ---- llvm-3.2.src/lib/Target/R600/TargetInfo/LLVMBuild.txt 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/TargetInfo/LLVMBuild.txt 2013-01-25 19:43:57.483383054 +0100 -@@ -0,0 +1,23 @@ -+;===- ./lib/Target/R600/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; -+; -+; The LLVM Compiler Infrastructure -+; -+; This file is distributed under the University of Illinois Open Source -+; License. See LICENSE.TXT for details. -+; -+;===------------------------------------------------------------------------===; -+; -+; This is an LLVMBuild description file for the components in this subdirectory. -+; -+; For more information on the LLVMBuild system, please see: -+; -+; http://llvm.org/docs/LLVMBuild.html -+; -+;===------------------------------------------------------------------------===; -+ -+[component_0] -+type = Library -+name = R600Info -+parent = R600 -+required_libraries = MC Support -+add_to_library_groups = R600 -diff -Nur -x .git llvm-3.2.src/lib/Target/R600/TargetInfo/Makefile llvm-r600/lib/Target/R600/TargetInfo/Makefile ---- llvm-3.2.src/lib/Target/R600/TargetInfo/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/lib/Target/R600/TargetInfo/Makefile 2013-01-25 19:43:57.483383054 +0100 -@@ -0,0 +1,15 @@ -+##===- lib/Target/AMDGPU/TargetInfo/Makefile ----------------*- Makefile -*-===## -+# -+# The LLVM Compiler Infrastructure -+# -+# This file is distributed under the University of Illinois Open Source -+# License. See LICENSE.TXT for details. 
-+# -+##===----------------------------------------------------------------------===## -+LEVEL = ../../../.. -+LIBRARYNAME = LLVMR600Info -+ -+# Hack: we need to include 'main' target directory to grab private headers -+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. -+ -+include $(LEVEL)/Makefile.common -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/add.v4i32.ll llvm-r600/test/CodeGen/R600/add.v4i32.ll ---- llvm-3.2.src/test/CodeGen/R600/add.v4i32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/add.v4i32.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,15 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { -+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 -+ %a = load <4 x i32> addrspace(1) * %in -+ %b = load <4 x i32> addrspace(1) * %b_ptr -+ %result = add <4 x i32> %a, %b -+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/and.v4i32.ll llvm-r600/test/CodeGen/R600/and.v4i32.ll ---- llvm-3.2.src/test/CodeGen/R600/and.v4i32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/and.v4i32.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,15 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { -+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 -+ %a = load <4 x i32> addrspace(1) * %in -+ %b = load <4 x i32> addrspace(1) * %b_ptr -+ %result = and <4 x i32> %a, %b -+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll llvm-r600/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll ---- llvm-3.2.src/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,33 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+; This test is for a bug in -+; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where -+; the wrong type was being passed to -+; TargetLowering::getOperationAction() when checking the legality of -+; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes. 
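-+;
-+; In outline, the combine rewrites a build_vector of converts into a convert
-+; of a build_vector, so legality must be checked on the vector type:
-+;   (build_vector (sint_to_fp x), (sint_to_fp y), ...)
-+;     --> (sint_to_fp (build_vector x, y, ...))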
-+ -+define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { -+entry: -+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1 -+ %sint = load i32 addrspace(1) * %in -+ %conv = sitofp i32 %sint to float -+ %0 = insertelement <4 x float> undef, float %conv, i32 0 -+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer -+ store <4 x float> %splat, <4 x float> addrspace(1)* %out -+ ret void -+} -+ -+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { -+entry: -+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1 -+ %uint = load i32 addrspace(1) * %in -+ %conv = uitofp i32 %uint to float -+ %0 = insertelement <4 x float> undef, float %conv, i32 0 -+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer -+ store <4 x float> %splat, <4 x float> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fabs.ll llvm-r600/test/CodeGen/R600/fabs.ll ---- llvm-3.2.src/test/CodeGen/R600/fabs.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fabs.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @fabs( float %r0) -+ call void @llvm.AMDGPU.store.output(float %r1, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -+declare float @fabs(float ) readnone -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fadd.ll llvm-r600/test/CodeGen/R600/fadd.ll ---- llvm-3.2.src/test/CodeGen/R600/fadd.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fadd.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.R600.load.input(i32 1) -+ %r2 = fadd float %r0, %r1 -+ call void @llvm.AMDGPU.store.output(float %r2, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fadd.v4f32.ll llvm-r600/test/CodeGen/R600/fadd.v4f32.ll ---- llvm-3.2.src/test/CodeGen/R600/fadd.v4f32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fadd.v4f32.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,15 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { -+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 -+ %a = load <4 x float> addrspace(1) * %in -+ %b = load <4 x float> addrspace(1) * %b_ptr -+ %result = fadd <4 x float> %a, %b -+ store <4 x float> %result, <4 x float> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fcmp-cnde-int-args.ll llvm-r600/test/CodeGen/R600/fcmp-cnde-int-args.ll ---- llvm-3.2.src/test/CodeGen/R600/fcmp-cnde-int-args.ll 1970-01-01 01:00:00.000000000 +0100 -+++ 
llvm-r600/test/CodeGen/R600/fcmp-cnde-int-args.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the -+; chance to optimize the fcmp + select instructions to CNDE was missed -+; due to the fact that the operands to fcmp and select had different types -+ -+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}} -+ -+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { -+entry: -+ %0 = load float addrspace(1)* %in -+ %cmp = fcmp oeq float %0, 0.000000e+00 -+ %value = select i1 %cmp, i32 -1, i32 0 -+ store i32 %value, i32 addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fcmp-cnd.ll llvm-r600/test/CodeGen/R600/fcmp-cnd.ll ---- llvm-3.2.src/test/CodeGen/R600/fcmp-cnd.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fcmp-cnd.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,14 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;Not checking arguments 2 and 3 to CNDE, because they may change between -+;registers and literal.x depending on what the optimizer does. -+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { -+entry: -+ %0 = load float addrspace(1)* %in -+ %cmp = fcmp oeq float %0, 0.000000e+00 -+ %value = select i1 %cmp, i32 2, i32 3 -+ store i32 %value, i32 addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fcmp.ll llvm-r600/test/CodeGen/R600/fcmp.ll ---- llvm-3.2.src/test/CodeGen/R600/fcmp.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fcmp.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: SETE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -+;CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { -+entry: -+ %0 = load float addrspace(1)* %in -+ %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1 -+ %1 = load float addrspace(1)* %arrayidx1 -+ %cmp = fcmp oeq float %0, %1 -+ %sext = sext i1 %cmp to i32 -+ store i32 %sext, i32 addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fdiv.v4f32.ll llvm-r600/test/CodeGen/R600/fdiv.v4f32.ll ---- llvm-3.2.src/test/CodeGen/R600/fdiv.v4f32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fdiv.v4f32.ll 2013-01-25 19:43:58.460049700 +0100 -@@ -0,0 +1,19 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { -+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 -+ %a = load <4 x float> addrspace(1) * %in -+ %b = load <4 x float> addrspace(1) * %b_ptr -+ %result = fdiv <4 x 
float> %a, %b -+ store <4 x float> %result, <4 x float> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/floor.ll llvm-r600/test/CodeGen/R600/floor.ll ---- llvm-3.2.src/test/CodeGen/R600/floor.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/floor.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @floor(float %r0) -+ call void @llvm.AMDGPU.store.output(float %r1, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -+declare float @floor(float) readonly -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmax.ll llvm-r600/test/CodeGen/R600/fmax.ll ---- llvm-3.2.src/test/CodeGen/R600/fmax.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fmax.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.R600.load.input(i32 1) -+ %r2 = fcmp uge float %r0, %r1 -+ %r3 = select i1 %r2, float %r0, float %r1 -+ call void @llvm.AMDGPU.store.output(float %r3, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmin.ll llvm-r600/test/CodeGen/R600/fmin.ll ---- llvm-3.2.src/test/CodeGen/R600/fmin.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fmin.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.R600.load.input(i32 1) -+ %r2 = fcmp uge float %r0, %r1 -+ %r3 = select i1 %r2, float %r1, float %r0 -+ call void @llvm.AMDGPU.store.output(float %r3, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmul.ll llvm-r600/test/CodeGen/R600/fmul.ll ---- llvm-3.2.src/test/CodeGen/R600/fmul.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fmul.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.R600.load.input(i32 1) -+ %r2 = fmul float %r0, %r1 -+ call void @llvm.AMDGPU.store.output(float %r2, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fmul.v4f32.ll llvm-r600/test/CodeGen/R600/fmul.v4f32.ll ---- llvm-3.2.src/test/CodeGen/R600/fmul.v4f32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fmul.v4f32.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,15 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: 
MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { -+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 -+ %a = load <4 x float> addrspace(1) * %in -+ %b = load <4 x float> addrspace(1) * %b_ptr -+ %result = fmul <4 x float> %a, %b -+ store <4 x float> %result, <4 x float> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fsub.ll llvm-r600/test/CodeGen/R600/fsub.ll ---- llvm-3.2.src/test/CodeGen/R600/fsub.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fsub.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,17 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+; CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.R600.load.input(i32 1) -+ %r2 = fsub float %r0, %r1 -+ call void @llvm.AMDGPU.store.output(float %r2, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/fsub.v4f32.ll llvm-r600/test/CodeGen/R600/fsub.v4f32.ll ---- llvm-3.2.src/test/CodeGen/R600/fsub.v4f32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/fsub.v4f32.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,15 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { -+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 -+ %a = load <4 x float> addrspace(1) * %in -+ %b = load <4 x float> addrspace(1) * %b_ptr -+ %result = fsub <4 x float> %a, %b -+ store <4 x float> %result, <4 x float> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/i8_to_double_to_float.ll llvm-r600/test/CodeGen/R600/i8_to_double_to_float.ll ---- llvm-3.2.src/test/CodeGen/R600/i8_to_double_to_float.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/i8_to_double_to_float.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,11 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { -+ %1 = load i8 addrspace(1)* %in -+ %2 = uitofp i8 %1 to double -+ %3 = fptrunc double %2 to float -+ store float %3, float addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/icmp-select-sete-reverse-args.ll llvm-r600/test/CodeGen/R600/icmp-select-sete-reverse-args.ll ---- llvm-3.2.src/test/CodeGen/R600/icmp-select-sete-reverse-args.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/icmp-select-sete-reverse-args.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,18 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;Test that a select with reversed True/False values is correctly lowered -+;to a SETNE_INT. There should only be one SETNE_INT instruction. 
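-+;
-+;In outline, the pattern under test is:
-+; select (icmp eq a, b), 0, -1 ==> SETNE_INT a, b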
-+
-+;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+;CHECK-NOT: SETNE_INT
-+
-+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-+entry:
-+ %0 = load i32 addrspace(1)* %in
-+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
-+ %1 = load i32 addrspace(1)* %arrayidx1
-+ %cmp = icmp eq i32 %0, %1
-+ %value = select i1 %cmp, i32 0, i32 -1
-+ store i32 %value, i32 addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/literals.ll llvm-r600/test/CodeGen/R600/literals.ll
---- llvm-3.2.src/test/CodeGen/R600/literals.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/literals.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,30 @@
-+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; Test using an integer literal constant.
-+; Generated ASM should be:
-+; ADD_INT REG literal.x, 5
-+; or
-+; ADD_INT literal.x REG, 5
-+
-+; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
-+define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
-+entry:
-+ %0 = add i32 5, %in
-+ store i32 %0, i32 addrspace(1)* %out
-+ ret void
-+}
-+
-+; Test using a float literal constant.
-+; Generated ASM should be:
-+; ADD REG literal.x, 5.0
-+; or
-+; ADD literal.x REG, 5.0
-+
-+; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0
-+define void @float_literal(float addrspace(1)* %out, float %in) {
-+entry:
-+ %0 = fadd float 5.0, %in
-+ store float %0, float addrspace(1)* %out
-+ ret void
-+}
-+
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/lit.local.cfg llvm-r600/test/CodeGen/R600/lit.local.cfg
---- llvm-3.2.src/test/CodeGen/R600/lit.local.cfg 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/lit.local.cfg 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,13 @@
-+config.suffixes = ['.ll', '.c', '.cpp']
-+
-+def getRoot(config):
-+ if not config.parent:
-+ return config
-+ return getRoot(config.parent)
-+
-+root = getRoot(config)
-+
-+targets = set(root.targets_to_build.split())
-+if not 'R600' in targets:
-+ config.unsupported = True
-+
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.mul.ll llvm-r600/test/CodeGen/R600/llvm.AMDGPU.mul.ll
---- llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.mul.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/llvm.AMDGPU.mul.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,17 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.R600.load.input(i32 1)
-+ %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
-+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-+ ret void
-+}
-+
-+declare float @llvm.R600.load.input(i32) readnone
-+
-+declare void @llvm.AMDGPU.store.output(float, i32)
-+
-+declare float @llvm.AMDGPU.mul(float ,float ) readnone
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.trunc.ll llvm-r600/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
---- llvm-3.2.src/test/CodeGen/R600/llvm.AMDGPU.trunc.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/llvm.AMDGPU.trunc.ll 2013-01-25 19:43:58.463383033 +0100
-@@ -0,0 +1,16 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @test() {
-+ %r0 = call float @llvm.R600.load.input(i32 0)
-+ %r1 = call float @llvm.AMDGPU.trunc( float %r0)
-+ call void @llvm.AMDGPU.store.output(float %r1, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -+declare float @llvm.AMDGPU.trunc(float ) readnone -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.cos.ll llvm-r600/test/CodeGen/R600/llvm.cos.ll ---- llvm-3.2.src/test/CodeGen/R600/llvm.cos.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/llvm.cos.ll 2013-01-25 19:43:58.463383033 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.cos.f32(float %r0) -+ call void @llvm.AMDGPU.store.output(float %r1, i32 0) -+ ret void -+} -+ -+declare float @llvm.cos.f32(float) readnone -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.pow.ll llvm-r600/test/CodeGen/R600/llvm.pow.ll ---- llvm-3.2.src/test/CodeGen/R600/llvm.pow.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/llvm.pow.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,19 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK-NEXT: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.R600.load.input(i32 1) -+ %r2 = call float @llvm.pow.f32( float %r0, float %r1) -+ call void @llvm.AMDGPU.store.output(float %r2, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -+declare float @llvm.pow.f32(float ,float ) readonly -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/llvm.sin.ll llvm-r600/test/CodeGen/R600/llvm.sin.ll ---- llvm-3.2.src/test/CodeGen/R600/llvm.sin.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/llvm.sin.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = call float @llvm.sin.f32( float %r0) -+ call void @llvm.AMDGPU.store.output(float %r1, i32 0) -+ ret void -+} -+ -+declare float @llvm.sin.f32(float) readnone -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/load.constant_addrspace.f32.ll llvm-r600/test/CodeGen/R600/load.constant_addrspace.f32.ll ---- llvm-3.2.src/test/CodeGen/R600/load.constant_addrspace.f32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/load.constant_addrspace.f32.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,9 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}} -+ -+define void @test(float addrspace(1)* %out, float addrspace(2)* %in) { -+ %1 = load float addrspace(2)* %in -+ store float %1, float addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/load.i8.ll llvm-r600/test/CodeGen/R600/load.i8.ll ---- llvm-3.2.src/test/CodeGen/R600/load.i8.ll 1970-01-01 01:00:00.000000000 +0100 -+++ 
llvm-r600/test/CodeGen/R600/load.i8.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,10 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -+ -+define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { -+ %1 = load i8 addrspace(1)* %in -+ %2 = zext i8 %1 to i32 -+ store i32 %2, i32 addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/reciprocal.ll llvm-r600/test/CodeGen/R600/reciprocal.ll ---- llvm-3.2.src/test/CodeGen/R600/reciprocal.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/reciprocal.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,16 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test() { -+ %r0 = call float @llvm.R600.load.input(i32 0) -+ %r1 = fdiv float 1.0, %r0 -+ call void @llvm.AMDGPU.store.output(float %r1, i32 0) -+ ret void -+} -+ -+declare float @llvm.R600.load.input(i32) readnone -+ -+declare void @llvm.AMDGPU.store.output(float, i32) -+ -+declare float @llvm.AMDGPU.rcp(float ) readnone -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/sdiv.ll llvm-r600/test/CodeGen/R600/sdiv.ll ---- llvm-3.2.src/test/CodeGen/R600/sdiv.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/sdiv.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,21 @@ -+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+; The code generated by sdiv is long and complex and may frequently change. -+; The goal of this test is to make sure the ISel doesn't fail. -+; -+; This program was previously failing to compile when one of the selectcc -+; opcodes generated by the sdiv lowering was being legalized and optimized to: -+; selectcc Remainder -1, 0, -1, SETGT -+; This was fixed by adding an additional pattern in R600Instructions.td to -+; match this pattern with a CNDGE_INT. 
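-+;
-+; For reference, CNDGE_INT dst, src, a, b selects a when src >= 0 and b
-+; otherwise; since "Remainder > -1" is equivalent to "Remainder >= 0", the
-+; selectcc above folds onto it.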
-+ -+; CHECK: RETURN -+ -+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { -+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 -+ %num = load i32 addrspace(1) * %in -+ %den = load i32 addrspace(1) * %den_ptr -+ %result = sdiv i32 %num, %den -+ store i32 %result, i32 addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/selectcc_cnde_int.ll llvm-r600/test/CodeGen/R600/selectcc_cnde_int.ll ---- llvm-3.2.src/test/CodeGen/R600/selectcc_cnde_int.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/selectcc_cnde_int.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,11 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK-NOT: SETE_INT -+;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} -+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { -+ %1 = load i32 addrspace(1)* %in -+ %2 = icmp eq i32 %1, 0 -+ %3 = select i1 %2, i32 1, i32 2 -+ store i32 %3, i32 addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/selectcc_cnde.ll llvm-r600/test/CodeGen/R600/selectcc_cnde.ll ---- llvm-3.2.src/test/CodeGen/R600/selectcc_cnde.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/selectcc_cnde.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,11 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK-NOT: SETE -+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} -+define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { -+ %1 = load float addrspace(1)* %in -+ %2 = fcmp oeq float %1, 0.0 -+ %3 = select i1 %2, float 1.0, float 2.0 -+ store float %3, float addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/selectcc-icmp-select-float.ll llvm-r600/test/CodeGen/R600/selectcc-icmp-select-float.ll ---- llvm-3.2.src/test/CodeGen/R600/selectcc-icmp-select-float.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/selectcc-icmp-select-float.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,15 @@ -+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+; Note additional optimizations may cause this SGT to be replaced with a -+; CND* instruction. 
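-+;
-+; The sge compare against 0 below is emitted as a strict compare against -1
-+; (x >= 0 is the same as x > -1 for i32), hence the SGT_INT/-1 check.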
-+; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} -+; Test a selectcc with i32 LHS/RHS and float True/False -+ -+define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { -+entry: -+ %0 = load i32 addrspace(1)* %in -+ %1 = icmp sge i32 %0, 0 -+ %2 = select i1 %1, float 1.0, float 0.0 -+ store float %2, float addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/setcc.v4i32.ll llvm-r600/test/CodeGen/R600/setcc.v4i32.ll ---- llvm-3.2.src/test/CodeGen/R600/setcc.v4i32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/setcc.v4i32.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,12 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -+ -+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { -+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 -+ %a = load <4 x i32> addrspace(1) * %in -+ %b = load <4 x i32> addrspace(1) * %b_ptr -+ %result = icmp eq <4 x i32> %a, %b -+ %sext = sext <4 x i1> %result to <4 x i32> -+ store <4 x i32> %sext, <4 x i32> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/short-args.ll llvm-r600/test/CodeGen/R600/short-args.ll ---- llvm-3.2.src/test/CodeGen/R600/short-args.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/short-args.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,37 @@ -+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -+ -+define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { -+entry: -+ %0 = zext i8 %in to i32 -+ store i32 %0, i32 addrspace(1)* %out, align 4 -+ ret void -+} -+ -+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -+ -+define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { -+entry: -+ %0 = zext i8 %in to i32 -+ store i32 %0, i32 addrspace(1)* %out, align 4 -+ ret void -+} -+ -+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -+ -+define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { -+entry: -+ %0 = zext i16 %in to i32 -+ store i32 %0, i32 addrspace(1)* %out, align 4 -+ ret void -+} -+ -+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -+ -+define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { -+entry: -+ %0 = zext i16 %in to i32 -+ store i32 %0, i32 addrspace(1)* %out, align 4 -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/store.v4f32.ll llvm-r600/test/CodeGen/R600/store.v4f32.ll ---- llvm-3.2.src/test/CodeGen/R600/store.v4f32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/store.v4f32.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,9 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 -+ -+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { -+ %1 = load <4 x float> addrspace(1) * %in -+ store <4 x float> %1, <4 x float> addrspace(1)* %out -+ ret void -+} -diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/store.v4i32.ll llvm-r600/test/CodeGen/R600/store.v4i32.ll ---- llvm-3.2.src/test/CodeGen/R600/store.v4i32.ll 1970-01-01 01:00:00.000000000 +0100 -+++ llvm-r600/test/CodeGen/R600/store.v4i32.ll 2013-01-25 19:43:58.466716366 +0100 -@@ -0,0 +1,9 @@ -+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -+ -+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %1 = load <4 x i32> addrspace(1) * %in
-+ store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/udiv.v4i32.ll llvm-r600/test/CodeGen/R600/udiv.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/udiv.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/udiv.v4i32.ll 2013-01-25 19:43:58.466716366 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;The code generated by udiv is long and complex and may frequently change.
-+;The goal of this test is to make sure the ISel doesn't fail when it gets
-+;a v4i32 udiv
-+;CHECK: RETURN
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-+ %a = load <4 x i32> addrspace(1) * %in
-+ %b = load <4 x i32> addrspace(1) * %b_ptr
-+ %result = udiv <4 x i32> %a, %b
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/urem.v4i32.ll llvm-r600/test/CodeGen/R600/urem.v4i32.ll
---- llvm-3.2.src/test/CodeGen/R600/urem.v4i32.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/urem.v4i32.ll 2013-01-25 19:43:58.470049700 +0100
-@@ -0,0 +1,15 @@
-+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+;The code generated by urem is long and complex and may frequently change.
-+;The goal of this test is to make sure the ISel doesn't fail when it gets
-+;a v4i32 urem
-+;CHECK: RETURN
-+
-+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-+ %a = load <4 x i32> addrspace(1) * %in
-+ %b = load <4 x i32> addrspace(1) * %b_ptr
-+ %result = urem <4 x i32> %a, %b
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/R600/vec4-expand.ll llvm-r600/test/CodeGen/R600/vec4-expand.ll
---- llvm-3.2.src/test/CodeGen/R600/vec4-expand.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/R600/vec4-expand.ll 2013-01-25 19:43:58.470049700 +0100
-@@ -0,0 +1,49 @@
-+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-+
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %value = load <4 x float> addrspace(1) * %in
-+ %result = fptosi <4 x float> %value to <4 x i32>
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-+
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-+ %value = load <4 x float> addrspace(1) * %in
-+ %result = fptoui <4 x float> %value to <4 x i32>
-+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-+ ret void
-+}
-+
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %value = load <4 x i32> addrspace(1) * %in
-+ %result = sitofp <4 x i32> %value to <4 x float>
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-+
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-+
-+define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-+ %value = load <4 x i32> addrspace(1) * %in
-+ %result = uitofp <4 x i32> %value to <4 x float>
-+ store <4 x float> %result, <4 x float> addrspace(1)* %out
-+ ret void
-+}
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/SI/sanity.ll llvm-r600/test/CodeGen/SI/sanity.ll
---- llvm-3.2.src/test/CodeGen/SI/sanity.ll 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/test/CodeGen/SI/sanity.ll 2013-01-25 19:43:58.470049700 +0100
-@@ -0,0 +1,37 @@
-+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
-+
-+; CHECK: S_ENDPGM
-+
-+define void @main() {
-+main_body:
-+ call void @llvm.AMDGPU.shader.type(i32 1)
-+ %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
-+ %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0
-+ %2 = load <4 x i32> addrspace(2)* %1
-+ %3 = call i32 @llvm.SI.vs.load.buffer.index()
-+ %4 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3)
-+ %5 = extractelement <4 x float> %4, i32 0
-+ %6 = extractelement <4 x float> %4, i32 1
-+ %7 = extractelement <4 x float> %4, i32 2
-+ %8 = extractelement <4 x float> %4, i32 3
-+ %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
-+ %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1
-+ %11 = load <4 x i32> addrspace(2)* %10
-+ %12 = call i32 @llvm.SI.vs.load.buffer.index()
-+ %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12)
-+ %14 = extractelement <4 x float> %13, i32 0
-+ %15 = extractelement <4 x float> %13, i32 1
-+ %16 = extractelement <4 x float> %13, i32 2
-+ %17 = extractelement <4 x float> %13, i32 3
-+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17)
-+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8)
-+ ret void
-+}
-+
-+declare void @llvm.AMDGPU.shader.type(i32)
-+
-+declare i32 @llvm.SI.vs.load.buffer.index() readnone
-+
-+declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32)
-+
-+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-diff -Nur -x .git llvm-3.2.src/test/CodeGen/X86/cvtv2f32.ll llvm-r600/test/CodeGen/X86/cvtv2f32.ll
---- llvm-3.2.src/test/CodeGen/X86/cvtv2f32.ll 2012-10-24 06:14:18.000000000 +0200
-+++ llvm-r600/test/CodeGen/X86/cvtv2f32.ll 2013-01-25 19:43:58.856716358 +0100
-@@ -1,3 +1,7 @@
-+; A bug fix in the DAGCombiner made this test fail, so marking as xfail
-+; until this can be investigated further.
-+; XFAIL: *
-+
- ; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s
- 
- define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) {
diff --git a/llvm-tld.patch b/llvm-tld.patch
index 38b54f6..cbe0f36 100644
--- a/llvm-tld.patch
+++ b/llvm-tld.patch
@@ -1,6 +1,6 @@
---- llvm-3.2.src/tools/clang/lib/Driver/ToolChains.cpp.orig 2012-12-16 16:59:27.000000000 +0100
-+++ llvm-3.2.src/tools/clang/lib/Driver/ToolChains.cpp 2013-01-24 12:42:19.582377854 +0100
-@@ -1062,6 +1062,7 @@
+--- llvm-3.3.src/tools/clang/lib/Driver/ToolChains.cpp.orig 2014-09-16 11:54:43.000000000 +0000
++++ llvm-3.3.src/tools/clang/lib/Driver/ToolChains.cpp 2014-09-16 11:53:47.000000000 +0000
+@@ -1091,6 +1091,7 @@
 static const char *const X86_64LibDirs[] = { "/lib64", "/lib" };
 static const char *const X86_64Triples[] = {
+
 "x86_64-linux-gnu",
 "x86_64-unknown-linux-gnu",
 "x86_64-pc-linux-gnu",
-@@ -1103,6 +1104,7 @@
 };
 static const char *const X86LibDirs[] = { "/lib32", "/lib" };
 static const char *const X86Triples[] = {
+
 "i686-linux-gnu",
 "i686-pc-linux-gnu",
 "i486-linux-gnu",
-@@ -1830,6 +1834,7 @@
+@@ -1981,6 +1983,7 @@
 
- enum LinuxDistro {
+ enum Distro {
 ArchLinux,
+ TLDLinux,
 DebianLenny,
 DebianSqueeze,
 DebianWheezy,
-@@ -1877,6 +1882,10 @@
+@@ -2029,6 +2032,10 @@
 return Distro >= UbuntuHardy && Distro <= UbuntuRaring;
 }
 
+static bool IsTLD(enum Distro Distro) {
+ return Distro == TLDLinux;
+}
+
- static LinuxDistro DetectLinuxDistro(llvm::Triple::ArchType Arch) {
+ static Distro DetectDistro(llvm::Triple::ArchType Arch) {
 OwningPtr<llvm::MemoryBuffer> File;
 if (!llvm::MemoryBuffer::getFile("/etc/lsb-release", File)) {
-@@ -1955,6 +1964,9 @@
+@@ -2109,6 +2116,9 @@
 if (!llvm::sys::fs::exists("/etc/arch-release", Exists) && Exists)
 return ArchLinux;
 
+ if (!llvm::sys::fs::exists("/etc/tld-release", Exists) && Exists)
+ return TLDLinux;
+
 return UnknownDistro;
 }
 
-@@ -2072,7 +2084,7 @@
+@@ -2224,7 +2234,7 @@
 
- LinuxDistro Distro = DetectLinuxDistro(Arch);
+ Distro Distro = DetectDistro(Arch);
 
- if (IsOpenSuse(Distro) || IsUbuntu(Distro)) {
+ if (IsOpenSuse(Distro) || IsUbuntu(Distro) || IsTLD(Distro)) {
 ExtraOpts.push_back("-z");
 ExtraOpts.push_back("relro");
 }
-@@ -2088,7 +2100,7 @@
+@@ -2244,7 +2254,7 @@
 // ABI requires a mapping between the GOT and the symbol table.
 // Android loader does not support .gnu.hash.
- if (!isMipsArch(Arch) && !IsAndroid) {
+ if (!IsMips && !IsAndroid) {
- if (IsRedhat(Distro) || IsOpenSuse(Distro) ||
+ if (IsRedhat(Distro) || IsOpenSuse(Distro) || IsTLD(Distro) ||
 (IsUbuntu(Distro) && Distro >= UbuntuMaverick))
 ExtraOpts.push_back("--hash-style=gnu");
-@@ -2097,11 +2109,11 @@
+@@ -2253,11 +2263,11 @@
 ExtraOpts.push_back("--hash-style=both");
 }
 
@@ -72,8 +72,8 @@
 ExtraOpts.push_back("--no-add-needed");
 
 if (Distro == DebianSqueeze || Distro == DebianWheezy ||
-- IsOpenSuse(Distro) ||
-+ IsOpenSuse(Distro) || IsTLD(Distro) ||
+- Distro == DebianJessie || IsOpenSuse(Distro) ||
++ Distro == DebianJessie || IsOpenSuse(Distro) || IsTLD(Distro) ||
 (IsRedhat(Distro) && Distro != RHEL4 && Distro != RHEL5) ||
 (IsUbuntu(Distro) && Distro >= UbuntuKarmic))
 ExtraOpts.push_back("--build-id");
diff --git a/llvm.spec b/llvm.spec
index e70eec5..85d3135 100644
--- a/llvm.spec
+++ b/llvm.spec
@@ -17,21 +17,19 @@
 Summary: The Low Level Virtual Machine (An Optimizing Compiler Infrastructure)
 Summary(pl.UTF-8): Niskopoziomowa maszyna wirtualna (infrastruktura kompilatora optymalizującego)
 Name: llvm
-Version: 3.2
-Release: 4
+Version: 3.3
+Release: 1
 License: University of Illinois/NCSA Open Source License
 Group: Development/Languages
 #Source0Download: http://llvm.org/releases/download.html
 Source0: http://llvm.org/releases/%{version}/%{name}-%{version}.src.tar.gz
-# Source0-md5: 71610289bbc819e3e15fdd562809a2d7
-Source1: http://llvm.org/releases/%{version}/clang-%{version}.src.tar.gz
-# Source1-md5: 3896ef4334df08563b05d0848ba80582
+# Source0-md5: 40564e1dc390f9844f1711c08b08e391
+Source1: http://llvm.org/releases/%{version}/cfe-%{version}.src.tar.gz
+# Source1-md5: 8284891e3e311829b8e44ac813d0c9ef
 Patch0: %{name}-config.patch
 # Data files should be installed with timestamps preserved
 Patch1: %{name}-2.6-timestamp.patch
 Patch2: %{name}-tld.patch
-# R600 target support from git://people.freedesktop.org/~tstellar/llvm
-Patch3: %{name}-r600.patch
 URL: http://llvm.org/
 BuildRequires: autoconf >= 2.60
 BuildRequires: automake >= 1:1.9.6
@@ -267,11 +265,10 @@
 Dokumentacja HTML wiązania OCamla do LLVM-a.
%prep %setup -q -a1 -n %{name}-%{version}.src -mv clang-*.* tools/clang +mv cfe-%{version}.src tools/clang %patch0 -p1 %patch1 -p1 %patch2 -p1 -%patch3 -p1 # configure does not properly specify libdir %{__sed} -i 's|(PROJ_prefix)/lib|(PROJ_prefix)/%{_lib}|g' Makefile.config.in @@ -318,6 +315,7 @@ bash ../%configure \ --with-pic %{__make} \ + VERBOSE=1 \ REQUIRES_RTTI=1 \ OPTIMIZE_OPTION="%{rpmcflags} %{rpmcppflags}" @@ -418,6 +416,7 @@ rm -rf $RPM_BUILD_ROOT %attr(755,root,root) %{_bindir}/llvm-rtdyld %attr(755,root,root) %{_bindir}/llvm-size %attr(755,root,root) %{_bindir}/llvm-stress +%attr(755,root,root) %{_bindir}/llvm-symbolizer %attr(755,root,root) %{_bindir}/llvm-tblgen %attr(755,root,root) %{_bindir}/macho-dump %attr(755,root,root) %{_bindir}/opt @@ -436,13 +435,15 @@ rm -rf $RPM_BUILD_ROOT %{_mandir}/man1/llvm-nm.1* %{_mandir}/man1/llvm-prof.1* %{_mandir}/man1/llvm-ranlib.1* +%{_mandir}/man1/llvm-readobj.1* %{_mandir}/man1/llvm-stress.1* +%{_mandir}/man1/llvm-symbolizer.1* %{_mandir}/man1/opt.1* %{_mandir}/man1/tblgen.1* %files libs %defattr(644,root,root,755) -%attr(755,root,root) %{_libdir}/libLLVM-%{version}svn.so +%attr(755,root,root) %{_libdir}/libLLVM-%{version}.so %files devel %defattr(644,root,root,755) @@ -476,6 +477,7 @@ rm -rf $RPM_BUILD_ROOT %attr(755,root,root) %{_bindir}/clang %attr(755,root,root) %{_bindir}/clang++ %attr(755,root,root) %{_bindir}/clang-check +%attr(755,root,root) %{_bindir}/clang-format %attr(755,root,root) %{_bindir}/clang-tblgen %attr(755,root,root) %{_libdir}/libclang.so %{_libdir}/clang -- 2.44.0