diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index c5db0c4fe788921299ea11b688ef9128be2a60c6..14c3fe69272300208ea166ac13aa002e370c66cf 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -46,8 +46,9 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
 	radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
 	radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
 	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
-	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \
-	radeon_test.o r200.o radeon_legacy_tv.o
+	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
+	r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
+	r600_blit_kms.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 8e31e992ec53d2af99951f5923d6341545295099..a7edd0f2ac37d0ac8a9cd508f63d8320df354d27 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -389,6 +389,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
 					pll_flags |= RADEON_PLL_USE_REF_DIV;
 			}
 			radeon_encoder = to_radeon_encoder(encoder);
+			break;
 		}
 	}
 
diff --git a/drivers/gpu/drm/radeon/r300.h b/drivers/gpu/drm/radeon/avivod.h
similarity index 50%
rename from drivers/gpu/drm/radeon/r300.h
rename to drivers/gpu/drm/radeon/avivod.h
index 8486b4da9d69e22bd2f97e2d41a62921d4b58c38..d4e6e6e4a93888a14a738dd3484279d4f5e28d9c 100644
--- a/drivers/gpu/drm/radeon/r300.h
+++ b/drivers/gpu/drm/radeon/avivod.h
@@ -1,7 +1,6 @@
 /*
- * Copyright 2008 Advanced Micro Devices, Inc.
- * Copyright 2008 Red Hat Inc.
- * Copyright 2009 Jerome Glisse.
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -25,12 +24,37 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#ifndef R300_H
-#define R300_H
+#ifndef AVIVOD_H
+#define AVIVOD_H
 
-struct r300_asic {
-	const unsigned	*reg_safe_bm;
-	unsigned	reg_safe_bm_size;
-};
+
+#define	D1CRTC_CONTROL					0x6080
+#define		CRTC_EN						(1 << 0)
+#define	D1CRTC_UPDATE_LOCK				0x60E8
+#define	D1GRPH_PRIMARY_SURFACE_ADDRESS			0x6110
+#define	D1GRPH_SECONDARY_SURFACE_ADDRESS		0x6118
+
+#define	D2CRTC_CONTROL					0x6880
+#define	D2CRTC_UPDATE_LOCK				0x68E8
+#define	D2GRPH_PRIMARY_SURFACE_ADDRESS			0x6910
+#define	D2GRPH_SECONDARY_SURFACE_ADDRESS		0x6918
+
+#define	D1VGA_CONTROL					0x0330
+#define		DVGA_CONTROL_MODE_ENABLE			(1 << 0)
+#define		DVGA_CONTROL_TIMING_SELECT			(1 << 8)
+#define		DVGA_CONTROL_SYNC_POLARITY_SELECT		(1 << 9)
+#define		DVGA_CONTROL_OVERSCAN_TIMING_SELECT		(1 << 10)
+#define		DVGA_CONTROL_OVERSCAN_COLOR_EN			(1 << 16)
+#define		DVGA_CONTROL_ROTATE				(1 << 24)
+#define D2VGA_CONTROL					0x0338
+
+#define	VGA_HDP_CONTROL					0x328
+#define		VGA_MEM_PAGE_SELECT_EN				(1 << 0)
+#define		VGA_MEMORY_DISABLE				(1 << 4)
+#define		VGA_RBBM_LOCK_DISABLE				(1 << 8)
+#define		VGA_SOFT_RESET					(1 << 16)
+#define	VGA_MEMORY_BASE_ADDRESS				0x0310
+#define	VGA_RENDER_CONTROL				0x0300
+#define		VGA_VSTATUS_CNTL_MASK				0x00030000
 
 #endif
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index ee3ab62417e2cb723cc0e2fda6f35b0ff4ebb6dc..5708c07ce7331cbef157d22f609aeab54840cc46 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -31,6 +31,8 @@
 #include "radeon_drm.h"
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "r100d.h"
+
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
 
@@ -391,9 +393,9 @@ int r100_wb_init(struct radeon_device *rdev)
 			return r;
 		}
 	}
-	WREG32(0x774, rdev->wb.gpu_addr);
-	WREG32(0x70C, rdev->wb.gpu_addr + 1024);
-	WREG32(0x770, 0xff);
+	WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr);
+	WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024);
+	WREG32(RADEON_SCRATCH_UMSK, 0xff);
 	return 0;
 }
 
@@ -559,18 +561,18 @@ static int r100_cp_init_microcode(struct radeon_device *rdev)
 		fw_name = FIRMWARE_R520;
 	}
 
-	err = request_firmware(&rdev->fw, fw_name, &pdev->dev);
+	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
 	platform_device_unregister(pdev);
 	if (err) {
 		printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
 		       fw_name);
-	} else if (rdev->fw->size % 8) {
+	} else if (rdev->me_fw->size % 8) {
 		printk(KERN_ERR
 		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
-		       rdev->fw->size, fw_name);
+		       rdev->me_fw->size, fw_name);
 		err = -EINVAL;
-		release_firmware(rdev->fw);
-		rdev->fw = NULL;
+		release_firmware(rdev->me_fw);
+		rdev->me_fw = NULL;
 	}
 	return err;
 }
@@ -584,9 +586,9 @@ static void r100_cp_load_microcode(struct radeon_device *rdev)
 		       "programming pipes. Bad things might happen.\n");
 	}
 
-	if (rdev->fw) {
-		size = rdev->fw->size / 4;
-		fw_data = (const __be32 *)&rdev->fw->data[0];
+	if (rdev->me_fw) {
+		size = rdev->me_fw->size / 4;
+		fw_data = (const __be32 *)&rdev->me_fw->data[0];
 		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
 		for (i = 0; i < size; i += 2) {
 			WREG32(RADEON_CP_ME_RAM_DATAH,
@@ -632,7 +634,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
 	}
 
-	if (!rdev->fw) {
+	if (!rdev->me_fw) {
 		r = r100_cp_init_microcode(rdev);
 		if (r) {
 			DRM_ERROR("Failed to load firmware!\n");
@@ -765,6 +767,12 @@ int r100_cp_reset(struct radeon_device *rdev)
 	return -1;
 }
 
+void r100_cp_commit(struct radeon_device *rdev)
+{
+	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
+	(void)RREG32(RADEON_CP_RB_WPTR);
+}
+
 
 /*
  * CS functions
@@ -2954,3 +2962,106 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
 			}
 	}
 }
+
+int r100_ring_test(struct radeon_device *rdev)
+{
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, 2);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		radeon_scratch_free(rdev, scratch);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET0(scratch, 0));
+	radeon_ring_write(rdev, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF) {
+			break;
+		}
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test succeeded in %d usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
+
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
+	radeon_ring_write(rdev, ib->gpu_addr);
+	radeon_ring_write(rdev, ib->length_dw);
+}
+
+int r100_ib_test(struct radeon_device *rdev)
+{
+	struct radeon_ib *ib;
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ib_get(rdev, &ib);
+	if (r) {
+		return r;
+	}
+	ib->ptr[0] = PACKET0(scratch, 0);
+	ib->ptr[1] = 0xDEADBEEF;
+	ib->ptr[2] = PACKET2(0);
+	ib->ptr[3] = PACKET2(0);
+	ib->ptr[4] = PACKET2(0);
+	ib->ptr[5] = PACKET2(0);
+	ib->ptr[6] = PACKET2(0);
+	ib->ptr[7] = PACKET2(0);
+	ib->length_dw = 8;
+	r = radeon_ib_schedule(rdev, ib);
+	if (r) {
+		radeon_scratch_free(rdev, scratch);
+		radeon_ib_free(rdev, &ib);
+		return r;
+	}
+	r = radeon_fence_wait(ib->fence, false);
+	if (r) {
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF) {
+			break;
+		}
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test succeeded in %u usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	radeon_ib_free(rdev, &ib);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h
new file mode 100644
index 0000000000000000000000000000000000000000..6da7d92c321ca854a4c30b82a80782a88fa8010f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r100d.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R100D_H__
+#define __R100D_H__
+
+#define CP_PACKET0			0x00000000
+#define		PACKET0_BASE_INDEX_SHIFT	0
+#define		PACKET0_BASE_INDEX_MASK		(0x1ffff << 0)
+#define		PACKET0_COUNT_SHIFT		16
+#define		PACKET0_COUNT_MASK		(0x3fff << 16)
+#define CP_PACKET1			0x40000000
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+#define CP_PACKET3			0xC0000000
+#define		PACKET3_IT_OPCODE_SHIFT		8
+#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
+#define		PACKET3_COUNT_SHIFT		16
+#define		PACKET3_COUNT_MASK		(0x3fff << 16)
+/* PACKET3 op code */
+#define		PACKET3_NOP			0x10
+#define		PACKET3_3D_DRAW_VBUF		0x28
+#define		PACKET3_3D_DRAW_IMMD		0x29
+#define		PACKET3_3D_DRAW_INDX		0x2A
+#define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_INDX_BUFFER		0x33
+#define		PACKET3_3D_DRAW_VBUF_2		0x34
+#define		PACKET3_3D_DRAW_IMMD_2		0x35
+#define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_BITBLT_MULTI		0x9B
+
+#define PACKET0(reg, n)	(CP_PACKET0 |					\
+			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
+			 REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n)	(CP_PACKET3 |					\
+			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
+			 REG_SET(PACKET3_COUNT, (n)))
+
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 33a2c557eac4ac855c15fe549940d21bb0f855a8..a5f82f7beed60e04eafc26c99c72c5e18ff3cdc3 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -33,6 +33,7 @@
 #include "radeon_drm.h"
 #include "radeon_share.h"
 #include "r100_track.h"
+#include "r300d.h"
 
 #include "r300_reg_safe.h"
 
@@ -127,7 +128,7 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev)
 	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
 	rv370_pcie_gart_tlb_flush(rdev);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
-		 rdev->mc.gtt_size >> 20, table_addr);
+		 (unsigned)(rdev->mc.gtt_size >> 20), table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h
new file mode 100644
index 0000000000000000000000000000000000000000..63ec076f2cd4199a13052aec811293be00126d70
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r300d.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R300D_H__
+#define __R300D_H__
+
+#define CP_PACKET0			0x00000000
+#define		PACKET0_BASE_INDEX_SHIFT	0
+#define		PACKET0_BASE_INDEX_MASK		(0x1ffff << 0)
+#define		PACKET0_COUNT_SHIFT		16
+#define		PACKET0_COUNT_MASK		(0x3fff << 16)
+#define CP_PACKET1			0x40000000
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+#define CP_PACKET3			0xC0000000
+#define		PACKET3_IT_OPCODE_SHIFT		8
+#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
+#define		PACKET3_COUNT_SHIFT		16
+#define		PACKET3_COUNT_MASK		(0x3fff << 16)
+/* PACKET3 op code */
+#define		PACKET3_NOP			0x10
+#define		PACKET3_3D_DRAW_VBUF		0x28
+#define		PACKET3_3D_DRAW_IMMD		0x29
+#define		PACKET3_3D_DRAW_INDX		0x2A
+#define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_INDX_BUFFER		0x33
+#define		PACKET3_3D_DRAW_VBUF_2		0x34
+#define		PACKET3_3D_DRAW_IMMD_2		0x35
+#define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_BITBLT_MULTI		0x9B
+
+#define PACKET0(reg, n)	(CP_PACKET0 |					\
+			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
+			 REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n)	(CP_PACKET3 |					\
+			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
+			 REG_SET(PACKET3_COUNT, (n)))
+
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 538cd907df690607a2bab2e219b6d73cfde69108..d8fcef44a69f092efd1f42bfbfc042b0d4509722 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -25,12 +25,46 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/seq_file.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
 #include "drmP.h"
-#include "radeon_reg.h"
+#include "radeon_drm.h"
 #include "radeon.h"
+#include "radeon_mode.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+#include "atom.h"
 
-/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
+#define PFP_UCODE_SIZE 576
+#define PM4_UCODE_SIZE 1792
+#define R700_PFP_UCODE_SIZE 848
+#define R700_PM4_UCODE_SIZE 1360
+
+/* Firmware Names */
+MODULE_FIRMWARE("radeon/R600_pfp.bin");
+MODULE_FIRMWARE("radeon/R600_me.bin");
+MODULE_FIRMWARE("radeon/RV610_pfp.bin");
+MODULE_FIRMWARE("radeon/RV610_me.bin");
+MODULE_FIRMWARE("radeon/RV630_pfp.bin");
+MODULE_FIRMWARE("radeon/RV630_me.bin");
+MODULE_FIRMWARE("radeon/RV620_pfp.bin");
+MODULE_FIRMWARE("radeon/RV620_me.bin");
+MODULE_FIRMWARE("radeon/RV635_pfp.bin");
+MODULE_FIRMWARE("radeon/RV635_me.bin");
+MODULE_FIRMWARE("radeon/RV670_pfp.bin");
+MODULE_FIRMWARE("radeon/RV670_me.bin");
+MODULE_FIRMWARE("radeon/RS780_pfp.bin");
+MODULE_FIRMWARE("radeon/RS780_me.bin");
+MODULE_FIRMWARE("radeon/RV770_pfp.bin");
+MODULE_FIRMWARE("radeon/RV770_me.bin");
+MODULE_FIRMWARE("radeon/RV730_pfp.bin");
+MODULE_FIRMWARE("radeon/RV730_me.bin");
+MODULE_FIRMWARE("radeon/RV710_pfp.bin");
+MODULE_FIRMWARE("radeon/RV710_me.bin");
+
+int r600_debugfs_mc_info_init(struct radeon_device *rdev);
 
 /* This files gather functions specifics to:
  * r600,rv610,rv630,rv620,rv635,rv670
@@ -39,87 +73,270 @@ void rs600_mc_disable_clients(struct radeon_device *rdev);
  */
 int r600_mc_wait_for_idle(struct radeon_device *rdev);
 void r600_gpu_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
 
 
 /*
- * MC
+ * R600 PCIE GART
  */
-int r600_mc_init(struct radeon_device *rdev)
+int r600_gart_clear_page(struct radeon_device *rdev, int i)
 {
-	uint32_t tmp;
+	void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
+	u64 pte;
 
-	r600_gpu_init(rdev);
+	if (i < 0 || i > rdev->gart.num_gpu_pages)
+		return -EINVAL;
+	pte = 0;
+	writeq(pte, ((void __iomem *)ptr) + (i * 8));
+	return 0;
+}
 
-	/* setup the gart before changing location so we can ask to
-	 * discard unmapped mc request
-	 */
-	/* FIXME: disable out of gart access */
-	tmp = rdev->mc.gtt_location / 4096;
-	tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
-	tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
-	tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
-
-	rs600_mc_disable_clients(rdev);
-	if (r600_mc_wait_for_idle(rdev)) {
-		printk(KERN_WARNING "Failed to wait MC idle while "
-		       "programming pipes. Bad things might happen.\n");
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev)
+{
+	unsigned i;
+	u32 tmp;
+
+	WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+	WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32(VM_CONTEXT0_REQUEST_RESPONSE);
+		tmp = (tmp & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT;
+		if (tmp == 2) {
+			printk(KERN_WARNING "[drm] r600 flush TLB failed\n");
+			return;
+		}
+		if (tmp) {
+			return;
+		}
+		udelay(1);
+	}
+}
+
+int r600_pcie_gart_enable(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int r, i;
+
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
+	r = radeon_gart_table_vram_alloc(rdev);
+	if (r) {
+		return r;
 	}
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		r600_gart_clear_page(rdev, i);
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+	/* Setup TLB control */
+	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+		ENABLE_WAIT_L2_QUERY;
+	WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING);
+	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+			(u32)(rdev->dummy_page.addr >> 12));
+	for (i = 1; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
-	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-	tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24);
-	tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24);
-	WREG32(R600_MC_VM_FB_LOCATION, tmp);
-	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-	tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22);
-	WREG32(R600_MC_VM_AGP_TOP, tmp);
-	tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
-	WREG32(R600_MC_VM_AGP_BOT, tmp);
+	r600_pcie_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
 	return 0;
 }
 
-void r600_mc_fini(struct radeon_device *rdev)
+void r600_pcie_gart_disable(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
-}
+	u32 tmp;
+	int i;
 
+	/* Clear ptes*/
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		r600_gart_clear_page(rdev, i);
+	r600_pcie_gart_tlb_flush(rdev);
+	/* Disable all tables */
+	for (i = 0; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
-/*
- * Global GPU functions
- */
-void r600_errata(struct radeon_device *rdev)
-{
-	rdev->pll_errata = 0;
+	/* Disable L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+	/* Setup L1 TLB control */
+	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+		ENABLE_WAIT_L2_QUERY;
+	WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
 }
 
 int r600_mc_wait_for_idle(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
-	return 0;
+	unsigned i;
+	u32 tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00;
+		if (!tmp)
+			return 0;
+		udelay(1);
+	}
+	return -1;
 }
 
-void r600_gpu_init(struct radeon_device *rdev)
+static void r600_mc_resume(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
-}
+	u32 d1vga_control, d2vga_control;
+	u32 vga_render_control, vga_hdp_control;
+	u32 d1crtc_control, d2crtc_control;
+	u32 new_d1grph_primary, new_d1grph_secondary;
+	u32 new_d2grph_primary, new_d2grph_secondary;
+	u64 old_vram_start;
+	u32 tmp;
+	int i, j;
 
+	/* Initialize HDP */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-/*
- * VRAM info
- */
-void r600_vram_get_type(struct radeon_device *rdev)
+	d1vga_control = RREG32(D1VGA_CONTROL);
+	d2vga_control = RREG32(D2VGA_CONTROL);
+	vga_render_control = RREG32(VGA_RENDER_CONTROL);
+	vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+	d1crtc_control = RREG32(D1CRTC_CONTROL);
+	d2crtc_control = RREG32(D2CRTC_CONTROL);
+	old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
+	new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d1grph_primary += rdev->mc.vram_start - old_vram_start;
+	new_d1grph_secondary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d2grph_primary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_secondary += rdev->mc.vram_start - old_vram_start;
+
+	/* Stop all video */
+	WREG32(D1VGA_CONTROL, 0);
+	WREG32(D2VGA_CONTROL, 0);
+	WREG32(VGA_RENDER_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, 0);
+	WREG32(D2CRTC_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
+	}
+
+	/* Lockout access through VGA aperture*/
+	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+
+	/* Update configuration */
+	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+	tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16;
+	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+	WREG32(MC_VM_FB_LOCATION, tmp);
+	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+	WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+	if (rdev->flags & RADEON_IS_AGP) {
+		WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16);
+		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+	} else {
+		WREG32(MC_VM_AGP_BASE, 0);
+		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+	}
+	WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary);
+	WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary);
+	WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary);
+	WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary);
+	WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
+
+	/* Unlock host access */
+	WREG32(VGA_HDP_CONTROL, vga_hdp_control);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
+	}
+
+	/* Restore video state */
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, d1crtc_control);
+	WREG32(D2CRTC_CONTROL, d2crtc_control);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+	WREG32(D1VGA_CONTROL, d1vga_control);
+	WREG32(D2VGA_CONTROL, d2vga_control);
+	WREG32(VGA_RENDER_CONTROL, vga_render_control);
+}
+
+int r600_mc_init(struct radeon_device *rdev)
 {
-	uint32_t tmp;
+	fixed20_12 a;
+	u32 tmp;
 	int chansize;
+	int r;
 
+	/* Get VRAM informations */
 	rdev->mc.vram_width = 128;
 	rdev->mc.vram_is_ddr = true;
-
-	tmp = RREG32(R600_RAMCFG);
-	if (tmp & R600_CHANSIZE_OVERRIDE) {
+	tmp = RREG32(RAMCFG);
+	if (tmp & CHANSIZE_OVERRIDE) {
 		chansize = 16;
-	} else if (tmp & R600_CHANSIZE) {
+	} else if (tmp & CHANSIZE_MASK) {
 		chansize = 64;
 	} else {
 		chansize = 32;
@@ -135,36 +352,1391 @@ void r600_vram_get_type(struct radeon_device *rdev)
 			(rdev->family == CHIP_RV635)) {
 		rdev->mc.vram_width = 2 * chansize;
 	}
+	/* Could aper size report 0 ? */
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	/* Setup GPU memory space */
+	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
+	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r)
+			return r;
+		/* gtt_size is setup by radeon_agp_init */
+		rdev->mc.gtt_location = rdev->mc.agp_base;
+		tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
+		/* Try to put vram before or after AGP because we
+		 * we want SYSTEM_APERTURE to cover both VRAM and
+		 * AGP so that GPU can catch out of VRAM/AGP access
+		 */
+		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
+			/* Enought place before */
+			rdev->mc.vram_location = rdev->mc.gtt_location -
+							rdev->mc.mc_vram_size;
+		} else if (tmp > rdev->mc.mc_vram_size) {
+			/* Enought place after */
+			rdev->mc.vram_location = rdev->mc.gtt_location +
+							rdev->mc.gtt_size;
+		} else {
+			/* Try to setup VRAM then AGP might not
+			 * not work on some card
+			 */
+			rdev->mc.vram_location = 0x00000000UL;
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		}
+	} else {
+		if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+			rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) &
+								0xFFFF) << 24;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+			tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+			if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
+				/* Enough place after vram */
+				rdev->mc.gtt_location = tmp;
+			} else if (rdev->mc.vram_location >= rdev->mc.gtt_size) {
+				/* Enough place before vram */
+				rdev->mc.gtt_location = 0;
+			} else {
+				/* Not enough place after or before shrink
+				 * gart size
+				 */
+				if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) {
+					rdev->mc.gtt_location = 0;
+					rdev->mc.gtt_size = rdev->mc.vram_location;
+				} else {
+					rdev->mc.gtt_location = tmp;
+					rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp;
+				}
+			}
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		} else {
+			rdev->mc.vram_location = 0x00000000UL;
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+		}
+	}
+	rdev->mc.vram_start = rdev->mc.vram_location;
+	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+	rdev->mc.gtt_start = rdev->mc.gtt_location;
+	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size;
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+	return 0;
 }
 
-void r600_vram_info(struct radeon_device *rdev)
+/* We doesn't check that the GPU really needs a reset we simply do the
+ * reset, it's up to the caller to determine if the GPU needs one. We
+ * might add an helper function to check that.
+ */
+int r600_gpu_soft_reset(struct radeon_device *rdev)
 {
-	r600_vram_get_type(rdev);
-	rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
-	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
+	u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) |
+				S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) |
+				S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) |
+				S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) |
+				S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) |
+				S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) |
+				S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) |
+				S_008010_GUI_ACTIVE(1);
+	u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) |
+			S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) |
+			S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) |
+			S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) |
+			S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) |
+			S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) |
+			S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) |
+			S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1);
+	u32 srbm_reset = 0;
 
-	/* Could aper size report 0 ? */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	/* Disable CP parsing/prefetching */
+	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff));
+	/* Check if any of the rendering block is busy and reset it */
+	if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) ||
+	    (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) {
+		WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CR(1) |
+			S_008020_SOFT_RESET_DB(1) |
+			S_008020_SOFT_RESET_CB(1) |
+			S_008020_SOFT_RESET_PA(1) |
+			S_008020_SOFT_RESET_SC(1) |
+			S_008020_SOFT_RESET_SMX(1) |
+			S_008020_SOFT_RESET_SPI(1) |
+			S_008020_SOFT_RESET_SX(1) |
+			S_008020_SOFT_RESET_SH(1) |
+			S_008020_SOFT_RESET_TC(1) |
+			S_008020_SOFT_RESET_TA(1) |
+			S_008020_SOFT_RESET_VC(1) |
+			S_008020_SOFT_RESET_VGT(1));
+		(void)RREG32(R_008020_GRBM_SOFT_RESET);
+		udelay(50);
+		WREG32(R_008020_GRBM_SOFT_RESET, 0);
+		(void)RREG32(R_008020_GRBM_SOFT_RESET);
+	}
+	/* Reset CP (we always reset CP) */
+	WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CP(1));
+	(void)RREG32(R_008020_GRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(R_008020_GRBM_SOFT_RESET, 0);
+	(void)RREG32(R_008020_GRBM_SOFT_RESET);
+	/* Reset others GPU block if necessary */
+	if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+	if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_GRBM(1);
+	if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_IH(1);
+	if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_VMC(1);
+	if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+	if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_SEM(1);
+	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
+	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
+	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
+	/* Wait a little for things to settle down */
+	udelay(50);
+	return 0;
+}
+
+int r600_gpu_reset(struct radeon_device *rdev)
+{
+	return r600_gpu_soft_reset(rdev);
+}
+
+static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
+					     u32 num_backends,
+					     u32 backend_disable_mask)
+{
+	u32 backend_map = 0;
+	u32 enabled_backends_mask;
+	u32 enabled_backends_count;
+	u32 cur_pipe;
+	u32 swizzle_pipe[R6XX_MAX_PIPES];
+	u32 cur_backend;
+	u32 i;
+
+	if (num_tile_pipes > R6XX_MAX_PIPES)
+		num_tile_pipes = R6XX_MAX_PIPES;
+	if (num_tile_pipes < 1)
+		num_tile_pipes = 1;
+	if (num_backends > R6XX_MAX_BACKENDS)
+		num_backends = R6XX_MAX_BACKENDS;
+	if (num_backends < 1)
+		num_backends = 1;
+
+	enabled_backends_mask = 0;
+	enabled_backends_count = 0;
+	for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
+		if (((backend_disable_mask >> i) & 1) == 0) {
+			enabled_backends_mask |= (1 << i);
+			++enabled_backends_count;
+		}
+		if (enabled_backends_count == num_backends)
+			break;
+	}
+
+	if (enabled_backends_count == 0) {
+		enabled_backends_mask = 1;
+		enabled_backends_count = 1;
+	}
+
+	if (enabled_backends_count != num_backends)
+		num_backends = enabled_backends_count;
+
+	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
+	switch (num_tile_pipes) {
+	case 1:
+		swizzle_pipe[0] = 0;
+		break;
+	case 2:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		break;
+	case 3:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		swizzle_pipe[2] = 2;
+		break;
+	case 4:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		swizzle_pipe[2] = 2;
+		swizzle_pipe[3] = 3;
+		break;
+	case 5:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		swizzle_pipe[2] = 2;
+		swizzle_pipe[3] = 3;
+		swizzle_pipe[4] = 4;
+		break;
+	case 6:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 5;
+		swizzle_pipe[4] = 1;
+		swizzle_pipe[5] = 3;
+		break;
+	case 7:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 1;
+		swizzle_pipe[5] = 3;
+		swizzle_pipe[6] = 5;
+		break;
+	case 8:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 1;
+		swizzle_pipe[5] = 3;
+		swizzle_pipe[6] = 5;
+		swizzle_pipe[7] = 7;
+		break;
+	}
+
+	cur_backend = 0;
+	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
+		while (((1 << cur_backend) & enabled_backends_mask) == 0)
+			cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
+
+		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
+
+		cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
+	}
+
+	return backend_map;
+}
+
+int r600_count_pipe_bits(uint32_t val)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < 32; i++) {
+		ret += val & 1;
+		val >>= 1;
+	}
+	return ret;
 }
 
+void r600_gpu_init(struct radeon_device *rdev)
+{
+	u32 tiling_config;
+	u32 ramcfg;
+	u32 tmp;
+	int i, j;
+	u32 sq_config;
+	u32 sq_gpr_resource_mgmt_1 = 0;
+	u32 sq_gpr_resource_mgmt_2 = 0;
+	u32 sq_thread_resource_mgmt = 0;
+	u32 sq_stack_resource_mgmt_1 = 0;
+	u32 sq_stack_resource_mgmt_2 = 0;
+
+	/* FIXME: implement */
+	switch (rdev->family) {
+	case CHIP_R600:
+		rdev->config.r600.max_pipes = 4;
+		rdev->config.r600.max_tile_pipes = 8;
+		rdev->config.r600.max_simds = 4;
+		rdev->config.r600.max_backends = 4;
+		rdev->config.r600.max_gprs = 256;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 256;
+		rdev->config.r600.max_hw_contexts = 8;
+		rdev->config.r600.max_gs_threads = 16;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 2;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		rdev->config.r600.max_pipes = 2;
+		rdev->config.r600.max_tile_pipes = 2;
+		rdev->config.r600.max_simds = 3;
+		rdev->config.r600.max_backends = 1;
+		rdev->config.r600.max_gprs = 128;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 128;
+		rdev->config.r600.max_hw_contexts = 8;
+		rdev->config.r600.max_gs_threads = 4;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 2;
+		break;
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+		rdev->config.r600.max_pipes = 1;
+		rdev->config.r600.max_tile_pipes = 1;
+		rdev->config.r600.max_simds = 2;
+		rdev->config.r600.max_backends = 1;
+		rdev->config.r600.max_gprs = 128;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 128;
+		rdev->config.r600.max_hw_contexts = 4;
+		rdev->config.r600.max_gs_threads = 4;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 1;
+		break;
+	case CHIP_RV670:
+		rdev->config.r600.max_pipes = 4;
+		rdev->config.r600.max_tile_pipes = 4;
+		rdev->config.r600.max_simds = 4;
+		rdev->config.r600.max_backends = 4;
+		rdev->config.r600.max_gprs = 192;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 256;
+		rdev->config.r600.max_hw_contexts = 8;
+		rdev->config.r600.max_gs_threads = 16;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 2;
+		break;
+	default:
+		break;
+	}
+
+	/* Initialize HDP */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+
+	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+	/* Setup tiling */
+	tiling_config = 0;
+	ramcfg = RREG32(RAMCFG);
+	switch (rdev->config.r600.max_tile_pipes) {
+	case 1:
+		tiling_config |= PIPE_TILING(0);
+		break;
+	case 2:
+		tiling_config |= PIPE_TILING(1);
+		break;
+	case 4:
+		tiling_config |= PIPE_TILING(2);
+		break;
+	case 8:
+		tiling_config |= PIPE_TILING(3);
+		break;
+	default:
+		break;
+	}
+	tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
+	tiling_config |= GROUP_SIZE(0);
+	tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
+	if (tmp > 3) {
+		tiling_config |= ROW_TILING(3);
+		tiling_config |= SAMPLE_SPLIT(3);
+	} else {
+		tiling_config |= ROW_TILING(tmp);
+		tiling_config |= SAMPLE_SPLIT(tmp);
+	}
+	tiling_config |= BANK_SWAPS(1);
+	tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes,
+						rdev->config.r600.max_backends,
+						(0xff << rdev->config.r600.max_backends) & 0xff);
+	tiling_config |= BACKEND_MAP(tmp);
+	WREG32(GB_TILING_CONFIG, tiling_config);
+	WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
+	WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+
+	tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK);
+	WREG32(CC_RB_BACKEND_DISABLE, tmp);
+
+	/* Setup pipes */
+	tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK);
+	tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK);
+	WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp);
+	WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp);
+
+	tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK);
+	WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
+	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK);
+
+	/* Setup some CP states */
+	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b)));
+	WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40)));
+
+	WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT |
+			     SYNC_WALKER | SYNC_ALIGNER));
+	/* Setup various GPU states */
+	if (rdev->family == CHIP_RV670)
+		WREG32(ARB_GDEC_RD_CNTL, 0x00000021);
+
+	tmp = RREG32(SX_DEBUG_1);
+	tmp |= SMX_EVENT_RELEASE;
+	if ((rdev->family > CHIP_R600))
+		tmp |= ENABLE_NEW_SMX_ADDRESS;
+	WREG32(SX_DEBUG_1, tmp);
+
+	if (((rdev->family) == CHIP_R600) ||
+	    ((rdev->family) == CHIP_RV630) ||
+	    ((rdev->family) == CHIP_RV610) ||
+	    ((rdev->family) == CHIP_RV620) ||
+	    ((rdev->family) == CHIP_RS780)) {
+		WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE);
+	} else {
+		WREG32(DB_DEBUG, 0);
+	}
+	WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) |
+			       DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4)));
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+	WREG32(VGT_NUM_INSTANCES, 0);
+
+	WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
+	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0));
+
+	tmp = RREG32(SQ_MS_FIFO_SIZES);
+	if (((rdev->family) == CHIP_RV610) ||
+	    ((rdev->family) == CHIP_RV620) ||
+	    ((rdev->family) == CHIP_RS780)) {
+		tmp = (CACHE_FIFO_SIZE(0xa) |
+		       FETCH_FIFO_HIWATER(0xa) |
+		       DONE_FIFO_HIWATER(0xe0) |
+		       ALU_UPDATE_FIFO_HIWATER(0x8));
+	} else if (((rdev->family) == CHIP_R600) ||
+		   ((rdev->family) == CHIP_RV630)) {
+		tmp &= ~DONE_FIFO_HIWATER(0xff);
+		tmp |= DONE_FIFO_HIWATER(0x4);
+	}
+	WREG32(SQ_MS_FIFO_SIZES, tmp);
+
+	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
+	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
+	 */
+	sq_config = RREG32(SQ_CONFIG);
+	sq_config &= ~(PS_PRIO(3) |
+		       VS_PRIO(3) |
+		       GS_PRIO(3) |
+		       ES_PRIO(3));
+	sq_config |= (DX9_CONSTS |
+		      VC_ENABLE |
+		      PS_PRIO(0) |
+		      VS_PRIO(1) |
+		      GS_PRIO(2) |
+		      ES_PRIO(3));
+
+	if ((rdev->family) == CHIP_R600) {
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) |
+					  NUM_VS_GPRS(124) |
+					  NUM_CLAUSE_TEMP_GPRS(4));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) |
+					  NUM_ES_GPRS(0));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(136) |
+					   NUM_VS_THREADS(48) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(4));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) |
+					    NUM_VS_STACK_ENTRIES(128));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) |
+					    NUM_ES_STACK_ENTRIES(0));
+	} else if (((rdev->family) == CHIP_RV610) ||
+		   ((rdev->family) == CHIP_RV620) ||
+		   ((rdev->family) == CHIP_RS780)) {
+		/* no vertex cache */
+		sq_config &= ~VC_ENABLE;
+
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+					  NUM_VS_GPRS(44) |
+					  NUM_CLAUSE_TEMP_GPRS(2));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+					  NUM_ES_GPRS(17));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+					   NUM_VS_THREADS(78) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(31));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+					    NUM_VS_STACK_ENTRIES(40));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+					    NUM_ES_STACK_ENTRIES(16));
+	} else if (((rdev->family) == CHIP_RV630) ||
+		   ((rdev->family) == CHIP_RV635)) {
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+					  NUM_VS_GPRS(44) |
+					  NUM_CLAUSE_TEMP_GPRS(2));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) |
+					  NUM_ES_GPRS(18));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+					   NUM_VS_THREADS(78) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(31));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+					    NUM_VS_STACK_ENTRIES(40));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+					    NUM_ES_STACK_ENTRIES(16));
+	} else if ((rdev->family) == CHIP_RV670) {
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+					  NUM_VS_GPRS(44) |
+					  NUM_CLAUSE_TEMP_GPRS(2));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+					  NUM_ES_GPRS(17));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+					   NUM_VS_THREADS(78) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(31));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) |
+					    NUM_VS_STACK_ENTRIES(64));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) |
+					    NUM_ES_STACK_ENTRIES(64));
+	}
+
+	WREG32(SQ_CONFIG, sq_config);
+	WREG32(SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
+	WREG32(SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
+	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
+	WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
+	WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
+
+	if (((rdev->family) == CHIP_RV610) ||
+	    ((rdev->family) == CHIP_RV620) ||
+	    ((rdev->family) == CHIP_RS780)) {
+		WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY));
+	} else {
+		WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC));
+	}
+
+	/* More default values. 2D/3D driver should adjust as needed */
+	WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) |
+					 S1_X(0x4) | S1_Y(0xc)));
+	WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) |
+					 S1_X(0x2) | S1_Y(0x2) |
+					 S2_X(0xa) | S2_Y(0x6) |
+					 S3_X(0x6) | S3_Y(0xa)));
+	WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) |
+					     S1_X(0x4) | S1_Y(0xc) |
+					     S2_X(0x1) | S2_Y(0x6) |
+					     S3_X(0xa) | S3_Y(0xe)));
+	WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) |
+					     S5_X(0x0) | S5_Y(0x0) |
+					     S6_X(0xb) | S6_Y(0x4) |
+					     S7_X(0x7) | S7_Y(0x8)));
+
+	WREG32(VGT_STRMOUT_EN, 0);
+	tmp = rdev->config.r600.max_pipes * 16;
+	switch (rdev->family) {
+	case CHIP_RV610:
+	case CHIP_RS780:
+	case CHIP_RV620:
+		tmp += 32;
+		break;
+	case CHIP_RV670:
+		tmp += 128;
+		break;
+	default:
+		break;
+	}
+	if (tmp > 256) {
+		tmp = 256;
+	}
+	WREG32(VGT_ES_PER_GS, 128);
+	WREG32(VGT_GS_PER_ES, tmp);
+	WREG32(VGT_GS_PER_VS, 2);
+	WREG32(VGT_GS_VERTEX_REUSE, 16);
+
+	/* more default values. 2D/3D driver should adjust as needed */
+	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+	WREG32(VGT_STRMOUT_EN, 0);
+	WREG32(SX_MISC, 0);
+	WREG32(PA_SC_MODE_CNTL, 0);
+	WREG32(PA_SC_AA_CONFIG, 0);
+	WREG32(PA_SC_LINE_STIPPLE, 0);
+	WREG32(SPI_INPUT_Z, 0);
+	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
+	WREG32(CB_COLOR7_FRAG, 0);
+
+	/* Clear render buffer base addresses */
+	WREG32(CB_COLOR0_BASE, 0);
+	WREG32(CB_COLOR1_BASE, 0);
+	WREG32(CB_COLOR2_BASE, 0);
+	WREG32(CB_COLOR3_BASE, 0);
+	WREG32(CB_COLOR4_BASE, 0);
+	WREG32(CB_COLOR5_BASE, 0);
+	WREG32(CB_COLOR6_BASE, 0);
+	WREG32(CB_COLOR7_BASE, 0);
+	WREG32(CB_COLOR7_FRAG, 0);
+
+	switch (rdev->family) {
+	case CHIP_RV610:
+	case CHIP_RS780:
+	case CHIP_RV620:
+		tmp = TC_L2_SIZE(8);
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		tmp = TC_L2_SIZE(4);
+		break;
+	case CHIP_R600:
+		tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT;
+		break;
+	default:
+		tmp = TC_L2_SIZE(0);
+		break;
+	}
+	WREG32(TC_CNTL, tmp);
+
+	tmp = RREG32(HDP_HOST_PATH_CNTL);
+	WREG32(HDP_HOST_PATH_CNTL, tmp);
+
+	tmp = RREG32(ARB_POP);
+	tmp |= ENABLE_TC128;
+	WREG32(ARB_POP, tmp);
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
+			       NUM_CLIP_SEQ(3)));
+	WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095));
+}
+
+
 /*
  * Indirect registers accessor
  */
-uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg)
+u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg)
+{
+	u32 r;
+
+	WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
+	(void)RREG32(PCIE_PORT_INDEX);
+	r = RREG32(PCIE_PORT_DATA);
+	return r;
+}
+
+void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
+	(void)RREG32(PCIE_PORT_INDEX);
+	WREG32(PCIE_PORT_DATA, (v));
+	(void)RREG32(PCIE_PORT_DATA);
+}
+
+
+/*
+ * CP & Ring
+ */
+void r600_cp_stop(struct radeon_device *rdev)
+{
+	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
+}
+
+int r600_cp_init_microcode(struct radeon_device *rdev)
+{
+	struct platform_device *pdev;
+	const char *chip_name;
+	size_t pfp_req_size, me_req_size;
+	char fw_name[30];
+	int err;
+
+	DRM_DEBUG("\n");
+
+	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
+	err = IS_ERR(pdev);
+	if (err) {
+		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
+		return -EINVAL;
+	}
+
+	switch (rdev->family) {
+	case CHIP_R600: chip_name = "R600"; break;
+	case CHIP_RV610: chip_name = "RV610"; break;
+	case CHIP_RV630: chip_name = "RV630"; break;
+	case CHIP_RV620: chip_name = "RV620"; break;
+	case CHIP_RV635: chip_name = "RV635"; break;
+	case CHIP_RV670: chip_name = "RV670"; break;
+	case CHIP_RS780:
+	case CHIP_RS880: chip_name = "RS780"; break;
+	case CHIP_RV770: chip_name = "RV770"; break;
+	case CHIP_RV730:
+	case CHIP_RV740: chip_name = "RV730"; break;
+	case CHIP_RV710: chip_name = "RV710"; break;
+	default: BUG();
+	}
+
+	if (rdev->family >= CHIP_RV770) {
+		pfp_req_size = R700_PFP_UCODE_SIZE * 4;
+		me_req_size = R700_PM4_UCODE_SIZE * 4;
+	} else {
+		pfp_req_size = PFP_UCODE_SIZE * 4;
+		me_req_size = PM4_UCODE_SIZE * 12;
+	}
+
+	DRM_INFO("Loading %s CP Microcode\n", chip_name);
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->pfp_fw->size != pfp_req_size) {
+		printk(KERN_ERR
+		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->pfp_fw->size, fw_name);
+		err = -EINVAL;
+		goto out;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->me_fw->size != me_req_size) {
+		printk(KERN_ERR
+		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->me_fw->size, fw_name);
+		err = -EINVAL;
+	}
+out:
+	platform_device_unregister(pdev);
+
+	if (err) {
+		if (err != -EINVAL)
+			printk(KERN_ERR
+			       "r600_cp: Failed to load firmware \"%s\"\n",
+			       fw_name);
+		release_firmware(rdev->pfp_fw);
+		rdev->pfp_fw = NULL;
+		release_firmware(rdev->me_fw);
+		rdev->me_fw = NULL;
+	}
+	return err;
+}
+
+static int r600_cp_load_microcode(struct radeon_device *rdev)
+{
+	const __be32 *fw_data;
+	int i;
+
+	if (!rdev->me_fw || !rdev->pfp_fw)
+		return -EINVAL;
+
+	r600_cp_stop(rdev);
+
+	WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
+
+	/* Reset cp */
+	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+	RREG32(GRBM_SOFT_RESET);
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+
+	WREG32(CP_ME_RAM_WADDR, 0);
+
+	fw_data = (const __be32 *)rdev->me_fw->data;
+	WREG32(CP_ME_RAM_WADDR, 0);
+	for (i = 0; i < PM4_UCODE_SIZE * 3; i++)
+		WREG32(CP_ME_RAM_DATA,
+		       be32_to_cpup(fw_data++));
+
+	fw_data = (const __be32 *)rdev->pfp_fw->data;
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	for (i = 0; i < PFP_UCODE_SIZE; i++)
+		WREG32(CP_PFP_UCODE_DATA,
+		       be32_to_cpup(fw_data++));
+
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	WREG32(CP_ME_RAM_WADDR, 0);
+	WREG32(CP_ME_RAM_RADDR, 0);
+	return 0;
+}
+
+int r600_cp_start(struct radeon_device *rdev)
+{
+	int r;
+	uint32_t cp_me;
+
+	r = radeon_ring_lock(rdev, 7);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
+	radeon_ring_write(rdev, 0x1);
+	if (rdev->family < CHIP_RV770) {
+		radeon_ring_write(rdev, 0x3);
+		radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1);
+	} else {
+		radeon_ring_write(rdev, 0x0);
+		radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1);
+	}
+	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_unlock_commit(rdev);
+
+	cp_me = 0xff;
+	WREG32(R_0086D8_CP_ME_CNTL, cp_me);
+	return 0;
+}
+
+int r600_cp_resume(struct radeon_device *rdev)
+{
+	u32 tmp;
+	u32 rb_bufsz;
+	int r;
+
+	/* Reset cp */
+	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+	RREG32(GRBM_SOFT_RESET);
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+
+	/* Set ring buffer size */
+	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+#ifdef __BIG_ENDIAN
+	WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE |
+		(drm_order(4096/8) << 8) | rb_bufsz);
+#else
+	WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(4096/8) << 8) | rb_bufsz);
+#endif
+	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+
+	/* Set the write pointer delay */
+	WREG32(CP_RB_WPTR_DELAY, 0);
+
+	/* Initialize the ring buffer's read and write pointers */
+	tmp = RREG32(CP_RB_CNTL);
+	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
+	WREG32(CP_RB_RPTR_WR, 0);
+	WREG32(CP_RB_WPTR, 0);
+	WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF);
+	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr));
+	mdelay(1);
+	WREG32(CP_RB_CNTL, tmp);
+
+	WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8);
+	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
+
+	rdev->cp.rptr = RREG32(CP_RB_RPTR);
+	rdev->cp.wptr = RREG32(CP_RB_WPTR);
+
+	r600_cp_start(rdev);
+	rdev->cp.ready = true;
+	r = radeon_ring_test(rdev);
+	if (r) {
+		rdev->cp.ready = false;
+		return r;
+	}
+	return 0;
+}
+
+void r600_cp_commit(struct radeon_device *rdev)
+{
+	WREG32(CP_RB_WPTR, rdev->cp.wptr);
+	(void)RREG32(CP_RB_WPTR);
+}
+
+void r600_ring_init(struct radeon_device *rdev, unsigned ring_size)
+{
+	u32 rb_bufsz;
+
+	/* Align ring size */
+	rb_bufsz = drm_order(ring_size / 8);
+	ring_size = (1 << (rb_bufsz + 1)) * 4;
+	rdev->cp.ring_size = ring_size;
+	rdev->cp.align_mask = 16 - 1;
+}
+
+
+/*
+ * GPU scratch registers helpers function.
+ */
+void r600_scratch_init(struct radeon_device *rdev)
+{
+	int i;
+
+	rdev->scratch.num_reg = 7;
+	for (i = 0; i < rdev->scratch.num_reg; i++) {
+		rdev->scratch.free[i] = true;
+		rdev->scratch.reg[i] = SCRATCH_REG0 + (i * 4);
+	}
+}
+
+int r600_ring_test(struct radeon_device *rdev)
+{
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, 3);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		radeon_scratch_free(rdev, scratch);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+	radeon_ring_write(rdev, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test succeeded in %d usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
+
+/*
+ * Writeback
+ */
+int r600_wb_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->wb.wb_obj == NULL) {
+		r = radeon_object_create(rdev, NULL, 4096,
+					 true,
+					 RADEON_GEM_DOMAIN_GTT,
+					 false, &rdev->wb.wb_obj);
+		if (r) {
+			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
+			return r;
+		}
+		r = radeon_object_pin(rdev->wb.wb_obj,
+				      RADEON_GEM_DOMAIN_GTT,
+				      &rdev->wb.gpu_addr);
+		if (r) {
+			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
+			return r;
+		}
+		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
+		if (r) {
+			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
+			return r;
+		}
+	}
+	WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF);
+	WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC);
+	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF);
+	WREG32(SCRATCH_UMSK, 0xff);
+	return 0;
+}
+
+void r600_wb_fini(struct radeon_device *rdev)
+{
+	if (rdev->wb.wb_obj) {
+		radeon_object_kunmap(rdev->wb.wb_obj);
+		radeon_object_unpin(rdev->wb.wb_obj);
+		radeon_object_unref(&rdev->wb.wb_obj);
+		rdev->wb.wb = NULL;
+		rdev->wb.wb_obj = NULL;
+	}
+}
+
+
+/*
+ * CS
+ */
+void r600_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence)
+{
+	/* Emit fence sequence & fire IRQ */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+	radeon_ring_write(rdev, fence->seq);
+}
+
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int r600_copy_blit(struct radeon_device *rdev,
+		   uint64_t src_offset, uint64_t dst_offset,
+		   unsigned num_pages, struct radeon_fence *fence)
+{
+	r600_blit_prepare_copy(rdev, num_pages * 4096);
+	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * 4096);
+	r600_blit_done_copy(rdev, fence);
+	return 0;
+}
+
+int r600_irq_process(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int r600_irq_set(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
+{
+	/* FIXME: implement */
+}
+
+
+bool r600_card_posted(struct radeon_device *rdev)
+{
+	uint32_t reg;
+
+	/* first check CRTCs */
+	reg = RREG32(D1CRTC_CONTROL) |
+		RREG32(D2CRTC_CONTROL);
+	if (reg & CRTC_EN)
+		return true;
+
+	/* then check MEM_SIZE, in case the crtcs are off */
+	if (RREG32(CONFIG_MEMSIZE))
+		return true;
+
+	return false;
+}
+
+int r600_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	r600_gpu_reset(rdev);
+	r600_mc_resume(rdev);
+	r = r600_pcie_gart_enable(rdev);
+	if (r)
+		return r;
+	r600_gpu_init(rdev);
+	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	if (r)
+		return r;
+	r = r600_cp_load_microcode(rdev);
+	if (r)
+		return r;
+	r = r600_cp_resume(rdev);
+	if (r)
+		return r;
+	r = r600_wb_init(rdev);
+	if (r)
+		return r;
+	return 0;
+}
+
+int r600_suspend(struct radeon_device *rdev)
+{
+	/* FIXME: we should wait for ring to be empty */
+	r600_cp_stop(rdev);
+	return 0;
+}
+
+/* Plan is to move initialization in that function and use
+ * helper function so that radeon_device_init pretty much
+ * do nothing more than calling asic specific function. This
+ * should also allow to remove a bunch of callback function
+ * like vram_info.
+ */
+int r600_init(struct radeon_device *rdev)
 {
-	uint32_t r;
+	int r;
 
-	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
-	(void)RREG32(R600_PCIE_PORT_INDEX);
-	r = RREG32(R600_PCIE_PORT_DATA);
+	rdev->new_init_path = true;
+	r = radeon_dummy_page_init(rdev);
+	if (r)
+		return r;
+	if (r600_debugfs_mc_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for mc !\n");
+	}
+	/* This don't do much */
+	r = radeon_gem_init(rdev);
+	if (r)
+		return r;
+	/* Read BIOS */
+	if (!radeon_get_bios(rdev)) {
+		if (ASIC_IS_AVIVO(rdev))
+			return -EINVAL;
+	}
+	/* Must be an ATOMBIOS */
+	if (!rdev->is_atom_bios)
+		return -EINVAL;
+	r = radeon_atombios_init(rdev);
+	if (r)
+		return r;
+	/* Post card if necessary */
+	if (!r600_card_posted(rdev) && rdev->bios) {
+		DRM_INFO("GPU not posted. posting now...\n");
+		atom_asic_init(rdev->mode_info.atom_context);
+	}
+	/* Initialize scratch registers */
+	r600_scratch_init(rdev);
+	/* Initialize surface registers */
+	radeon_surface_init(rdev);
+	r = radeon_clocks_init(rdev);
+	if (r)
+		return r;
+	/* Fence driver */
+	r = radeon_fence_driver_init(rdev);
+	if (r)
+		return r;
+	r = r600_mc_init(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with disabling AGP */
+			r600_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return r600_init(rdev);
+		}
+		return r;
+	}
+	/* Memory manager */
+	r = radeon_object_init(rdev);
+	if (r)
+		return r;
+	rdev->cp.ring_obj = NULL;
+	r600_ring_init(rdev, 1024 * 1024);
+
+	if (!rdev->me_fw || !rdev->pfp_fw) {
+		r = r600_cp_init_microcode(rdev);
+		if (r) {
+			DRM_ERROR("Failed to load firmware!\n");
+			return r;
+		}
+	}
+
+	r = r600_resume(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with disabling AGP */
+			r600_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return r600_init(rdev);
+		}
+		return r;
+	}
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+		return r;
+	}
+	r = r600_blit_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled blitter (%d).\n", r);
+		return r;
+	}
+	r = radeon_ib_test(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+			return r;
+	}
+	return 0;
+}
+
+void r600_fini(struct radeon_device *rdev)
+{
+	/* Suspend operations */
+	r600_suspend(rdev);
+
+	r600_blit_fini(rdev);
+	radeon_ring_fini(rdev);
+	r600_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+	radeon_gem_fini(rdev);
+	radeon_fence_driver_fini(rdev);
+	radeon_clocks_fini(rdev);
+#if __OS_HAS_AGP
+	if (rdev->flags & RADEON_IS_AGP)
+		radeon_agp_fini(rdev);
+#endif
+	radeon_object_fini(rdev);
+	if (rdev->is_atom_bios)
+		radeon_atombios_fini(rdev);
+	else
+		radeon_combios_fini(rdev);
+	kfree(rdev->bios);
+	rdev->bios = NULL;
+	radeon_dummy_page_fini(rdev);
+}
+
+
+/*
+ * CS stuff
+ */
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	/* FIXME: implement */
+	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC);
+	radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF);
+	radeon_ring_write(rdev, ib->length_dw);
+}
+
+int r600_ib_test(struct radeon_device *rdev)
+{
+	struct radeon_ib *ib;
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ib_get(rdev, &ib);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+	ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
+	ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	ib->ptr[2] = 0xDEADBEEF;
+	ib->ptr[3] = PACKET2(0);
+	ib->ptr[4] = PACKET2(0);
+	ib->ptr[5] = PACKET2(0);
+	ib->ptr[6] = PACKET2(0);
+	ib->ptr[7] = PACKET2(0);
+	ib->ptr[8] = PACKET2(0);
+	ib->ptr[9] = PACKET2(0);
+	ib->ptr[10] = PACKET2(0);
+	ib->ptr[11] = PACKET2(0);
+	ib->ptr[12] = PACKET2(0);
+	ib->ptr[13] = PACKET2(0);
+	ib->ptr[14] = PACKET2(0);
+	ib->ptr[15] = PACKET2(0);
+	ib->length_dw = 16;
+	r = radeon_ib_schedule(rdev, ib);
+	if (r) {
+		radeon_scratch_free(rdev, scratch);
+		radeon_ib_free(rdev, &ib);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		return r;
+	}
+	r = radeon_fence_wait(ib->fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test succeeded in %u usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	radeon_ib_free(rdev, &ib);
 	return r;
 }
 
-void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data)
 {
-	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
-	(void)RREG32(R600_PCIE_PORT_INDEX);
-	WREG32(R600_PCIE_PORT_DATA, (v));
-	(void)RREG32(R600_PCIE_PORT_DATA);
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t rdp, wdp;
+	unsigned count, i, j;
+
+	radeon_ring_free_size(rdev);
+	rdp = RREG32(CP_RB_RPTR);
+	wdp = RREG32(CP_RB_WPTR);
+	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
+	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT));
+	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
+	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
+	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
+	seq_printf(m, "%u dwords in ring\n", count);
+	for (j = 0; j <= count; j++) {
+		i = (rdp + j) & rdev->cp.ptr_mask;
+		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
+	}
+	return 0;
+}
+
+static int r600_debugfs_mc_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS);
+	DREG32_SYS(m, rdev, VM_L2_STATUS);
+	return 0;
+}
+
+static struct drm_info_list r600_mc_info_list[] = {
+	{"r600_mc_info", r600_debugfs_mc_info, 0, NULL},
+	{"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL},
+};
+#endif
+
+int r600_debugfs_mc_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list));
+#else
+	return 0;
+#endif
 }
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
new file mode 100644
index 0000000000000000000000000000000000000000..c51402e92493e9bbb6062e69a9e106d1e7546a14
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *     Alex Deucher <alexander.deucher@amd.com>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_drv.h"
+
+#include "r600_blit_shaders.h"
+
+#define DI_PT_RECTLIST        0x11
+#define DI_INDEX_SIZE_16_BIT  0x0
+#define DI_SRC_SEL_AUTO_INDEX 0x2
+
+#define FMT_8                 0x1
+#define FMT_5_6_5             0x8
+#define FMT_8_8_8_8           0x1a
+#define COLOR_8               0x1
+#define COLOR_5_6_5           0x8
+#define COLOR_8_8_8_8         0x1a
+
+static inline void
+set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
+{
+	u32 cb_color_info;
+	int pitch, slice;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	h = (h + 7) & ~7;
+	if (h < 8)
+		h = 8;
+
+	cb_color_info = ((format << 2) | (1 << 27));
+	pitch = (w / 8) - 1;
+	slice = ((w * h) / 64) - 1;
+
+	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
+		BEGIN_RING(21 + 2);
+		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+		OUT_RING(gpu_addr >> 8);
+		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+		OUT_RING(2 << 0);
+	} else {
+		BEGIN_RING(21);
+		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+		OUT_RING(gpu_addr >> 8);
+	}
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING((pitch << 0) | (slice << 10));
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(cb_color_info);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	ADVANCE_RING();
+}
+
+static inline void
+cp_set_surface_sync(drm_radeon_private_t *dev_priv,
+		    u32 sync_type, u32 size, u64 mc_addr)
+{
+	u32 cp_coher_size;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	if (size == 0xffffffff)
+		cp_coher_size = 0xffffffff;
+	else
+		cp_coher_size = ((size + 255) >> 8);
+
+	BEGIN_RING(5);
+	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
+	OUT_RING(sync_type);
+	OUT_RING(cp_coher_size);
+	OUT_RING((mc_addr >> 8));
+	OUT_RING(10); /* poll interval */
+	ADVANCE_RING();
+}
+
+static inline void
+set_shaders(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	u64 gpu_addr;
+	int shader_size, i;
+	u32 *vs, *ps;
+	uint32_t sq_pgm_resources;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	/* load shaders */
+	vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
+	ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
+
+	shader_size = r6xx_vs_size;
+	for (i = 0; i < shader_size; i++)
+		vs[i] = r6xx_vs[i];
+	shader_size = r6xx_ps_size;
+	for (i = 0; i < shader_size; i++)
+		ps[i] = r6xx_ps[i];
+
+	dev_priv->blit_vb->used = 512;
+
+	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
+
+	/* setup shader regs */
+	sq_pgm_resources = (1 << 0);
+
+	BEGIN_RING(9 + 12);
+	/* VS */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(gpu_addr >> 8);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(sq_pgm_resources);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	/* PS */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING((gpu_addr + 256) >> 8);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(sq_pgm_resources | (1 << 28));
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(2);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+	ADVANCE_RING();
+
+	cp_set_surface_sync(dev_priv,
+			    R600_SH_ACTION_ENA, 512, gpu_addr);
+}
+
+static inline void
+set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
+{
+	uint32_t sq_vtx_constant_word2;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
+
+	BEGIN_RING(9);
+	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+	OUT_RING(0x460);
+	OUT_RING(gpu_addr & 0xffffffff);
+	OUT_RING(48 - 1);
+	OUT_RING(sq_vtx_constant_word2);
+	OUT_RING(1 << 0);
+	OUT_RING(0);
+	OUT_RING(0);
+	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
+	ADVANCE_RING();
+
+	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
+		cp_set_surface_sync(dev_priv,
+				    R600_TC_ACTION_ENA, 48, gpu_addr);
+	else
+		cp_set_surface_sync(dev_priv,
+				    R600_VC_ACTION_ENA, 48, gpu_addr);
+}
+
+static inline void
+set_tex_resource(drm_radeon_private_t *dev_priv,
+		 int format, int w, int h, int pitch, u64 gpu_addr)
+{
+	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	if (h < 1)
+		h = 1;
+
+	sq_tex_resource_word0 = (1 << 0);
+	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
+				  ((w - 1) << 19));
+
+	sq_tex_resource_word1 = (format << 26);
+	sq_tex_resource_word1 |= ((h - 1) << 0);
+
+	sq_tex_resource_word4 = ((1 << 14) |
+				 (0 << 16) |
+				 (1 << 19) |
+				 (2 << 22) |
+				 (3 << 25));
+
+	BEGIN_RING(9);
+	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+	OUT_RING(0);
+	OUT_RING(sq_tex_resource_word0);
+	OUT_RING(sq_tex_resource_word1);
+	OUT_RING(gpu_addr >> 8);
+	OUT_RING(gpu_addr >> 8);
+	OUT_RING(sq_tex_resource_word4);
+	OUT_RING(0);
+	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
+	ADVANCE_RING();
+
+}
+
+static inline void
+set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
+{
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	BEGIN_RING(12);
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING((x1 << 0) | (y1 << 16));
+	OUT_RING((x2 << 0) | (y2 << 16));
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
+	OUT_RING((x2 << 0) | (y2 << 16));
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
+	OUT_RING((x2 << 0) | (y2 << 16));
+	ADVANCE_RING();
+}
+
+static inline void
+draw_auto(drm_radeon_private_t *dev_priv)
+{
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	BEGIN_RING(10);
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
+	OUT_RING(DI_PT_RECTLIST);
+
+	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+	OUT_RING(DI_INDEX_SIZE_16_BIT);
+
+	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+	OUT_RING(1);
+
+	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+	OUT_RING(3);
+	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
+
+	ADVANCE_RING();
+	COMMIT_RING();
+}
+
+static inline void
+set_default_state(drm_radeon_private_t *dev_priv)
+{
+	int default_state_dw, i;
+	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
+	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+	RING_LOCALS;
+
+	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
+	case CHIP_R600:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 40;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	default:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV670:
+		num_ps_gprs = 144;
+		num_vs_gprs = 40;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV770:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 256;
+		num_vs_stack_entries = 256;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV730:
+	case CHIP_RV740:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV710:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 48;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	}
+
+	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
+		sq_config = 0;
+	else
+		sq_config = R600_VC_ENABLE;
+
+	sq_config |= (R600_DX9_CONSTS |
+		      R600_ALU_INST_PREFER_VECTOR |
+		      R600_PS_PRIO(0) |
+		      R600_VS_PRIO(1) |
+		      R600_GS_PRIO(2) |
+		      R600_ES_PRIO(3));
+
+	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
+				  R600_NUM_VS_GPRS(num_vs_gprs) |
+				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
+				  R600_NUM_ES_GPRS(num_es_gprs));
+	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
+				   R600_NUM_VS_THREADS(num_vs_threads) |
+				   R600_NUM_GS_THREADS(num_gs_threads) |
+				   R600_NUM_ES_THREADS(num_es_threads));
+	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
+		default_state_dw = r7xx_default_size * 4;
+		BEGIN_RING(default_state_dw + 10);
+		for (i = 0; i < default_state_dw; i++)
+			OUT_RING(r7xx_default_state[i]);
+	} else {
+		default_state_dw = r6xx_default_size * 4;
+		BEGIN_RING(default_state_dw + 10);
+		for (i = 0; i < default_state_dw; i++)
+			OUT_RING(r6xx_default_state[i]);
+	}
+	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+	/* SQ config */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
+	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
+	OUT_RING(sq_config);
+	OUT_RING(sq_gpr_resource_mgmt_1);
+	OUT_RING(sq_gpr_resource_mgmt_2);
+	OUT_RING(sq_thread_resource_mgmt);
+	OUT_RING(sq_stack_resource_mgmt_1);
+	OUT_RING(sq_stack_resource_mgmt_2);
+	ADVANCE_RING();
+}
+
+static inline uint32_t i2f(uint32_t input)
+{
+	u32 result, i, exponent, fraction;
+
+	if ((input & 0x3fff) == 0)
+		result = 0; /* 0 is a special case */
+	else {
+		exponent = 140; /* exponent biased by 127; */
+		fraction = (input & 0x3fff) << 10; /* cheat and only
+						      handle numbers below 2^^15 */
+		for (i = 0; i < 14; i++) {
+			if (fraction & 0x800000)
+				break;
+			else {
+				fraction = fraction << 1; /* keep
+							     shifting left until top bit = 1 */
+				exponent = exponent - 1;
+			}
+		}
+		result = exponent << 23 | (fraction & 0x7fffff); /* mask
+								    off top bit; assumed 1 */
+	}
+	return result;
+}
+
+
+int r600_nomm_get_vb(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	dev_priv->blit_vb = radeon_freelist_get(dev);
+	if (!dev_priv->blit_vb) {
+		DRM_ERROR("Unable to allocate vertex buffer for blit\n");
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+void r600_nomm_put_vb(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+
+	dev_priv->blit_vb->used = 0;
+	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
+}
+
+void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	return (((char *)dev->agp_buffer_map->handle +
+		 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
+}
+
+int
+r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	DRM_DEBUG("\n");
+
+	r600_nomm_get_vb(dev);
+
+	dev_priv->blit_vb->file_priv = file_priv;
+
+	set_default_state(dev_priv);
+	set_shaders(dev);
+
+	return 0;
+}
+
+
+void
+r600_done_blit_copy(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	BEGIN_RING(5);
+	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+	/* wait for 3D idle clean */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
+	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
+
+	ADVANCE_RING();
+	COMMIT_RING();
+
+	r600_nomm_put_vb(dev);
+}
+
+void
+r600_blit_copy(struct drm_device *dev,
+	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+	       int size_bytes)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int max_bytes;
+	u64 vb_addr;
+	u32 *vb;
+
+	vb = r600_nomm_get_vb_ptr(dev);
+
+	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
+		max_bytes = 8192;
+
+		while (size_bytes) {
+			int cur_size = size_bytes;
+			int src_x = src_gpu_addr & 255;
+			int dst_x = dst_gpu_addr & 255;
+			int h = 1;
+			src_gpu_addr = src_gpu_addr & ~255;
+			dst_gpu_addr = dst_gpu_addr & ~255;
+
+			if (!src_x && !dst_x) {
+				h = (cur_size / max_bytes);
+				if (h > 8192)
+					h = 8192;
+				if (h == 0)
+					h = 1;
+				else
+					cur_size = max_bytes;
+			} else {
+				if (cur_size > max_bytes)
+					cur_size = max_bytes;
+				if (cur_size > (max_bytes - dst_x))
+					cur_size = (max_bytes - dst_x);
+				if (cur_size > (max_bytes - src_x))
+					cur_size = (max_bytes - src_x);
+			}
+
+			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+
+				r600_nomm_put_vb(dev);
+				r600_nomm_get_vb(dev);
+				if (!dev_priv->blit_vb)
+					return;
+				set_shaders(dev);
+				vb = r600_nomm_get_vb_ptr(dev);
+			}
+
+			vb[0] = i2f(dst_x);
+			vb[1] = 0;
+			vb[2] = i2f(src_x);
+			vb[3] = 0;
+
+			vb[4] = i2f(dst_x);
+			vb[5] = i2f(h);
+			vb[6] = i2f(src_x);
+			vb[7] = i2f(h);
+
+			vb[8] = i2f(dst_x + cur_size);
+			vb[9] = i2f(h);
+			vb[10] = i2f(src_x + cur_size);
+			vb[11] = i2f(h);
+
+			/* src */
+			set_tex_resource(dev_priv, FMT_8,
+					 src_x + cur_size, h, src_x + cur_size,
+					 src_gpu_addr);
+
+			cp_set_surface_sync(dev_priv,
+					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+			/* dst */
+			set_render_target(dev_priv, COLOR_8,
+					  dst_x + cur_size, h,
+					  dst_gpu_addr);
+
+			/* scissors */
+			set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
+
+			/* Vertex buffer setup */
+			vb_addr = dev_priv->gart_buffers_offset +
+				dev_priv->blit_vb->offset +
+				dev_priv->blit_vb->used;
+			set_vtx_resource(dev_priv, vb_addr);
+
+			/* draw */
+			draw_auto(dev_priv);
+
+			cp_set_surface_sync(dev_priv,
+					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+					    cur_size * h, dst_gpu_addr);
+
+			vb += 12;
+			dev_priv->blit_vb->used += 12 * 4;
+
+			src_gpu_addr += cur_size * h;
+			dst_gpu_addr += cur_size * h;
+			size_bytes -= cur_size * h;
+		}
+	} else {
+		max_bytes = 8192 * 4;
+
+		while (size_bytes) {
+			int cur_size = size_bytes;
+			int src_x = (src_gpu_addr & 255);
+			int dst_x = (dst_gpu_addr & 255);
+			int h = 1;
+			src_gpu_addr = src_gpu_addr & ~255;
+			dst_gpu_addr = dst_gpu_addr & ~255;
+
+			if (!src_x && !dst_x) {
+				h = (cur_size / max_bytes);
+				if (h > 8192)
+					h = 8192;
+				if (h == 0)
+					h = 1;
+				else
+					cur_size = max_bytes;
+			} else {
+				if (cur_size > max_bytes)
+					cur_size = max_bytes;
+				if (cur_size > (max_bytes - dst_x))
+					cur_size = (max_bytes - dst_x);
+				if (cur_size > (max_bytes - src_x))
+					cur_size = (max_bytes - src_x);
+			}
+
+			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+				r600_nomm_put_vb(dev);
+				r600_nomm_get_vb(dev);
+				if (!dev_priv->blit_vb)
+					return;
+
+				set_shaders(dev);
+				vb = r600_nomm_get_vb_ptr(dev);
+			}
+
+			vb[0] = i2f(dst_x / 4);
+			vb[1] = 0;
+			vb[2] = i2f(src_x / 4);
+			vb[3] = 0;
+
+			vb[4] = i2f(dst_x / 4);
+			vb[5] = i2f(h);
+			vb[6] = i2f(src_x / 4);
+			vb[7] = i2f(h);
+
+			vb[8] = i2f((dst_x + cur_size) / 4);
+			vb[9] = i2f(h);
+			vb[10] = i2f((src_x + cur_size) / 4);
+			vb[11] = i2f(h);
+
+			/* src */
+			set_tex_resource(dev_priv, FMT_8_8_8_8,
+					 (src_x + cur_size) / 4,
+					 h, (src_x + cur_size) / 4,
+					 src_gpu_addr);
+
+			cp_set_surface_sync(dev_priv,
+					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+			/* dst */
+			set_render_target(dev_priv, COLOR_8_8_8_8,
+					  dst_x + cur_size, h,
+					  dst_gpu_addr);
+
+			/* scissors */
+			set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
+
+			/* Vertex buffer setup */
+			vb_addr = dev_priv->gart_buffers_offset +
+				dev_priv->blit_vb->offset +
+				dev_priv->blit_vb->used;
+			set_vtx_resource(dev_priv, vb_addr);
+
+			/* draw */
+			draw_auto(dev_priv);
+
+			cp_set_surface_sync(dev_priv,
+					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+					    cur_size * h, dst_gpu_addr);
+
+			vb += 12;
+			dev_priv->blit_vb->used += 12 * 4;
+
+			src_gpu_addr += cur_size * h;
+			dst_gpu_addr += cur_size * h;
+			size_bytes -= cur_size * h;
+		}
+	}
+}
+
+void
+r600_blit_swap(struct drm_device *dev,
+	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+	       int sx, int sy, int dx, int dy,
+	       int w, int h, int src_pitch, int dst_pitch, int cpp)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int cb_format, tex_format;
+	u64 vb_addr;
+	u32 *vb;
+
+	vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
+		      dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
+
+	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+
+		r600_nomm_put_vb(dev);
+		r600_nomm_get_vb(dev);
+		if (!dev_priv->blit_vb)
+			return;
+
+		set_shaders(dev);
+		vb = r600_nomm_get_vb_ptr(dev);
+	}
+
+	if (cpp == 4) {
+		cb_format = COLOR_8_8_8_8;
+		tex_format = FMT_8_8_8_8;
+	} else if (cpp == 2) {
+		cb_format = COLOR_5_6_5;
+		tex_format = FMT_5_6_5;
+	} else {
+		cb_format = COLOR_8;
+		tex_format = FMT_8;
+	}
+
+	vb[0] = i2f(dx);
+	vb[1] = i2f(dy);
+	vb[2] = i2f(sx);
+	vb[3] = i2f(sy);
+
+	vb[4] = i2f(dx);
+	vb[5] = i2f(dy + h);
+	vb[6] = i2f(sx);
+	vb[7] = i2f(sy + h);
+
+	vb[8] = i2f(dx + w);
+	vb[9] = i2f(dy + h);
+	vb[10] = i2f(sx + w);
+	vb[11] = i2f(sy + h);
+
+	/* src */
+	set_tex_resource(dev_priv, tex_format,
+			 src_pitch / cpp,
+			 sy + h, src_pitch / cpp,
+			 src_gpu_addr);
+
+	cp_set_surface_sync(dev_priv,
+			    R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr);
+
+	/* dst */
+	set_render_target(dev_priv, cb_format,
+			  dst_pitch / cpp, dy + h,
+			  dst_gpu_addr);
+
+	/* scissors */
+	set_scissors(dev_priv, dx, dy, dx + w, dy + h);
+
+	/* Vertex buffer setup */
+	vb_addr = dev_priv->gart_buffers_offset +
+		dev_priv->blit_vb->offset +
+		dev_priv->blit_vb->used;
+	set_vtx_resource(dev_priv, vb_addr);
+
+	/* draw */
+	draw_auto(dev_priv);
+
+	cp_set_surface_sync(dev_priv,
+			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+			    dst_pitch * (dy + h), dst_gpu_addr);
+
+	dev_priv->blit_vb->used += 12 * 4;
+}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
new file mode 100644
index 0000000000000000000000000000000000000000..5755647e688a2dfd75a36c0ef91bdb89c9d3ac81
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -0,0 +1,777 @@
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+#include "r600d.h"
+#include "r600_blit_shaders.h"
+
+#define DI_PT_RECTLIST        0x11
+#define DI_INDEX_SIZE_16_BIT  0x0
+#define DI_SRC_SEL_AUTO_INDEX 0x2
+
+#define FMT_8                 0x1
+#define FMT_5_6_5             0x8
+#define FMT_8_8_8_8           0x1a
+#define COLOR_8               0x1
+#define COLOR_5_6_5           0x8
+#define COLOR_8_8_8_8         0x1a
+
+/* emits 21 on rv770+, 23 on r600 */
+static void
+set_render_target(struct radeon_device *rdev, int format,
+		  int w, int h, u64 gpu_addr)
+{
+	u32 cb_color_info;
+	int pitch, slice;
+
+	h = (h + 7) & ~7;
+	if (h < 8)
+		h = 8;
+
+	cb_color_info = ((format << 2) | (1 << 27));
+	pitch = (w / 8) - 1;
+	slice = ((w * h) / 64) - 1;
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+
+	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
+		radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
+		radeon_ring_write(rdev, 2 << 0);
+	}
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, (pitch << 0) | (slice << 10));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, cb_color_info);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+}
+
+/* emits 5dw */
+static void
+cp_set_surface_sync(struct radeon_device *rdev,
+		    u32 sync_type, u32 size,
+		    u64 mc_addr)
+{
+	u32 cp_coher_size;
+
+	if (size == 0xffffffff)
+		cp_coher_size = 0xffffffff;
+	else
+		cp_coher_size = ((size + 255) >> 8);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	radeon_ring_write(rdev, sync_type);
+	radeon_ring_write(rdev, cp_coher_size);
+	radeon_ring_write(rdev, mc_addr >> 8);
+	radeon_ring_write(rdev, 10); /* poll interval */
+}
+
+/* emits 21dw + 1 surface sync = 26dw */
+static void
+set_shaders(struct radeon_device *rdev)
+{
+	u64 gpu_addr;
+	u32 sq_pgm_resources;
+
+	/* setup shader regs */
+	sq_pgm_resources = (1 << 0);
+
+	/* VS */
+	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, sq_pgm_resources);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+
+	/* PS */
+	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 2);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+
+	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
+}
+
+/* emits 9 + 1 sync (5) = 14*/
+static void
+set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
+{
+	u32 sq_vtx_constant_word2;
+
+	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+	radeon_ring_write(rdev, 0x460);
+	radeon_ring_write(rdev, gpu_addr & 0xffffffff);
+	radeon_ring_write(rdev, 48 - 1);
+	radeon_ring_write(rdev, sq_vtx_constant_word2);
+	radeon_ring_write(rdev, 1 << 0);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+
+	if ((rdev->family == CHIP_RV610) ||
+	    (rdev->family == CHIP_RV620) ||
+	    (rdev->family == CHIP_RS780) ||
+	    (rdev->family == CHIP_RS880) ||
+	    (rdev->family == CHIP_RV710))
+		cp_set_surface_sync(rdev,
+				    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
+	else
+		cp_set_surface_sync(rdev,
+				    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
+}
+
+/* emits 9 */
+static void
+set_tex_resource(struct radeon_device *rdev,
+		 int format, int w, int h, int pitch,
+		 u64 gpu_addr)
+{
+	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+
+	if (h < 1)
+		h = 1;
+
+	sq_tex_resource_word0 = (1 << 0);
+	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
+				  ((w - 1) << 19));
+
+	sq_tex_resource_word1 = (format << 26);
+	sq_tex_resource_word1 |= ((h - 1) << 0);
+
+	sq_tex_resource_word4 = ((1 << 14) |
+				 (0 << 16) |
+				 (1 << 19) |
+				 (2 << 22) |
+				 (3 << 25));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, sq_tex_resource_word0);
+	radeon_ring_write(rdev, sq_tex_resource_word1);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+	radeon_ring_write(rdev, sq_tex_resource_word4);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
+}
+
+/* emits 12 */
+static void
+set_scissors(struct radeon_device *rdev, int x1, int y1,
+	     int x2, int y2)
+{
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
+	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
+	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
+	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+}
+
+/* emits 10 */
+static void
+draw_auto(struct radeon_device *rdev)
+{
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, DI_PT_RECTLIST);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
+	radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
+	radeon_ring_write(rdev, 1);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
+	radeon_ring_write(rdev, 3);
+	radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
+
+}
+
+/* emits 14 */
+static void
+set_default_state(struct radeon_device *rdev)
+{
+	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
+	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+	u64 gpu_addr;
+
+	switch (rdev->family) {
+	case CHIP_R600:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 40;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	default:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV670:
+		num_ps_gprs = 144;
+		num_vs_gprs = 40;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV770:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 256;
+		num_vs_stack_entries = 256;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV730:
+	case CHIP_RV740:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV710:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 48;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	}
+
+	if ((rdev->family == CHIP_RV610) ||
+	    (rdev->family == CHIP_RV620) ||
+	    (rdev->family == CHIP_RS780) ||
+	    (rdev->family == CHIP_RS780) ||
+	    (rdev->family == CHIP_RV710))
+		sq_config = 0;
+	else
+		sq_config = VC_ENABLE;
+
+	sq_config |= (DX9_CONSTS |
+		      ALU_INST_PREFER_VECTOR |
+		      PS_PRIO(0) |
+		      VS_PRIO(1) |
+		      GS_PRIO(2) |
+		      ES_PRIO(3));
+
+	sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+				  NUM_VS_GPRS(num_vs_gprs) |
+				  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+	sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+				  NUM_ES_GPRS(num_es_gprs));
+	sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+				   NUM_VS_THREADS(num_vs_threads) |
+				   NUM_GS_THREADS(num_gs_threads) |
+				   NUM_ES_THREADS(num_es_threads));
+	sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+				    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+	sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+				    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+	/* emit an IB pointing at default state */
+	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
+	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
+	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
+	radeon_ring_write(rdev, (rdev->r600_blit.state_len / 4));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+	/* SQ config */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
+	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, sq_config);
+	radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+	radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+	radeon_ring_write(rdev, sq_thread_resource_mgmt);
+	radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+}
+
+static inline uint32_t i2f(uint32_t input)
+{
+	u32 result, i, exponent, fraction;
+
+	if ((input & 0x3fff) == 0)
+		result = 0; /* 0 is a special case */
+	else {
+		exponent = 140; /* exponent biased by 127; */
+		fraction = (input & 0x3fff) << 10; /* cheat and only
+						      handle numbers below 2^^15 */
+		for (i = 0; i < 14; i++) {
+			if (fraction & 0x800000)
+				break;
+			else {
+				fraction = fraction << 1; /* keep
+							     shifting left until top bit = 1 */
+				exponent = exponent - 1;
+			}
+		}
+		result = exponent << 23 | (fraction & 0x7fffff); /* mask
+								    off top bit; assumed 1 */
+	}
+	return result;
+}
+
+int r600_blit_init(struct radeon_device *rdev)
+{
+	u32 obj_size;
+	int r;
+	void *ptr;
+
+	rdev->r600_blit.state_offset = 0;
+
+	if (rdev->family >= CHIP_RV770)
+		rdev->r600_blit.state_len = r7xx_default_size * 4;
+	else
+		rdev->r600_blit.state_len = r6xx_default_size * 4;
+
+	obj_size = rdev->r600_blit.state_len;
+	obj_size = ALIGN(obj_size, 256);
+
+	rdev->r600_blit.vs_offset = obj_size;
+	obj_size += r6xx_vs_size * 4;
+	obj_size = ALIGN(obj_size, 256);
+
+	rdev->r600_blit.ps_offset = obj_size;
+	obj_size += r6xx_ps_size * 4;
+	obj_size = ALIGN(obj_size, 256);
+
+	r = radeon_object_create(rdev, NULL, obj_size,
+				 true, RADEON_GEM_DOMAIN_VRAM,
+				 false, &rdev->r600_blit.shader_obj);
+	if (r) {
+		DRM_ERROR("r600 failed to allocate shader\n");
+		return r;
+	}
+
+	r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
+			     &rdev->r600_blit.shader_gpu_addr);
+	if (r) {
+		DRM_ERROR("failed to pin blit object %d\n", r);
+		return r;
+	}
+
+	DRM_DEBUG("r6xx blit allocated bo @ 0x%16llx %08x vs %08x ps %08x\n",
+		  rdev->r600_blit.shader_gpu_addr, obj_size,
+		  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
+
+	r = radeon_object_kmap(rdev->r600_blit.shader_obj, &ptr);
+	if (r) {
+		DRM_ERROR("failed to map blit object %d\n", r);
+		return r;
+	}
+
+	if (rdev->family >= CHIP_RV770)
+		memcpy_toio(ptr + rdev->r600_blit.state_offset, r7xx_default_state, rdev->r600_blit.state_len);
+	else
+		memcpy_toio(ptr + rdev->r600_blit.state_offset, r6xx_default_state, rdev->r600_blit.state_len);
+
+	memcpy(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4);
+	memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4);
+
+	radeon_object_kunmap(rdev->r600_blit.shader_obj);
+	return 0;
+}
+
+void r600_blit_fini(struct radeon_device *rdev)
+{
+	radeon_object_unpin(rdev->r600_blit.shader_obj);
+	radeon_object_unref(&rdev->r600_blit.shader_obj);
+}
+
+int r600_vb_ib_get(struct radeon_device *rdev)
+{
+	int r;
+	r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
+	if (r) {
+		DRM_ERROR("failed to get IB for vertex buffer\n");
+		return r;
+	}
+
+	rdev->r600_blit.vb_total = 64*1024;
+	rdev->r600_blit.vb_used = 0;
+	return 0;
+}
+
+void r600_vb_ib_put(struct radeon_device *rdev)
+{
+	mutex_lock(&rdev->ib_pool.mutex);
+	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
+	list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs);
+	mutex_unlock(&rdev->ib_pool.mutex);
+	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
+}
+
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+{
+	int r;
+	int ring_size;
+	const int max_size = 8192*8192;
+
+	r = r600_vb_ib_get(rdev);
+	WARN_ON(r);
+
+	/* loops of emits 64 + fence emit possible */
+	ring_size = ((size_bytes + max_size) / max_size) * 78;
+	/* set default  + shaders */
+	ring_size += 40; /* shaders + def state */
+	ring_size += 3; /* fence emit for VB IB */
+	ring_size += 5; /* done copy */
+	ring_size += 3; /* fence emit for done copy */
+	r = radeon_ring_lock(rdev, ring_size);
+	WARN_ON(r);
+
+	set_default_state(rdev); /* 14 */
+	set_shaders(rdev); /* 26 */
+	return 0;
+}
+
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
+{
+	int r;
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+	/* wait for 3D idle clean */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
+
+	if (rdev->r600_blit.vb_ib)
+		r600_vb_ib_put(rdev);
+
+	if (fence)
+		r = radeon_fence_emit(rdev, fence);
+
+	radeon_ring_unlock_commit(rdev);
+}
+
+void r600_kms_blit_copy(struct radeon_device *rdev,
+			u64 src_gpu_addr, u64 dst_gpu_addr,
+			int size_bytes)
+{
+	int max_bytes;
+	u64 vb_gpu_addr;
+	u32 *vb;
+
+	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
+		  size_bytes, rdev->r600_blit.vb_used);
+	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
+	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
+		max_bytes = 8192;
+
+		while (size_bytes) {
+			int cur_size = size_bytes;
+			int src_x = src_gpu_addr & 255;
+			int dst_x = dst_gpu_addr & 255;
+			int h = 1;
+			src_gpu_addr = src_gpu_addr & ~255;
+			dst_gpu_addr = dst_gpu_addr & ~255;
+
+			if (!src_x && !dst_x) {
+				h = (cur_size / max_bytes);
+				if (h > 8192)
+					h = 8192;
+				if (h == 0)
+					h = 1;
+				else
+					cur_size = max_bytes;
+			} else {
+				if (cur_size > max_bytes)
+					cur_size = max_bytes;
+				if (cur_size > (max_bytes - dst_x))
+					cur_size = (max_bytes - dst_x);
+				if (cur_size > (max_bytes - src_x))
+					cur_size = (max_bytes - src_x);
+			}
+
+			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+				WARN_ON(1);
+
+#if 0
+				r600_vb_ib_put(rdev);
+
+				r600_nomm_put_vb(dev);
+				r600_nomm_get_vb(dev);
+				if (!dev_priv->blit_vb)
+					return;
+				set_shaders(dev);
+				vb = r600_nomm_get_vb_ptr(dev);
+#endif
+			}
+
+			vb[0] = i2f(dst_x);
+			vb[1] = 0;
+			vb[2] = i2f(src_x);
+			vb[3] = 0;
+
+			vb[4] = i2f(dst_x);
+			vb[5] = i2f(h);
+			vb[6] = i2f(src_x);
+			vb[7] = i2f(h);
+
+			vb[8] = i2f(dst_x + cur_size);
+			vb[9] = i2f(h);
+			vb[10] = i2f(src_x + cur_size);
+			vb[11] = i2f(h);
+
+			/* src 9 */
+			set_tex_resource(rdev, FMT_8,
+					 src_x + cur_size, h, src_x + cur_size,
+					 src_gpu_addr);
+
+			/* 5 */
+			cp_set_surface_sync(rdev,
+					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+			/* dst 23 */
+			set_render_target(rdev, COLOR_8,
+					  dst_x + cur_size, h,
+					  dst_gpu_addr);
+
+			/* scissors 12 */
+			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
+
+			/* 14 */
+			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+			set_vtx_resource(rdev, vb_gpu_addr);
+
+			/* draw 10 */
+			draw_auto(rdev);
+
+			/* 5 */
+			cp_set_surface_sync(rdev,
+					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+					    cur_size * h, dst_gpu_addr);
+
+			vb += 12;
+			rdev->r600_blit.vb_used += 12 * 4;
+
+			src_gpu_addr += cur_size * h;
+			dst_gpu_addr += cur_size * h;
+			size_bytes -= cur_size * h;
+		}
+	} else {
+		max_bytes = 8192 * 4;
+
+		while (size_bytes) {
+			int cur_size = size_bytes;
+			int src_x = (src_gpu_addr & 255);
+			int dst_x = (dst_gpu_addr & 255);
+			int h = 1;
+			src_gpu_addr = src_gpu_addr & ~255;
+			dst_gpu_addr = dst_gpu_addr & ~255;
+
+			if (!src_x && !dst_x) {
+				h = (cur_size / max_bytes);
+				if (h > 8192)
+					h = 8192;
+				if (h == 0)
+					h = 1;
+				else
+					cur_size = max_bytes;
+			} else {
+				if (cur_size > max_bytes)
+					cur_size = max_bytes;
+				if (cur_size > (max_bytes - dst_x))
+					cur_size = (max_bytes - dst_x);
+				if (cur_size > (max_bytes - src_x))
+					cur_size = (max_bytes - src_x);
+			}
+
+			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+				WARN_ON(1);
+			}
+#if 0
+			if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) {
+				r600_nomm_put_vb(dev);
+				r600_nomm_get_vb(dev);
+				if (!rdev->blit_vb)
+					return;
+
+				set_shaders(dev);
+				vb = r600_nomm_get_vb_ptr(dev);
+			}
+#endif
+
+			vb[0] = i2f(dst_x / 4);
+			vb[1] = 0;
+			vb[2] = i2f(src_x / 4);
+			vb[3] = 0;
+
+			vb[4] = i2f(dst_x / 4);
+			vb[5] = i2f(h);
+			vb[6] = i2f(src_x / 4);
+			vb[7] = i2f(h);
+
+			vb[8] = i2f((dst_x + cur_size) / 4);
+			vb[9] = i2f(h);
+			vb[10] = i2f((src_x + cur_size) / 4);
+			vb[11] = i2f(h);
+
+			/* src 9 */
+			set_tex_resource(rdev, FMT_8_8_8_8,
+					 (src_x + cur_size) / 4,
+					 h, (src_x + cur_size) / 4,
+					 src_gpu_addr);
+			/* 5 */
+			cp_set_surface_sync(rdev,
+					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+			/* dst 23 */
+			set_render_target(rdev, COLOR_8_8_8_8,
+					  dst_x + cur_size, h,
+					  dst_gpu_addr);
+
+			/* scissors 12  */
+			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
+
+			/* Vertex buffer setup 14 */
+			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+			set_vtx_resource(rdev, vb_gpu_addr);
+
+			/* draw 10 */
+			draw_auto(rdev);
+
+			/* 5 */
+			cp_set_surface_sync(rdev,
+					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+					    cur_size * h, dst_gpu_addr);
+
+			/* 78 ring dwords per loop */
+			vb += 12;
+			rdev->r600_blit.vb_used += 12 * 4;
+
+			src_gpu_addr += cur_size * h;
+			dst_gpu_addr += cur_size * h;
+			size_bytes -= cur_size * h;
+		}
+	}
+}
+
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c
new file mode 100644
index 0000000000000000000000000000000000000000..d745e815c2e81193bfe9e44c129efc028018985f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c
@@ -0,0 +1,1072 @@
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+const u32 r6xx_default_state[] =
+{
+	0xc0002400,
+	0x00000000,
+	0xc0012800,
+	0x80000000,
+	0x80000000,
+	0xc0004600,
+	0x00000016,
+	0xc0016800,
+	0x00000010,
+	0x00028000,
+	0xc0016800,
+	0x00000010,
+	0x00008000,
+	0xc0016800,
+	0x00000542,
+	0x07000003,
+	0xc0016800,
+	0x000005c5,
+	0x00000000,
+	0xc0016800,
+	0x00000363,
+	0x00000000,
+	0xc0016800,
+	0x0000060c,
+	0x82000000,
+	0xc0016800,
+	0x0000060e,
+	0x01020204,
+	0xc0016f00,
+	0x00000000,
+	0x00000000,
+	0xc0016f00,
+	0x00000001,
+	0x00000000,
+	0xc0096900,
+	0x0000022a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000004,
+	0x00000000,
+	0xc0016900,
+	0x0000000a,
+	0x00000000,
+	0xc0016900,
+	0x0000000b,
+	0x00000000,
+	0xc0016900,
+	0x0000010c,
+	0x00000000,
+	0xc0016900,
+	0x0000010d,
+	0x00000000,
+	0xc0016900,
+	0x00000200,
+	0x00000000,
+	0xc0016900,
+	0x00000343,
+	0x00000060,
+	0xc0016900,
+	0x00000344,
+	0x00000040,
+	0xc0016900,
+	0x00000351,
+	0x0000aa00,
+	0xc0016900,
+	0x00000104,
+	0x00000000,
+	0xc0016900,
+	0x0000010e,
+	0x00000000,
+	0xc0046900,
+	0x00000105,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0036900,
+	0x00000109,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0046900,
+	0x0000030c,
+	0x01000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0046900,
+	0x00000048,
+	0x3f800000,
+	0x00000000,
+	0x3f800000,
+	0x3f800000,
+	0xc0016900,
+	0x0000008e,
+	0x0000000f,
+	0xc0016900,
+	0x00000080,
+	0x00000000,
+	0xc0016900,
+	0x00000083,
+	0x0000ffff,
+	0xc0016900,
+	0x00000084,
+	0x00000000,
+	0xc0016900,
+	0x00000085,
+	0x20002000,
+	0xc0016900,
+	0x00000086,
+	0x00000000,
+	0xc0016900,
+	0x00000087,
+	0x20002000,
+	0xc0016900,
+	0x00000088,
+	0x00000000,
+	0xc0016900,
+	0x00000089,
+	0x20002000,
+	0xc0016900,
+	0x0000008a,
+	0x00000000,
+	0xc0016900,
+	0x0000008b,
+	0x20002000,
+	0xc0016900,
+	0x0000008c,
+	0x00000000,
+	0xc0016900,
+	0x00000094,
+	0x80000000,
+	0xc0016900,
+	0x00000095,
+	0x20002000,
+	0xc0026900,
+	0x000000b4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000096,
+	0x80000000,
+	0xc0016900,
+	0x00000097,
+	0x20002000,
+	0xc0026900,
+	0x000000b6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000098,
+	0x80000000,
+	0xc0016900,
+	0x00000099,
+	0x20002000,
+	0xc0026900,
+	0x000000b8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009a,
+	0x80000000,
+	0xc0016900,
+	0x0000009b,
+	0x20002000,
+	0xc0026900,
+	0x000000ba,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009c,
+	0x80000000,
+	0xc0016900,
+	0x0000009d,
+	0x20002000,
+	0xc0026900,
+	0x000000bc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009e,
+	0x80000000,
+	0xc0016900,
+	0x0000009f,
+	0x20002000,
+	0xc0026900,
+	0x000000be,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a0,
+	0x80000000,
+	0xc0016900,
+	0x000000a1,
+	0x20002000,
+	0xc0026900,
+	0x000000c0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a2,
+	0x80000000,
+	0xc0016900,
+	0x000000a3,
+	0x20002000,
+	0xc0026900,
+	0x000000c2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a4,
+	0x80000000,
+	0xc0016900,
+	0x000000a5,
+	0x20002000,
+	0xc0026900,
+	0x000000c4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a6,
+	0x80000000,
+	0xc0016900,
+	0x000000a7,
+	0x20002000,
+	0xc0026900,
+	0x000000c6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a8,
+	0x80000000,
+	0xc0016900,
+	0x000000a9,
+	0x20002000,
+	0xc0026900,
+	0x000000c8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000aa,
+	0x80000000,
+	0xc0016900,
+	0x000000ab,
+	0x20002000,
+	0xc0026900,
+	0x000000ca,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ac,
+	0x80000000,
+	0xc0016900,
+	0x000000ad,
+	0x20002000,
+	0xc0026900,
+	0x000000cc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ae,
+	0x80000000,
+	0xc0016900,
+	0x000000af,
+	0x20002000,
+	0xc0026900,
+	0x000000ce,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b0,
+	0x80000000,
+	0xc0016900,
+	0x000000b1,
+	0x20002000,
+	0xc0026900,
+	0x000000d0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b2,
+	0x80000000,
+	0xc0016900,
+	0x000000b3,
+	0x20002000,
+	0xc0026900,
+	0x000000d2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000293,
+	0x00004010,
+	0xc0016900,
+	0x00000300,
+	0x00000000,
+	0xc0016900,
+	0x00000301,
+	0x00000000,
+	0xc0016900,
+	0x00000312,
+	0xffffffff,
+	0xc0016900,
+	0x00000307,
+	0x00000000,
+	0xc0016900,
+	0x00000308,
+	0x00000000,
+	0xc0016900,
+	0x00000283,
+	0x00000000,
+	0xc0016900,
+	0x00000292,
+	0x00000000,
+	0xc0066900,
+	0x0000010f,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000000,
+	0xc0016900,
+	0x00000207,
+	0x00000000,
+	0xc0016900,
+	0x00000208,
+	0x00000000,
+	0xc0046900,
+	0x00000303,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0xc0016900,
+	0x00000205,
+	0x00000004,
+	0xc0016900,
+	0x00000280,
+	0x00000000,
+	0xc0016900,
+	0x00000281,
+	0x00000000,
+	0xc0016900,
+	0x0000037e,
+	0x00000000,
+	0xc0016900,
+	0x00000382,
+	0x00000000,
+	0xc0016900,
+	0x00000380,
+	0x00000000,
+	0xc0016900,
+	0x00000383,
+	0x00000000,
+	0xc0016900,
+	0x00000381,
+	0x00000000,
+	0xc0016900,
+	0x00000282,
+	0x00000008,
+	0xc0016900,
+	0x00000302,
+	0x0000002d,
+	0xc0016900,
+	0x0000037f,
+	0x00000000,
+	0xc0016900,
+	0x000001b2,
+	0x00000000,
+	0xc0016900,
+	0x000001b6,
+	0x00000000,
+	0xc0016900,
+	0x000001b7,
+	0x00000000,
+	0xc0016900,
+	0x000001b8,
+	0x00000000,
+	0xc0016900,
+	0x000001b9,
+	0x00000000,
+	0xc0016900,
+	0x00000225,
+	0x00000000,
+	0xc0016900,
+	0x00000229,
+	0x00000000,
+	0xc0016900,
+	0x00000237,
+	0x00000000,
+	0xc0016900,
+	0x00000100,
+	0x00000800,
+	0xc0016900,
+	0x00000101,
+	0x00000000,
+	0xc0016900,
+	0x00000102,
+	0x00000000,
+	0xc0016900,
+	0x000002a8,
+	0x00000000,
+	0xc0016900,
+	0x000002a9,
+	0x00000000,
+	0xc0016900,
+	0x00000103,
+	0x00000000,
+	0xc0016900,
+	0x00000284,
+	0x00000000,
+	0xc0016900,
+	0x00000290,
+	0x00000000,
+	0xc0016900,
+	0x00000285,
+	0x00000000,
+	0xc0016900,
+	0x00000286,
+	0x00000000,
+	0xc0016900,
+	0x00000287,
+	0x00000000,
+	0xc0016900,
+	0x00000288,
+	0x00000000,
+	0xc0016900,
+	0x00000289,
+	0x00000000,
+	0xc0016900,
+	0x0000028a,
+	0x00000000,
+	0xc0016900,
+	0x0000028b,
+	0x00000000,
+	0xc0016900,
+	0x0000028c,
+	0x00000000,
+	0xc0016900,
+	0x0000028d,
+	0x00000000,
+	0xc0016900,
+	0x0000028e,
+	0x00000000,
+	0xc0016900,
+	0x0000028f,
+	0x00000000,
+	0xc0016900,
+	0x000002a1,
+	0x00000000,
+	0xc0016900,
+	0x000002a5,
+	0x00000000,
+	0xc0016900,
+	0x000002ac,
+	0x00000000,
+	0xc0016900,
+	0x000002ad,
+	0x00000000,
+	0xc0016900,
+	0x000002ae,
+	0x00000000,
+	0xc0016900,
+	0x000002c8,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000100,
+	0xc0016900,
+	0x00000204,
+	0x00010000,
+	0xc0036e00,
+	0x00000000,
+	0x00000012,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x0000008f,
+	0x0000000f,
+	0xc0016900,
+	0x000001e8,
+	0x00000001,
+	0xc0016900,
+	0x00000202,
+	0x00cc0000,
+	0xc0016900,
+	0x00000205,
+	0x00000244,
+	0xc0016900,
+	0x00000203,
+	0x00000210,
+	0xc0016900,
+	0x000001b1,
+	0x00000000,
+	0xc0016900,
+	0x00000185,
+	0x00000000,
+	0xc0016900,
+	0x000001b3,
+	0x00000001,
+	0xc0016900,
+	0x000001b4,
+	0x00000000,
+	0xc0016900,
+	0x00000191,
+	0x00000b00,
+	0xc0016900,
+	0x000001b5,
+	0x00000000,
+};
+
+const u32 r7xx_default_state[] =
+{
+	0xc0012800,
+	0x80000000,
+	0x80000000,
+	0xc0004600,
+	0x00000016,
+	0xc0016800,
+	0x00000010,
+	0x00028000,
+	0xc0016800,
+	0x00000010,
+	0x00008000,
+	0xc0016800,
+	0x00000542,
+	0x07000002,
+	0xc0016800,
+	0x000005c5,
+	0x00000000,
+	0xc0016800,
+	0x00000363,
+	0x00004000,
+	0xc0016800,
+	0x0000060c,
+	0x00000000,
+	0xc0016800,
+	0x0000060e,
+	0x00420204,
+	0xc0016f00,
+	0x00000000,
+	0x00000000,
+	0xc0016f00,
+	0x00000001,
+	0x00000000,
+	0xc0096900,
+	0x0000022a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000004,
+	0x00000000,
+	0xc0016900,
+	0x0000000a,
+	0x00000000,
+	0xc0016900,
+	0x0000000b,
+	0x00000000,
+	0xc0016900,
+	0x0000010c,
+	0x00000000,
+	0xc0016900,
+	0x0000010d,
+	0x00000000,
+	0xc0016900,
+	0x00000200,
+	0x00000000,
+	0xc0016900,
+	0x00000343,
+	0x00000060,
+	0xc0016900,
+	0x00000344,
+	0x00000000,
+	0xc0016900,
+	0x00000351,
+	0x0000aa00,
+	0xc0016900,
+	0x00000104,
+	0x00000000,
+	0xc0016900,
+	0x0000010e,
+	0x00000000,
+	0xc0046900,
+	0x00000105,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0046900,
+	0x0000030c,
+	0x01000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x0000008e,
+	0x0000000f,
+	0xc0016900,
+	0x00000080,
+	0x00000000,
+	0xc0016900,
+	0x00000083,
+	0x0000ffff,
+	0xc0016900,
+	0x00000084,
+	0x00000000,
+	0xc0016900,
+	0x00000085,
+	0x20002000,
+	0xc0016900,
+	0x00000086,
+	0x00000000,
+	0xc0016900,
+	0x00000087,
+	0x20002000,
+	0xc0016900,
+	0x00000088,
+	0x00000000,
+	0xc0016900,
+	0x00000089,
+	0x20002000,
+	0xc0016900,
+	0x0000008a,
+	0x00000000,
+	0xc0016900,
+	0x0000008b,
+	0x20002000,
+	0xc0016900,
+	0x0000008c,
+	0xaaaaaaaa,
+	0xc0016900,
+	0x00000094,
+	0x80000000,
+	0xc0016900,
+	0x00000095,
+	0x20002000,
+	0xc0026900,
+	0x000000b4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000096,
+	0x80000000,
+	0xc0016900,
+	0x00000097,
+	0x20002000,
+	0xc0026900,
+	0x000000b6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000098,
+	0x80000000,
+	0xc0016900,
+	0x00000099,
+	0x20002000,
+	0xc0026900,
+	0x000000b8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009a,
+	0x80000000,
+	0xc0016900,
+	0x0000009b,
+	0x20002000,
+	0xc0026900,
+	0x000000ba,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009c,
+	0x80000000,
+	0xc0016900,
+	0x0000009d,
+	0x20002000,
+	0xc0026900,
+	0x000000bc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009e,
+	0x80000000,
+	0xc0016900,
+	0x0000009f,
+	0x20002000,
+	0xc0026900,
+	0x000000be,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a0,
+	0x80000000,
+	0xc0016900,
+	0x000000a1,
+	0x20002000,
+	0xc0026900,
+	0x000000c0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a2,
+	0x80000000,
+	0xc0016900,
+	0x000000a3,
+	0x20002000,
+	0xc0026900,
+	0x000000c2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a4,
+	0x80000000,
+	0xc0016900,
+	0x000000a5,
+	0x20002000,
+	0xc0026900,
+	0x000000c4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a6,
+	0x80000000,
+	0xc0016900,
+	0x000000a7,
+	0x20002000,
+	0xc0026900,
+	0x000000c6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a8,
+	0x80000000,
+	0xc0016900,
+	0x000000a9,
+	0x20002000,
+	0xc0026900,
+	0x000000c8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000aa,
+	0x80000000,
+	0xc0016900,
+	0x000000ab,
+	0x20002000,
+	0xc0026900,
+	0x000000ca,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ac,
+	0x80000000,
+	0xc0016900,
+	0x000000ad,
+	0x20002000,
+	0xc0026900,
+	0x000000cc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ae,
+	0x80000000,
+	0xc0016900,
+	0x000000af,
+	0x20002000,
+	0xc0026900,
+	0x000000ce,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b0,
+	0x80000000,
+	0xc0016900,
+	0x000000b1,
+	0x20002000,
+	0xc0026900,
+	0x000000d0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b2,
+	0x80000000,
+	0xc0016900,
+	0x000000b3,
+	0x20002000,
+	0xc0026900,
+	0x000000d2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000293,
+	0x00514000,
+	0xc0016900,
+	0x00000300,
+	0x00000000,
+	0xc0016900,
+	0x00000301,
+	0x00000000,
+	0xc0016900,
+	0x00000312,
+	0xffffffff,
+	0xc0016900,
+	0x00000307,
+	0x00000000,
+	0xc0016900,
+	0x00000308,
+	0x00000000,
+	0xc0016900,
+	0x00000283,
+	0x00000000,
+	0xc0016900,
+	0x00000292,
+	0x00000000,
+	0xc0066900,
+	0x0000010f,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000000,
+	0xc0016900,
+	0x00000207,
+	0x00000000,
+	0xc0016900,
+	0x00000208,
+	0x00000000,
+	0xc0046900,
+	0x00000303,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0xc0016900,
+	0x00000205,
+	0x00000004,
+	0xc0016900,
+	0x00000280,
+	0x00000000,
+	0xc0016900,
+	0x00000281,
+	0x00000000,
+	0xc0016900,
+	0x0000037e,
+	0x00000000,
+	0xc0016900,
+	0x00000382,
+	0x00000000,
+	0xc0016900,
+	0x00000380,
+	0x00000000,
+	0xc0016900,
+	0x00000383,
+	0x00000000,
+	0xc0016900,
+	0x00000381,
+	0x00000000,
+	0xc0016900,
+	0x00000282,
+	0x00000008,
+	0xc0016900,
+	0x00000302,
+	0x0000002d,
+	0xc0016900,
+	0x0000037f,
+	0x00000000,
+	0xc0016900,
+	0x000001b2,
+	0x00000001,
+	0xc0016900,
+	0x000001b6,
+	0x00000000,
+	0xc0016900,
+	0x000001b7,
+	0x00000000,
+	0xc0016900,
+	0x000001b8,
+	0x00000000,
+	0xc0016900,
+	0x000001b9,
+	0x00000000,
+	0xc0016900,
+	0x00000225,
+	0x00000000,
+	0xc0016900,
+	0x00000229,
+	0x00000000,
+	0xc0016900,
+	0x00000237,
+	0x00000000,
+	0xc0016900,
+	0x00000100,
+	0x00000800,
+	0xc0016900,
+	0x00000101,
+	0x00000000,
+	0xc0016900,
+	0x00000102,
+	0x00000000,
+	0xc0016900,
+	0x000002a8,
+	0x00000000,
+	0xc0016900,
+	0x000002a9,
+	0x00000000,
+	0xc0016900,
+	0x00000103,
+	0x00000000,
+	0xc0016900,
+	0x00000284,
+	0x00000000,
+	0xc0016900,
+	0x00000290,
+	0x00000000,
+	0xc0016900,
+	0x00000285,
+	0x00000000,
+	0xc0016900,
+	0x00000286,
+	0x00000000,
+	0xc0016900,
+	0x00000287,
+	0x00000000,
+	0xc0016900,
+	0x00000288,
+	0x00000000,
+	0xc0016900,
+	0x00000289,
+	0x00000000,
+	0xc0016900,
+	0x0000028a,
+	0x00000000,
+	0xc0016900,
+	0x0000028b,
+	0x00000000,
+	0xc0016900,
+	0x0000028c,
+	0x00000000,
+	0xc0016900,
+	0x0000028d,
+	0x00000000,
+	0xc0016900,
+	0x0000028e,
+	0x00000000,
+	0xc0016900,
+	0x0000028f,
+	0x00000000,
+	0xc0016900,
+	0x000002a1,
+	0x00000000,
+	0xc0016900,
+	0x000002a5,
+	0x00000000,
+	0xc0016900,
+	0x000002ac,
+	0x00000000,
+	0xc0016900,
+	0x000002ad,
+	0x00000000,
+	0xc0016900,
+	0x000002ae,
+	0x00000000,
+	0xc0016900,
+	0x000002c8,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000100,
+	0xc0016900,
+	0x00000204,
+	0x00010000,
+	0xc0036e00,
+	0x00000000,
+	0x00000012,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x0000008f,
+	0x0000000f,
+	0xc0016900,
+	0x000001e8,
+	0x00000001,
+	0xc0016900,
+	0x00000202,
+	0x00cc0000,
+	0xc0016900,
+	0x00000205,
+	0x00000244,
+	0xc0016900,
+	0x00000203,
+	0x00000210,
+	0xc0016900,
+	0x000001b1,
+	0x00000000,
+	0xc0016900,
+	0x00000185,
+	0x00000000,
+	0xc0016900,
+	0x000001b3,
+	0x00000001,
+	0xc0016900,
+	0x000001b4,
+	0x00000000,
+	0xc0016900,
+	0x00000191,
+	0x00000b00,
+	0xc0016900,
+	0x000001b5,
+	0x00000000,
+};
+
+/* same for r6xx/r7xx */
+const u32 r6xx_vs[] =
+{
+	0x00000004,
+	0x81000000,
+	0x0000203c,
+	0x94000b08,
+	0x00004000,
+	0x14200b1a,
+	0x00000000,
+	0x00000000,
+	0x3c000000,
+	0x68cd1000,
+	0x00080000,
+	0x00000000,
+};
+
+const u32 r6xx_ps[] =
+{
+	0x00000002,
+	0x80800000,
+	0x00000000,
+	0x94200688,
+	0x00000010,
+	0x000d1000,
+	0xb0800000,
+	0x00000000,
+};
+
+const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps);
+const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs);
+const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state);
+const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state);
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h
new file mode 100644
index 0000000000000000000000000000000000000000..fdc3b378cbb0d78542987e111fc1693462dc2b1b
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h
@@ -0,0 +1,14 @@
+
+#ifndef R600_BLIT_SHADERS_H
+#define R600_BLIT_SHADERS_H
+
+extern const u32 r6xx_ps[];
+extern const u32 r6xx_vs[];
+extern const u32 r7xx_default_state[];
+extern const u32 r6xx_default_state[];
+
+
+extern const u32 r6xx_ps_size, r6xx_vs_size;
+extern const u32 r6xx_default_size, r7xx_default_size;
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 8327912de9644137e04c9ebfe37fd118b3b646b6..6d5a711c2e91c63818c5a86ca1a6e4e194b418ff 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -58,6 +58,12 @@ MODULE_FIRMWARE("radeon/RV730_me.bin");
 MODULE_FIRMWARE("radeon/RV710_pfp.bin");
 MODULE_FIRMWARE("radeon/RV710_me.bin");
 
+
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+			unsigned family, u32 *ib, int *l);
+void r600_cs_legacy_init(void);
+
+
 # define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
 # define ATI_PCIGART_PAGE_MASK		(~(ATI_PCIGART_PAGE_SIZE-1))
 
@@ -1857,6 +1863,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 
 	DRM_DEBUG("\n");
 
+	mutex_init(&dev_priv->cs_mutex);
+	r600_cs_legacy_init();
 	/* if we require new memory map but we don't have it fail */
 	if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
 		DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
@@ -1888,7 +1896,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 	/* Enable vblank on CRTC1 for older X servers
 	 */
 	dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
-
+	dev_priv->do_boxes = 0;
 	dev_priv->cp_mode = init->cp_mode;
 
 	/* We don't support anything other than bus-mastering ring mode,
@@ -1974,11 +1982,11 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 	} else
 #endif
 	{
-		dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
+		dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
 		dev_priv->ring_rptr->handle =
-		    (void *)dev_priv->ring_rptr->offset;
+			(void *)(unsigned long)dev_priv->ring_rptr->offset;
 		dev->agp_buffer_map->handle =
-		    (void *)dev->agp_buffer_map->offset;
+			(void *)(unsigned long)dev->agp_buffer_map->offset;
 
 		DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
 			  dev_priv->cp_ring->handle);
@@ -2282,3 +2290,239 @@ int r600_cp_dispatch_indirect(struct drm_device *dev,
 
 	return 0;
 }
+
+void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_master *master = file_priv->master;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
+	int nbox = sarea_priv->nbox;
+	struct drm_clip_rect *pbox = sarea_priv->boxes;
+	int i, cpp, src_pitch, dst_pitch;
+	uint64_t src, dst;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
+		cpp = 4;
+	else
+		cpp = 2;
+
+	if (sarea_priv->pfCurrentPage == 0) {
+		src_pitch = dev_priv->back_pitch;
+		dst_pitch = dev_priv->front_pitch;
+		src = dev_priv->back_offset + dev_priv->fb_location;
+		dst = dev_priv->front_offset + dev_priv->fb_location;
+	} else {
+		src_pitch = dev_priv->front_pitch;
+		dst_pitch = dev_priv->back_pitch;
+		src = dev_priv->front_offset + dev_priv->fb_location;
+		dst = dev_priv->back_offset + dev_priv->fb_location;
+	}
+
+	if (r600_prepare_blit_copy(dev, file_priv)) {
+		DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
+		return;
+	}
+	for (i = 0; i < nbox; i++) {
+		int x = pbox[i].x1;
+		int y = pbox[i].y1;
+		int w = pbox[i].x2 - x;
+		int h = pbox[i].y2 - y;
+
+		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
+
+		r600_blit_swap(dev,
+			       src, dst,
+			       x, y, x, y, w, h,
+			       src_pitch, dst_pitch, cpp);
+	}
+	r600_done_blit_copy(dev);
+
+	/* Increment the frame counter.  The client-side 3D driver must
+	 * throttle the framerate by waiting for this value before
+	 * performing the swapbuffer ioctl.
+	 */
+	sarea_priv->last_frame++;
+
+	BEGIN_RING(3);
+	R600_FRAME_AGE(sarea_priv->last_frame);
+	ADVANCE_RING();
+}
+
+int r600_cp_dispatch_texture(struct drm_device *dev,
+			     struct drm_file *file_priv,
+			     drm_radeon_texture_t *tex,
+			     drm_radeon_tex_image_t *image)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_buf *buf;
+	u32 *buffer;
+	const u8 __user *data;
+	int size, pass_size;
+	u64 src_offset, dst_offset;
+
+	if (!radeon_check_offset(dev_priv, tex->offset)) {
+		DRM_ERROR("Invalid destination offset\n");
+		return -EINVAL;
+	}
+
+	/* this might fail for zero-sized uploads - are those illegal? */
+	if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
+		DRM_ERROR("Invalid final destination offset\n");
+		return -EINVAL;
+	}
+
+	size = tex->height * tex->pitch;
+
+	if (size == 0)
+		return 0;
+
+	dst_offset = tex->offset;
+
+	if (r600_prepare_blit_copy(dev, file_priv)) {
+		DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
+		return -EAGAIN;
+	}
+	do {
+		data = (const u8 __user *)image->data;
+		pass_size = size;
+
+		buf = radeon_freelist_get(dev);
+		if (!buf) {
+			DRM_DEBUG("EAGAIN\n");
+			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
+				return -EFAULT;
+			return -EAGAIN;
+		}
+
+		if (pass_size > buf->total)
+			pass_size = buf->total;
+
+		/* Dispatch the indirect buffer.
+		 */
+		buffer =
+		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
+
+		if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {
+			DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
+			return -EFAULT;
+		}
+
+		buf->file_priv = file_priv;
+		buf->used = pass_size;
+		src_offset = dev_priv->gart_buffers_offset + buf->offset;
+
+		r600_blit_copy(dev, src_offset, dst_offset, pass_size);
+
+		radeon_cp_discard_buffer(dev, file_priv->master, buf);
+
+		/* Update the input parameters for next time */
+		image->data = (const u8 __user *)image->data + pass_size;
+		dst_offset += pass_size;
+		size -= pass_size;
+	} while (size > 0);
+	r600_done_blit_copy(dev);
+
+	return 0;
+}
+
+/*
+ * Legacy cs ioctl
+ */
+static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
+{
+	/* FIXME: check if wrap affect last reported wrap & sequence */
+	radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
+	if (!radeon->cs_id_scnt) {
+		/* increment wrap counter */
+		radeon->cs_id_wcnt += 0x01000000;
+		/* valid sequence counter start at 1 */
+		radeon->cs_id_scnt = 1;
+	}
+	return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
+}
+
+static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
+{
+	RING_LOCALS;
+
+	*id = radeon_cs_id_get(dev_priv);
+
+	/* SCRATCH 2 */
+	BEGIN_RING(3);
+	R600_CLEAR_AGE(*id);
+	ADVANCE_RING();
+	COMMIT_RING();
+}
+
+static int r600_ib_get(struct drm_device *dev,
+			struct drm_file *fpriv,
+			struct drm_buf **buffer)
+{
+	struct drm_buf *buf;
+
+	*buffer = NULL;
+	buf = radeon_freelist_get(dev);
+	if (!buf) {
+		return -EBUSY;
+	}
+	buf->file_priv = fpriv;
+	*buffer = buf;
+	return 0;
+}
+
+static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
+			struct drm_file *fpriv, int l, int r)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+
+	if (buf) {
+		if (!r)
+			r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
+		radeon_cp_discard_buffer(dev, fpriv->master, buf);
+		COMMIT_RING();
+	}
+}
+
+int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
+{
+	struct drm_radeon_private *dev_priv = dev->dev_private;
+	struct drm_radeon_cs *cs = data;
+	struct drm_buf *buf;
+	unsigned family;
+	int l, r = 0;
+	u32 *ib, cs_id = 0;
+
+	if (dev_priv == NULL) {
+		DRM_ERROR("called with no initialization\n");
+		return -EINVAL;
+	}
+	family = dev_priv->flags & RADEON_FAMILY_MASK;
+	if (family < CHIP_R600) {
+		DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
+		return -EINVAL;
+	}
+	mutex_lock(&dev_priv->cs_mutex);
+	/* get ib */
+	r = r600_ib_get(dev, fpriv, &buf);
+	if (r) {
+		DRM_ERROR("ib_get failed\n");
+		goto out;
+	}
+	ib = dev->agp_buffer_map->handle + buf->offset;
+	/* now parse command stream */
+	r = r600_cs_legacy(dev, data,  fpriv, family, ib, &l);
+	if (r) {
+		goto out;
+	}
+
+out:
+	r600_ib_free(dev, buf, fpriv, l, r);
+	/* emit cs id sequence */
+	r600_cs_id_emit(dev_priv, &cs_id);
+	cs->cs_id = cs_id;
+	mutex_unlock(&dev_priv->cs_mutex);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
new file mode 100644
index 0000000000000000000000000000000000000000..39bf6349351b12b747bfd79ea5f54af7331ff1d1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc);
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc);
+typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
+static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;
+
+/**
+ * r600_cs_packet_parse() - parse cp packet and point ib index to next packet
+ * @parser:	parser structure holding parsing context.
+ * @pkt:	where to store packet informations
+ *
+ * Assume that chunk_ib_index is properly set. Will return -EINVAL
+ * if packet is bigger than remaining ib size. or if packets is unknown.
+ **/
+int r600_cs_packet_parse(struct radeon_cs_parser *p,
+			struct radeon_cs_packet *pkt,
+			unsigned idx)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	uint32_t header;
+
+	if (idx >= ib_chunk->length_dw) {
+		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+			  idx, ib_chunk->length_dw);
+		return -EINVAL;
+	}
+	header = ib_chunk->kdata[idx];
+	pkt->idx = idx;
+	pkt->type = CP_PACKET_GET_TYPE(header);
+	pkt->count = CP_PACKET_GET_COUNT(header);
+	pkt->one_reg_wr = 0;
+	switch (pkt->type) {
+	case PACKET_TYPE0:
+		pkt->reg = CP_PACKET0_GET_REG(header);
+		break;
+	case PACKET_TYPE3:
+		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
+		break;
+	case PACKET_TYPE2:
+		pkt->count = -1;
+		break;
+	default:
+		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
+		return -EINVAL;
+	}
+	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
+		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
+			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
+ * @parser:		parser structure holding parsing context.
+ * @data:		pointer to relocation data
+ * @offset_start:	starting offset
+ * @offset_mask:	offset mask (to align start offset on)
+ * @reloc:		reloc informations
+ *
+ * Check next packet is relocation packet3, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_packet p3reloc;
+	unsigned idx;
+	int r;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+	if (r) {
+		return r;
+	}
+	p->idx += p3reloc.count + 2;
+	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+			  p3reloc.idx);
+		return -EINVAL;
+	}
+	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	/* FIXME: we assume reloc size is 4 dwords */
+	*cs_reloc = p->relocs_ptr[(idx / 4)];
+	return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
+ * @parser:		parser structure holding parsing context.
+ * @data:		pointer to relocation data
+ * @offset_start:	starting offset
+ * @offset_mask:	offset mask (to align start offset on)
+ * @reloc:		reloc informations
+ *
+ * Check next packet is relocation packet3, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_packet p3reloc;
+	unsigned idx;
+	int r;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+	if (r) {
+		return r;
+	}
+	p->idx += p3reloc.count + 2;
+	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+			  p3reloc.idx);
+		return -EINVAL;
+	}
+	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	*cs_reloc = &p->relocs[0];
+	(*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32;
+	(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
+	return 0;
+}
+
+static int r600_packet0_check(struct radeon_cs_parser *p,
+				struct radeon_cs_packet *pkt,
+				unsigned idx, unsigned reg)
+{
+	switch (reg) {
+	case AVIVO_D1MODE_VLINE_START_END:
+	case AVIVO_D2MODE_VLINE_START_END:
+		break;
+	default:
+		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
+		       reg, idx);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
+				struct radeon_cs_packet *pkt)
+{
+	unsigned reg, i;
+	unsigned idx;
+	int r;
+
+	idx = pkt->idx + 1;
+	reg = pkt->reg;
+	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
+		r = r600_packet0_check(p, pkt, idx, reg);
+		if (r) {
+			return r;
+		}
+	}
+	return 0;
+}
+
+static int r600_packet3_check(struct radeon_cs_parser *p,
+				struct radeon_cs_packet *pkt)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_reloc *reloc;
+	volatile u32 *ib;
+	unsigned idx;
+	unsigned i;
+	unsigned start_reg, end_reg, reg;
+	int r;
+
+	ib = p->ib->ptr;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	idx = pkt->idx + 1;
+	switch (pkt->opcode) {
+	case PACKET3_START_3D_CMDBUF:
+		if (p->family >= CHIP_RV770 || pkt->count) {
+			DRM_ERROR("bad START_3D\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_CONTEXT_CONTROL:
+		if (pkt->count != 1) {
+			DRM_ERROR("bad CONTEXT_CONTROL\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_INDEX_TYPE:
+	case PACKET3_NUM_INSTANCES:
+		if (pkt->count) {
+			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_DRAW_INDEX:
+		if (pkt->count != 3) {
+			DRM_ERROR("bad DRAW_INDEX\n");
+			return -EINVAL;
+		}
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad DRAW_INDEX\n");
+			return -EINVAL;
+		}
+		ib[idx+0] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+		ib[idx+1] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		break;
+	case PACKET3_DRAW_INDEX_AUTO:
+		if (pkt->count != 1) {
+			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_DRAW_INDEX_IMMD_BE:
+	case PACKET3_DRAW_INDEX_IMMD:
+		if (pkt->count < 2) {
+			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_WAIT_REG_MEM:
+		if (pkt->count != 5) {
+			DRM_ERROR("bad WAIT_REG_MEM\n");
+			return -EINVAL;
+		}
+		/* bit 4 is reg (0) or mem (1) */
+		if (ib_chunk->kdata[idx+0] & 0x10) {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad WAIT_REG_MEM\n");
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+			ib[idx+2] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		}
+		break;
+	case PACKET3_SURFACE_SYNC:
+		if (pkt->count != 3) {
+			DRM_ERROR("bad SURFACE_SYNC\n");
+			return -EINVAL;
+		}
+		/* 0xffffffff/0x0 is flush all cache flag */
+		if (ib_chunk->kdata[idx+1] != 0xffffffff ||
+		    ib_chunk->kdata[idx+2] != 0) {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad SURFACE_SYNC\n");
+				return -EINVAL;
+			}
+			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		}
+		break;
+	case PACKET3_EVENT_WRITE:
+		if (pkt->count != 2 && pkt->count != 0) {
+			DRM_ERROR("bad EVENT_WRITE\n");
+			return -EINVAL;
+		}
+		if (pkt->count) {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad EVENT_WRITE\n");
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+			ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		}
+		break;
+	case PACKET3_EVENT_WRITE_EOP:
+		if (pkt->count != 4) {
+			DRM_ERROR("bad EVENT_WRITE_EOP\n");
+			return -EINVAL;
+		}
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad EVENT_WRITE\n");
+			return -EINVAL;
+		}
+		ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+		ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		break;
+	case PACKET3_SET_CONFIG_REG:
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONFIG_REG_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) ||
+		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
+		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
+			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < pkt->count; i++) {
+			reg = start_reg + (4 * i);
+			switch (reg) {
+			case CP_COHER_BASE:
+				/* use PACKET3_SURFACE_SYNC */
+				return -EINVAL;
+			default:
+				break;
+			}
+		}
+		break;
+	case PACKET3_SET_CONTEXT_REG:
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONTEXT_REG_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) ||
+		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
+		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
+			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < pkt->count; i++) {
+			reg = start_reg + (4 * i);
+			switch (reg) {
+			case DB_DEPTH_BASE:
+			case CB_COLOR0_BASE:
+			case CB_COLOR1_BASE:
+			case CB_COLOR2_BASE:
+			case CB_COLOR3_BASE:
+			case CB_COLOR4_BASE:
+			case CB_COLOR5_BASE:
+			case CB_COLOR6_BASE:
+			case CB_COLOR7_BASE:
+			case SQ_PGM_START_FS:
+			case SQ_PGM_START_ES:
+			case SQ_PGM_START_VS:
+			case SQ_PGM_START_GS:
+			case SQ_PGM_START_PS:
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_CONTEXT_REG "
+							"0x%04X\n", reg);
+					return -EINVAL;
+				}
+				ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				break;
+			case VGT_DMA_BASE:
+			case VGT_DMA_BASE_HI:
+				/* These should be handled by DRAW_INDEX packet 3 */
+			case VGT_STRMOUT_BASE_OFFSET_0:
+			case VGT_STRMOUT_BASE_OFFSET_1:
+			case VGT_STRMOUT_BASE_OFFSET_2:
+			case VGT_STRMOUT_BASE_OFFSET_3:
+			case VGT_STRMOUT_BASE_OFFSET_HI_0:
+			case VGT_STRMOUT_BASE_OFFSET_HI_1:
+			case VGT_STRMOUT_BASE_OFFSET_HI_2:
+			case VGT_STRMOUT_BASE_OFFSET_HI_3:
+			case VGT_STRMOUT_BUFFER_BASE_0:
+			case VGT_STRMOUT_BUFFER_BASE_1:
+			case VGT_STRMOUT_BUFFER_BASE_2:
+			case VGT_STRMOUT_BUFFER_BASE_3:
+			case VGT_STRMOUT_BUFFER_OFFSET_0:
+			case VGT_STRMOUT_BUFFER_OFFSET_1:
+			case VGT_STRMOUT_BUFFER_OFFSET_2:
+			case VGT_STRMOUT_BUFFER_OFFSET_3:
+				/* These should be handled by STRMOUT_BUFFER packet 3 */
+				DRM_ERROR("bad context reg: 0x%08x\n", reg);
+				return -EINVAL;
+			default:
+				break;
+			}
+		}
+		break;
+	case PACKET3_SET_RESOURCE:
+		if (pkt->count % 7) {
+			DRM_ERROR("bad SET_RESOURCE\n");
+			return -EINVAL;
+		}
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_RESOURCE_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) ||
+		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
+		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
+			DRM_ERROR("bad SET_RESOURCE\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < (pkt->count / 7); i++) {
+			switch (G__SQ_VTX_CONSTANT_TYPE(ib[idx+(i*7)+6+1])) {
+			case SQ_TEX_VTX_VALID_TEXTURE:
+				/* tex base */
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_RESOURCE\n");
+					return -EINVAL;
+				}
+				ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				/* tex mip base */
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_RESOURCE\n");
+					return -EINVAL;
+				}
+				ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				break;
+			case SQ_TEX_VTX_VALID_BUFFER:
+				/* vtx base */
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_RESOURCE\n");
+					return -EINVAL;
+				}
+				ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
+				ib[idx+1+(i*7)+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+				break;
+			case SQ_TEX_VTX_INVALID_TEXTURE:
+			case SQ_TEX_VTX_INVALID_BUFFER:
+			default:
+				DRM_ERROR("bad SET_RESOURCE\n");
+				return -EINVAL;
+			}
+		}
+		break;
+	case PACKET3_SET_ALU_CONST:
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_ALU_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_ALU_CONST_END) ||
+		    (end_reg >= PACKET3_SET_ALU_CONST_END)) {
+			DRM_ERROR("bad SET_ALU_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_BOOL_CONST:
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_BOOL_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
+		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
+			DRM_ERROR("bad SET_BOOL_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_LOOP_CONST:
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_LOOP_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
+		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
+			DRM_ERROR("bad SET_LOOP_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_CTL_CONST:
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_CTL_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
+		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
+			DRM_ERROR("bad SET_CTL_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_SAMPLER:
+		if (pkt->count % 3) {
+			DRM_ERROR("bad SET_SAMPLER\n");
+			return -EINVAL;
+		}
+		start_reg = (ib[idx+0] << 2) + PACKET3_SET_SAMPLER_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) ||
+		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
+		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
+			DRM_ERROR("bad SET_SAMPLER\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SURFACE_BASE_UPDATE:
+		if (p->family >= CHIP_RV770 || p->family == CHIP_R600) {
+			DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+			return -EINVAL;
+		}
+		if (pkt->count) {
+			DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_NOP:
+		break;
+	default:
+		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int r600_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_packet pkt;
+	int r;
+
+	do {
+		r = r600_cs_packet_parse(p, &pkt, p->idx);
+		if (r) {
+			return r;
+		}
+		p->idx += pkt.count + 2;
+		switch (pkt.type) {
+		case PACKET_TYPE0:
+			r = r600_cs_parse_packet0(p, &pkt);
+			break;
+		case PACKET_TYPE2:
+			break;
+		case PACKET_TYPE3:
+			r = r600_packet3_check(p, &pkt);
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+			return -EINVAL;
+		}
+		if (r) {
+			return r;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib->length_dw; r++) {
+		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib->ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
+
+static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
+{
+	if (p->chunk_relocs_idx == -1) {
+		return 0;
+	}
+	p->relocs = kcalloc(1, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+	if (p->relocs == NULL) {
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser:	parser structure holding parsing context.
+ * @error:	error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
+{
+	unsigned i;
+
+	kfree(parser->relocs);
+	for (i = 0; i < parser->nchunks; i++) {
+		kfree(parser->chunks[i].kdata);
+	}
+	kfree(parser->chunks);
+	kfree(parser->chunks_array);
+}
+
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+			unsigned family, u32 *ib, int *l)
+{
+	struct radeon_cs_parser parser;
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_ib	fake_ib;
+	int r;
+
+	/* initialize parser */
+	memset(&parser, 0, sizeof(struct radeon_cs_parser));
+	parser.filp = filp;
+	parser.rdev = NULL;
+	parser.family = family;
+	parser.ib = &fake_ib;
+	fake_ib.ptr = ib;
+	r = radeon_cs_parser_init(&parser, data);
+	if (r) {
+		DRM_ERROR("Failed to initialize parser !\n");
+		r600_cs_parser_fini(&parser, r);
+		return r;
+	}
+	r = r600_cs_parser_relocs_legacy(&parser);
+	if (r) {
+		DRM_ERROR("Failed to parse relocation !\n");
+		r600_cs_parser_fini(&parser, r);
+		return r;
+	}
+	/* Copy the packet into the IB, the parser will read from the
+	 * input memory (cached) and write to the IB (which can be
+	 * uncached). */
+	ib_chunk = &parser.chunks[parser.chunk_ib_idx];
+	parser.ib->length_dw = ib_chunk->length_dw;
+	memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4);
+	*l = parser.ib->length_dw;
+	r = r600_cs_parse(&parser);
+	if (r) {
+		DRM_ERROR("Invalid command stream !\n");
+		r600_cs_parser_fini(&parser, r);
+		return r;
+	}
+	r600_cs_parser_fini(&parser, r);
+	return r;
+}
+
+void r600_cs_legacy_init(void)
+{
+	r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
+}
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
new file mode 100644
index 0000000000000000000000000000000000000000..723295f59281f2dd61d8b0be13a5def2db71af20
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -0,0 +1,661 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef R600D_H
+#define R600D_H
+
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define R6XX_MAX_SH_GPRS			256
+#define R6XX_MAX_TEMP_GPRS			16
+#define R6XX_MAX_SH_THREADS			256
+#define R6XX_MAX_SH_STACK_ENTRIES		4096
+#define R6XX_MAX_BACKENDS			8
+#define R6XX_MAX_BACKENDS_MASK			0xff
+#define R6XX_MAX_SIMDS				8
+#define R6XX_MAX_SIMDS_MASK			0xff
+#define R6XX_MAX_PIPES				8
+#define R6XX_MAX_PIPES_MASK			0xff
+
+/* PTE flags */
+#define PTE_VALID				(1 << 0)
+#define PTE_SYSTEM				(1 << 1)
+#define PTE_SNOOPED				(1 << 2)
+#define PTE_READABLE				(1 << 5)
+#define PTE_WRITEABLE				(1 << 6)
+
+/* Registers */
+#define	ARB_POP						0x2418
+#define 	ENABLE_TC128					(1 << 30)
+#define	ARB_GDEC_RD_CNTL				0x246C
+
+#define	CC_GC_SHADER_PIPE_CONFIG			0x8950
+#define	CC_RB_BACKEND_DISABLE				0x98F4
+#define		BACKEND_DISABLE(x)				((x) << 16)
+
+#define	CB_COLOR0_BASE					0x28040
+#define	CB_COLOR1_BASE					0x28044
+#define	CB_COLOR2_BASE					0x28048
+#define	CB_COLOR3_BASE					0x2804C
+#define	CB_COLOR4_BASE					0x28050
+#define	CB_COLOR5_BASE					0x28054
+#define	CB_COLOR6_BASE					0x28058
+#define	CB_COLOR7_BASE					0x2805C
+#define	CB_COLOR7_FRAG					0x280FC
+
+#define CB_COLOR0_SIZE                                  0x28060
+#define CB_COLOR0_VIEW                                  0x28080
+#define CB_COLOR0_INFO                                  0x280a0
+#define CB_COLOR0_TILE                                  0x280c0
+#define CB_COLOR0_FRAG                                  0x280e0
+#define CB_COLOR0_MASK                                  0x28100
+
+#define	CONFIG_MEMSIZE					0x5428
+#define	CP_STAT						0x8680
+#define	CP_COHER_BASE					0x85F8
+#define	CP_DEBUG					0xC1FC
+#define	R_0086D8_CP_ME_CNTL			0x86D8
+#define		S_0086D8_CP_ME_HALT(x)			(((x) & 1)<<28)
+#define		C_0086D8_CP_ME_HALT(x)			((x) & 0xEFFFFFFF)
+#define	CP_ME_RAM_DATA					0xC160
+#define	CP_ME_RAM_RADDR					0xC158
+#define	CP_ME_RAM_WADDR					0xC15C
+#define CP_MEQ_THRESHOLDS				0x8764
+#define		MEQ_END(x)					((x) << 16)
+#define		ROQ_END(x)					((x) << 24)
+#define	CP_PERFMON_CNTL					0x87FC
+#define	CP_PFP_UCODE_ADDR				0xC150
+#define	CP_PFP_UCODE_DATA				0xC154
+#define	CP_QUEUE_THRESHOLDS				0x8760
+#define		ROQ_IB1_START(x)				((x) << 0)
+#define		ROQ_IB2_START(x)				((x) << 8)
+#define	CP_RB_BASE					0xC100
+#define	CP_RB_CNTL					0xC104
+#define		RB_BUFSZ(x)					((x)<<0)
+#define		RB_BLKSZ(x)					((x)<<8)
+#define		RB_NO_UPDATE					(1<<27)
+#define		RB_RPTR_WR_ENA					(1<<31)
+#define		BUF_SWAP_32BIT					(2 << 16)
+#define	CP_RB_RPTR					0x8700
+#define	CP_RB_RPTR_ADDR					0xC10C
+#define	CP_RB_RPTR_ADDR_HI				0xC110
+#define	CP_RB_RPTR_WR					0xC108
+#define	CP_RB_WPTR					0xC114
+#define	CP_RB_WPTR_ADDR					0xC118
+#define	CP_RB_WPTR_ADDR_HI				0xC11C
+#define	CP_RB_WPTR_DELAY				0x8704
+#define	CP_ROQ_IB1_STAT					0x8784
+#define	CP_ROQ_IB2_STAT					0x8788
+#define	CP_SEM_WAIT_TIMER				0x85BC
+
+#define	DB_DEBUG					0x9830
+#define		PREZ_MUST_WAIT_FOR_POSTZ_DONE			(1 << 31)
+#define	DB_DEPTH_BASE					0x2800C
+#define	DB_WATERMARKS					0x9838
+#define		DEPTH_FREE(x)					((x) << 0)
+#define		DEPTH_FLUSH(x)					((x) << 5)
+#define		DEPTH_PENDING_FREE(x)				((x) << 15)
+#define		DEPTH_CACHELINE_FREE(x)				((x) << 20)
+
+#define	DCP_TILING_CONFIG				0x6CA0
+#define		PIPE_TILING(x)					((x) << 1)
+#define 	BANK_TILING(x)					((x) << 4)
+#define		GROUP_SIZE(x)					((x) << 6)
+#define		ROW_TILING(x)					((x) << 8)
+#define		BANK_SWAPS(x)					((x) << 11)
+#define		SAMPLE_SPLIT(x)					((x) << 14)
+#define		BACKEND_MAP(x)					((x) << 16)
+
+#define GB_TILING_CONFIG				0x98F0
+
+#define	GC_USER_SHADER_PIPE_CONFIG			0x8954
+#define		INACTIVE_QD_PIPES(x)				((x) << 8)
+#define		INACTIVE_QD_PIPES_MASK				0x0000FF00
+#define		INACTIVE_SIMDS(x)				((x) << 16)
+#define		INACTIVE_SIMDS_MASK				0x00FF0000
+
+#define SQ_CONFIG                                         0x8c00
+#       define VC_ENABLE                                  (1 << 0)
+#       define EXPORT_SRC_C                               (1 << 1)
+#       define DX9_CONSTS                                 (1 << 2)
+#       define ALU_INST_PREFER_VECTOR                     (1 << 3)
+#       define DX10_CLAMP                                 (1 << 4)
+#       define CLAUSE_SEQ_PRIO(x)                         ((x) << 8)
+#       define PS_PRIO(x)                                 ((x) << 24)
+#       define VS_PRIO(x)                                 ((x) << 26)
+#       define GS_PRIO(x)                                 ((x) << 28)
+#       define ES_PRIO(x)                                 ((x) << 30)
+#define SQ_GPR_RESOURCE_MGMT_1                            0x8c04
+#       define NUM_PS_GPRS(x)                             ((x) << 0)
+#       define NUM_VS_GPRS(x)                             ((x) << 16)
+#       define NUM_CLAUSE_TEMP_GPRS(x)                    ((x) << 28)
+#define SQ_GPR_RESOURCE_MGMT_2                            0x8c08
+#       define NUM_GS_GPRS(x)                             ((x) << 0)
+#       define NUM_ES_GPRS(x)                             ((x) << 16)
+#define SQ_THREAD_RESOURCE_MGMT                           0x8c0c
+#       define NUM_PS_THREADS(x)                          ((x) << 0)
+#       define NUM_VS_THREADS(x)                          ((x) << 8)
+#       define NUM_GS_THREADS(x)                          ((x) << 16)
+#       define NUM_ES_THREADS(x)                          ((x) << 24)
+#define SQ_STACK_RESOURCE_MGMT_1                          0x8c10
+#       define NUM_PS_STACK_ENTRIES(x)                    ((x) << 0)
+#       define NUM_VS_STACK_ENTRIES(x)                    ((x) << 16)
+#define SQ_STACK_RESOURCE_MGMT_2                          0x8c14
+#       define NUM_GS_STACK_ENTRIES(x)                    ((x) << 0)
+#       define NUM_ES_STACK_ENTRIES(x)                    ((x) << 16)
+
+#define GRBM_CNTL                                       0x8000
+#       define GRBM_READ_TIMEOUT(x)                     ((x) << 0)
+#define	GRBM_STATUS					0x8010
+#define		CMDFIFO_AVAIL_MASK				0x0000001F
+#define		GUI_ACTIVE					(1<<31)
+#define	GRBM_STATUS2					0x8014
+#define	GRBM_SOFT_RESET					0x8020
+#define		SOFT_RESET_CP					(1<<0)
+
+#define	HDP_HOST_PATH_CNTL				0x2C00
+#define	HDP_NONSURFACE_BASE				0x2C04
+#define	HDP_NONSURFACE_INFO				0x2C08
+#define	HDP_NONSURFACE_SIZE				0x2C0C
+#define HDP_REG_COHERENCY_FLUSH_CNTL			0x54A0
+#define	HDP_TILING_CONFIG				0x2F3C
+
+#define MC_VM_AGP_TOP					0x2184
+#define MC_VM_AGP_BOT					0x2188
+#define	MC_VM_AGP_BASE					0x218C
+#define MC_VM_FB_LOCATION				0x2180
+#define MC_VM_L1_TLB_MCD_RD_A_CNTL			0x219C
+#define 	ENABLE_L1_TLB					(1 << 0)
+#define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
+#define		ENABLE_L1_STRICT_ORDERING			(1 << 2)
+#define		SYSTEM_ACCESS_MODE_MASK				0x000000C0
+#define		SYSTEM_ACCESS_MODE_SHIFT			6
+#define		SYSTEM_ACCESS_MODE_PA_ONLY			(0 << 6)
+#define		SYSTEM_ACCESS_MODE_USE_SYS_MAP			(1 << 6)
+#define		SYSTEM_ACCESS_MODE_IN_SYS			(2 << 6)
+#define		SYSTEM_ACCESS_MODE_NOT_IN_SYS			(3 << 6)
+#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU	(0 << 8)
+#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE	(1 << 8)
+#define		ENABLE_SEMAPHORE_MODE				(1 << 10)
+#define		ENABLE_WAIT_L2_QUERY				(1 << 11)
+#define		EFFECTIVE_L1_TLB_SIZE(x)			(((x) & 7) << 12)
+#define		EFFECTIVE_L1_TLB_SIZE_MASK			0x00007000
+#define		EFFECTIVE_L1_TLB_SIZE_SHIFT			12
+#define		EFFECTIVE_L1_QUEUE_SIZE(x)			(((x) & 7) << 15)
+#define		EFFECTIVE_L1_QUEUE_SIZE_MASK			0x00038000
+#define		EFFECTIVE_L1_QUEUE_SIZE_SHIFT			15
+#define MC_VM_L1_TLB_MCD_RD_B_CNTL			0x21A0
+#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL			0x21FC
+#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL			0x2204
+#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL			0x2208
+#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL			0x220C
+#define	MC_VM_L1_TLB_MCB_RD_SYS_CNTL			0x2200
+#define MC_VM_L1_TLB_MCD_WR_A_CNTL			0x21A4
+#define MC_VM_L1_TLB_MCD_WR_B_CNTL			0x21A8
+#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL			0x2210
+#define MC_VM_L1_TLB_MCB_WR_HDP_CNTL			0x2218
+#define MC_VM_L1_TLB_MCB_WR_PDMA_CNTL			0x221C
+#define MC_VM_L1_TLB_MCB_WR_SEM_CNTL			0x2220
+#define MC_VM_L1_TLB_MCB_WR_SYS_CNTL			0x2214
+#define MC_VM_SYSTEM_APERTURE_LOW_ADDR			0x2190
+#define		LOGICAL_PAGE_NUMBER_MASK			0x000FFFFF
+#define		LOGICAL_PAGE_NUMBER_SHIFT			0
+#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2194
+#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x2198
+
+#define	PA_CL_ENHANCE					0x8A14
+#define		CLIP_VTX_REORDER_ENA				(1 << 0)
+#define		NUM_CLIP_SEQ(x)					((x) << 1)
+#define PA_SC_AA_CONFIG					0x28C04
+#define	PA_SC_AA_SAMPLE_LOCS_2S				0x8B40
+#define	PA_SC_AA_SAMPLE_LOCS_4S				0x8B44
+#define	PA_SC_AA_SAMPLE_LOCS_8S_WD0			0x8B48
+#define	PA_SC_AA_SAMPLE_LOCS_8S_WD1			0x8B4C
+#define		S0_X(x)						((x) << 0)
+#define		S0_Y(x)						((x) << 4)
+#define		S1_X(x)						((x) << 8)
+#define		S1_Y(x)						((x) << 12)
+#define		S2_X(x)						((x) << 16)
+#define		S2_Y(x)						((x) << 20)
+#define		S3_X(x)						((x) << 24)
+#define		S3_Y(x)						((x) << 28)
+#define		S4_X(x)						((x) << 0)
+#define		S4_Y(x)						((x) << 4)
+#define		S5_X(x)						((x) << 8)
+#define		S5_Y(x)						((x) << 12)
+#define		S6_X(x)						((x) << 16)
+#define		S6_Y(x)						((x) << 20)
+#define		S7_X(x)						((x) << 24)
+#define		S7_Y(x)						((x) << 28)
+#define PA_SC_CLIPRECT_RULE				0x2820c
+#define	PA_SC_ENHANCE					0x8BF0
+#define		FORCE_EOV_MAX_CLK_CNT(x)			((x) << 0)
+#define		FORCE_EOV_MAX_TILE_CNT(x)			((x) << 12)
+#define PA_SC_LINE_STIPPLE				0x28A0C
+#define	PA_SC_LINE_STIPPLE_STATE			0x8B10
+#define PA_SC_MODE_CNTL					0x28A4C
+#define	PA_SC_MULTI_CHIP_CNTL				0x8B20
+
+#define PA_SC_SCREEN_SCISSOR_TL                         0x28030
+#define PA_SC_GENERIC_SCISSOR_TL                        0x28240
+#define PA_SC_WINDOW_SCISSOR_TL                         0x28204
+
+#define	PCIE_PORT_INDEX					0x0038
+#define	PCIE_PORT_DATA					0x003C
+
+#define RAMCFG						0x2408
+#define		NOOFBANK_SHIFT					0
+#define		NOOFBANK_MASK					0x00000001
+#define		NOOFRANK_SHIFT					1
+#define		NOOFRANK_MASK					0x00000002
+#define		NOOFROWS_SHIFT					2
+#define		NOOFROWS_MASK					0x0000001C
+#define		NOOFCOLS_SHIFT					5
+#define		NOOFCOLS_MASK					0x00000060
+#define		CHANSIZE_SHIFT					7
+#define		CHANSIZE_MASK					0x00000080
+#define		BURSTLENGTH_SHIFT				8
+#define		BURSTLENGTH_MASK				0x00000100
+#define		CHANSIZE_OVERRIDE				(1 << 10)
+
+#define	SCRATCH_REG0					0x8500
+#define	SCRATCH_REG1					0x8504
+#define	SCRATCH_REG2					0x8508
+#define	SCRATCH_REG3					0x850C
+#define	SCRATCH_REG4					0x8510
+#define	SCRATCH_REG5					0x8514
+#define	SCRATCH_REG6					0x8518
+#define	SCRATCH_REG7					0x851C
+#define	SCRATCH_UMSK					0x8540
+#define	SCRATCH_ADDR					0x8544
+
+#define	SPI_CONFIG_CNTL					0x9100
+#define		GPR_WRITE_PRIORITY(x)				((x) << 0)
+#define		DISABLE_INTERP_1				(1 << 5)
+#define	SPI_CONFIG_CNTL_1				0x913C
+#define		VTX_DONE_DELAY(x)				((x) << 0)
+#define		INTERP_ONE_PRIM_PER_ROW				(1 << 4)
+#define	SPI_INPUT_Z					0x286D8
+#define	SPI_PS_IN_CONTROL_0				0x286CC
+#define		NUM_INTERP(x)					((x)<<0)
+#define		POSITION_ENA					(1<<8)
+#define		POSITION_CENTROID				(1<<9)
+#define		POSITION_ADDR(x)				((x)<<10)
+#define		PARAM_GEN(x)					((x)<<15)
+#define		PARAM_GEN_ADDR(x)				((x)<<19)
+#define		BARYC_SAMPLE_CNTL(x)				((x)<<26)
+#define		PERSP_GRADIENT_ENA				(1<<28)
+#define		LINEAR_GRADIENT_ENA				(1<<29)
+#define		POSITION_SAMPLE					(1<<30)
+#define		BARYC_AT_SAMPLE_ENA				(1<<31)
+#define	SPI_PS_IN_CONTROL_1				0x286D0
+#define		GEN_INDEX_PIX					(1<<0)
+#define		GEN_INDEX_PIX_ADDR(x)				((x)<<1)
+#define		FRONT_FACE_ENA					(1<<8)
+#define		FRONT_FACE_CHAN(x)				((x)<<9)
+#define		FRONT_FACE_ALL_BITS				(1<<11)
+#define		FRONT_FACE_ADDR(x)				((x)<<12)
+#define		FOG_ADDR(x)					((x)<<17)
+#define		FIXED_PT_POSITION_ENA				(1<<24)
+#define		FIXED_PT_POSITION_ADDR(x)			((x)<<25)
+
+#define	SQ_MS_FIFO_SIZES				0x8CF0
+#define		CACHE_FIFO_SIZE(x)				((x) << 0)
+#define		FETCH_FIFO_HIWATER(x)				((x) << 8)
+#define		DONE_FIFO_HIWATER(x)				((x) << 16)
+#define		ALU_UPDATE_FIFO_HIWATER(x)			((x) << 24)
+#define	SQ_PGM_START_ES					0x28880
+#define	SQ_PGM_START_FS					0x28894
+#define	SQ_PGM_START_GS					0x2886C
+#define	SQ_PGM_START_PS					0x28840
+#define SQ_PGM_RESOURCES_PS                             0x28850
+#define SQ_PGM_EXPORTS_PS                               0x28854
+#define SQ_PGM_CF_OFFSET_PS                             0x288cc
+#define	SQ_PGM_START_VS					0x28858
+#define SQ_PGM_RESOURCES_VS                             0x28868
+#define SQ_PGM_CF_OFFSET_VS                             0x288d0
+#define	SQ_VTX_CONSTANT_WORD6_0				0x38018
+#define		S__SQ_VTX_CONSTANT_TYPE(x)			(((x) & 3) << 30)
+#define		G__SQ_VTX_CONSTANT_TYPE(x)			(((x) >> 30) & 3)
+#define			SQ_TEX_VTX_INVALID_TEXTURE			0x0
+#define			SQ_TEX_VTX_INVALID_BUFFER			0x1
+#define			SQ_TEX_VTX_VALID_TEXTURE			0x2
+#define			SQ_TEX_VTX_VALID_BUFFER				0x3
+
+
+#define	SX_MISC						0x28350
+#define	SX_DEBUG_1					0x9054
+#define		SMX_EVENT_RELEASE				(1 << 0)
+#define		ENABLE_NEW_SMX_ADDRESS				(1 << 16)
+
+#define	TA_CNTL_AUX					0x9508
+#define		DISABLE_CUBE_WRAP				(1 << 0)
+#define		DISABLE_CUBE_ANISO				(1 << 1)
+#define		SYNC_GRADIENT					(1 << 24)
+#define		SYNC_WALKER					(1 << 25)
+#define		SYNC_ALIGNER					(1 << 26)
+#define		BILINEAR_PRECISION_6_BIT			(0 << 31)
+#define		BILINEAR_PRECISION_8_BIT			(1 << 31)
+
+#define	TC_CNTL						0x9608
+#define		TC_L2_SIZE(x)					((x)<<5)
+#define		L2_DISABLE_LATE_HIT				(1<<9)
+
+
+#define	VGT_CACHE_INVALIDATION				0x88C4
+#define		CACHE_INVALIDATION(x)				((x)<<0)
+#define			VC_ONLY						0
+#define			TC_ONLY						1
+#define			VC_AND_TC					2
+#define	VGT_DMA_BASE					0x287E8
+#define	VGT_DMA_BASE_HI					0x287E4
+#define	VGT_ES_PER_GS					0x88CC
+#define	VGT_GS_PER_ES					0x88C8
+#define	VGT_GS_PER_VS					0x88E8
+#define	VGT_GS_VERTEX_REUSE				0x88D4
+#define VGT_PRIMITIVE_TYPE                              0x8958
+#define	VGT_NUM_INSTANCES				0x8974
+#define	VGT_OUT_DEALLOC_CNTL				0x28C5C
+#define		DEALLOC_DIST_MASK				0x0000007F
+#define	VGT_STRMOUT_BASE_OFFSET_0			0x28B10
+#define	VGT_STRMOUT_BASE_OFFSET_1			0x28B14
+#define	VGT_STRMOUT_BASE_OFFSET_2			0x28B18
+#define	VGT_STRMOUT_BASE_OFFSET_3			0x28B1c
+#define	VGT_STRMOUT_BASE_OFFSET_HI_0			0x28B44
+#define	VGT_STRMOUT_BASE_OFFSET_HI_1			0x28B48
+#define	VGT_STRMOUT_BASE_OFFSET_HI_2			0x28B4c
+#define	VGT_STRMOUT_BASE_OFFSET_HI_3			0x28B50
+#define	VGT_STRMOUT_BUFFER_BASE_0			0x28AD8
+#define	VGT_STRMOUT_BUFFER_BASE_1			0x28AE8
+#define	VGT_STRMOUT_BUFFER_BASE_2			0x28AF8
+#define	VGT_STRMOUT_BUFFER_BASE_3			0x28B08
+#define	VGT_STRMOUT_BUFFER_OFFSET_0			0x28ADC
+#define	VGT_STRMOUT_BUFFER_OFFSET_1			0x28AEC
+#define	VGT_STRMOUT_BUFFER_OFFSET_2			0x28AFC
+#define	VGT_STRMOUT_BUFFER_OFFSET_3			0x28B0C
+#define	VGT_STRMOUT_EN					0x28AB0
+#define	VGT_VERTEX_REUSE_BLOCK_CNTL			0x28C58
+#define		VTX_REUSE_DEPTH_MASK				0x000000FF
+#define VGT_EVENT_INITIATOR                             0x28a90
+#       define CACHE_FLUSH_AND_INV_EVENT                        (0x16 << 0)
+
+#define VM_CONTEXT0_CNTL				0x1410
+#define		ENABLE_CONTEXT					(1 << 0)
+#define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
+#define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
+#define VM_CONTEXT0_INVALIDATION_LOW_ADDR		0x1490
+#define VM_CONTEXT0_INVALIDATION_HIGH_ADDR		0x14B0
+#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x1574
+#define VM_CONTEXT0_PAGE_TABLE_START_ADDR		0x1594
+#define VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x15B4
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR	0x1554
+#define VM_CONTEXT0_REQUEST_RESPONSE			0x1470
+#define		REQUEST_TYPE(x)					(((x) & 0xf) << 0)
+#define		RESPONSE_TYPE_MASK				0x000000F0
+#define		RESPONSE_TYPE_SHIFT				4
+#define VM_L2_CNTL					0x1400
+#define		ENABLE_L2_CACHE					(1 << 0)
+#define		ENABLE_L2_FRAGMENT_PROCESSING			(1 << 1)
+#define		ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE		(1 << 9)
+#define		EFFECTIVE_L2_QUEUE_SIZE(x)			(((x) & 7) << 13)
+#define VM_L2_CNTL2					0x1404
+#define		INVALIDATE_ALL_L1_TLBS				(1 << 0)
+#define		INVALIDATE_L2_CACHE				(1 << 1)
+#define VM_L2_CNTL3					0x1408
+#define		BANK_SELECT_0(x)				(((x) & 0x1f) << 0)
+#define		BANK_SELECT_1(x)				(((x) & 0x1f) << 5)
+#define		L2_CACHE_UPDATE_MODE(x)				(((x) & 3) << 10)
+#define	VM_L2_STATUS					0x140C
+#define		L2_BUSY						(1 << 0)
+
+#define	WAIT_UNTIL					0x8040
+#define         WAIT_2D_IDLE_bit                                (1 << 14)
+#define         WAIT_3D_IDLE_bit                                (1 << 15)
+#define         WAIT_2D_IDLECLEAN_bit                           (1 << 16)
+#define         WAIT_3D_IDLECLEAN_bit                           (1 << 17)
+
+
+
+/*
+ * PM4
+ */
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
+			 (((reg) >> 2) & 0xFFFF) |			\
+			 ((n) & 0x3FFF) << 16)
+#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
+			 (((op) & 0xFF) << 8) |				\
+			 ((n) & 0x3FFF) << 16)
+
+/* Packet 3 types */
+#define	PACKET3_NOP					0x10
+#define	PACKET3_INDIRECT_BUFFER_END			0x17
+#define	PACKET3_SET_PREDICATION				0x20
+#define	PACKET3_REG_RMW					0x21
+#define	PACKET3_COND_EXEC				0x22
+#define	PACKET3_PRED_EXEC				0x23
+#define	PACKET3_START_3D_CMDBUF				0x24
+#define	PACKET3_DRAW_INDEX_2				0x27
+#define	PACKET3_CONTEXT_CONTROL				0x28
+#define	PACKET3_DRAW_INDEX_IMMD_BE			0x29
+#define	PACKET3_INDEX_TYPE				0x2A
+#define	PACKET3_DRAW_INDEX				0x2B
+#define	PACKET3_DRAW_INDEX_AUTO				0x2D
+#define	PACKET3_DRAW_INDEX_IMMD				0x2E
+#define	PACKET3_NUM_INSTANCES				0x2F
+#define	PACKET3_STRMOUT_BUFFER_UPDATE			0x34
+#define	PACKET3_INDIRECT_BUFFER_MP			0x38
+#define	PACKET3_MEM_SEMAPHORE				0x39
+#define	PACKET3_MPEG_INDEX				0x3A
+#define	PACKET3_WAIT_REG_MEM				0x3C
+#define	PACKET3_MEM_WRITE				0x3D
+#define	PACKET3_INDIRECT_BUFFER				0x32
+#define	PACKET3_CP_INTERRUPT				0x40
+#define	PACKET3_SURFACE_SYNC				0x43
+#              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
+#              define PACKET3_TC_ACTION_ENA        (1 << 23)
+#              define PACKET3_VC_ACTION_ENA        (1 << 24)
+#              define PACKET3_CB_ACTION_ENA        (1 << 25)
+#              define PACKET3_DB_ACTION_ENA        (1 << 26)
+#              define PACKET3_SH_ACTION_ENA        (1 << 27)
+#              define PACKET3_SMX_ACTION_ENA       (1 << 28)
+#define	PACKET3_ME_INITIALIZE				0x44
+#define		PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
+#define	PACKET3_COND_WRITE				0x45
+#define	PACKET3_EVENT_WRITE				0x46
+#define	PACKET3_EVENT_WRITE_EOP				0x47
+#define	PACKET3_ONE_REG_WRITE				0x57
+#define	PACKET3_SET_CONFIG_REG				0x68
+#define		PACKET3_SET_CONFIG_REG_OFFSET			0x00008000
+#define		PACKET3_SET_CONFIG_REG_END			0x0000ac00
+#define	PACKET3_SET_CONTEXT_REG				0x69
+#define		PACKET3_SET_CONTEXT_REG_OFFSET			0x00028000
+#define		PACKET3_SET_CONTEXT_REG_END			0x00029000
+#define	PACKET3_SET_ALU_CONST				0x6A
+#define		PACKET3_SET_ALU_CONST_OFFSET			0x00030000
+#define		PACKET3_SET_ALU_CONST_END			0x00032000
+#define	PACKET3_SET_BOOL_CONST				0x6B
+#define		PACKET3_SET_BOOL_CONST_OFFSET			0x0003e380
+#define		PACKET3_SET_BOOL_CONST_END			0x00040000
+#define	PACKET3_SET_LOOP_CONST				0x6C
+#define		PACKET3_SET_LOOP_CONST_OFFSET			0x0003e200
+#define		PACKET3_SET_LOOP_CONST_END			0x0003e380
+#define	PACKET3_SET_RESOURCE				0x6D
+#define		PACKET3_SET_RESOURCE_OFFSET			0x00038000
+#define		PACKET3_SET_RESOURCE_END			0x0003c000
+#define	PACKET3_SET_SAMPLER				0x6E
+#define		PACKET3_SET_SAMPLER_OFFSET			0x0003c000
+#define		PACKET3_SET_SAMPLER_END				0x0003cff0
+#define	PACKET3_SET_CTL_CONST				0x6F
+#define		PACKET3_SET_CTL_CONST_OFFSET			0x0003cff0
+#define		PACKET3_SET_CTL_CONST_END			0x0003e200
+#define	PACKET3_SURFACE_BASE_UPDATE			0x73
+
+
+#define	R_008020_GRBM_SOFT_RESET		0x8020
+#define		S_008020_SOFT_RESET_CP(x)		(((x) & 1) << 0)
+#define		S_008020_SOFT_RESET_CB(x)		(((x) & 1) << 1)
+#define		S_008020_SOFT_RESET_CR(x)		(((x) & 1) << 2)
+#define		S_008020_SOFT_RESET_DB(x)		(((x) & 1) << 3)
+#define		S_008020_SOFT_RESET_PA(x)		(((x) & 1) << 5)
+#define		S_008020_SOFT_RESET_SC(x)		(((x) & 1) << 6)
+#define		S_008020_SOFT_RESET_SMX(x)		(((x) & 1) << 7)
+#define		S_008020_SOFT_RESET_SPI(x)		(((x) & 1) << 8)
+#define		S_008020_SOFT_RESET_SH(x)		(((x) & 1) << 9)
+#define		S_008020_SOFT_RESET_SX(x)		(((x) & 1) << 10)
+#define		S_008020_SOFT_RESET_TC(x)		(((x) & 1) << 11)
+#define		S_008020_SOFT_RESET_TA(x)		(((x) & 1) << 12)
+#define		S_008020_SOFT_RESET_VC(x)		(((x) & 1) << 13)
+#define		S_008020_SOFT_RESET_VGT(x)		(((x) & 1) << 14)
+#define	R_008010_GRBM_STATUS			0x8010
+#define		S_008010_CMDFIFO_AVAIL(x)		(((x) & 0x1F) << 0)
+#define		S_008010_CP_RQ_PENDING(x)		(((x) & 1) << 6)
+#define		S_008010_CF_RQ_PENDING(x)		(((x) & 1) << 7)
+#define		S_008010_PF_RQ_PENDING(x)		(((x) & 1) << 8)
+#define		S_008010_GRBM_EE_BUSY(x)		(((x) & 1) << 10)
+#define		S_008010_VC_BUSY(x)			(((x) & 1) << 11)
+#define		S_008010_DB03_CLEAN(x)			(((x) & 1) << 12)
+#define		S_008010_CB03_CLEAN(x)			(((x) & 1) << 13)
+#define		S_008010_VGT_BUSY_NO_DMA(x)		(((x) & 1) << 16)
+#define		S_008010_VGT_BUSY(x)			(((x) & 1) << 17)
+#define		S_008010_TA03_BUSY(x)			(((x) & 1) << 18)
+#define		S_008010_TC_BUSY(x)			(((x) & 1) << 19)
+#define		S_008010_SX_BUSY(x)			(((x) & 1) << 20)
+#define		S_008010_SH_BUSY(x)			(((x) & 1) << 21)
+#define		S_008010_SPI03_BUSY(x)			(((x) & 1) << 22)
+#define		S_008010_SMX_BUSY(x)			(((x) & 1) << 23)
+#define		S_008010_SC_BUSY(x)			(((x) & 1) << 24)
+#define		S_008010_PA_BUSY(x)			(((x) & 1) << 25)
+#define		S_008010_DB03_BUSY(x)			(((x) & 1) << 26)
+#define		S_008010_CR_BUSY(x)			(((x) & 1) << 27)
+#define		S_008010_CP_COHERENCY_BUSY(x)		(((x) & 1) << 28)
+#define		S_008010_CP_BUSY(x)			(((x) & 1) << 29)
+#define		S_008010_CB03_BUSY(x)			(((x) & 1) << 30)
+#define		S_008010_GUI_ACTIVE(x)			(((x) & 1) << 31)
+#define		G_008010_CMDFIFO_AVAIL(x)		(((x) >> 0) & 0x1F)
+#define		G_008010_CP_RQ_PENDING(x)		(((x) >> 6) & 1)
+#define		G_008010_CF_RQ_PENDING(x)		(((x) >> 7) & 1)
+#define		G_008010_PF_RQ_PENDING(x)		(((x) >> 8) & 1)
+#define		G_008010_GRBM_EE_BUSY(x)		(((x) >> 10) & 1)
+#define		G_008010_VC_BUSY(x)			(((x) >> 11) & 1)
+#define		G_008010_DB03_CLEAN(x)			(((x) >> 12) & 1)
+#define		G_008010_CB03_CLEAN(x)			(((x) >> 13) & 1)
+#define		G_008010_VGT_BUSY_NO_DMA(x)		(((x) >> 16) & 1)
+#define		G_008010_VGT_BUSY(x)			(((x) >> 17) & 1)
+#define		G_008010_TA03_BUSY(x)			(((x) >> 18) & 1)
+#define		G_008010_TC_BUSY(x)			(((x) >> 19) & 1)
+#define		G_008010_SX_BUSY(x)			(((x) >> 20) & 1)
+#define		G_008010_SH_BUSY(x)			(((x) >> 21) & 1)
+#define		G_008010_SPI03_BUSY(x)			(((x) >> 22) & 1)
+#define		G_008010_SMX_BUSY(x)			(((x) >> 23) & 1)
+#define		G_008010_SC_BUSY(x)			(((x) >> 24) & 1)
+#define		G_008010_PA_BUSY(x)			(((x) >> 25) & 1)
+#define		G_008010_DB03_BUSY(x)			(((x) >> 26) & 1)
+#define		G_008010_CR_BUSY(x)			(((x) >> 27) & 1)
+#define		G_008010_CP_COHERENCY_BUSY(x)		(((x) >> 28) & 1)
+#define		G_008010_CP_BUSY(x)			(((x) >> 29) & 1)
+#define		G_008010_CB03_BUSY(x)			(((x) >> 30) & 1)
+#define		G_008010_GUI_ACTIVE(x)			(((x) >> 31) & 1)
+#define	R_008014_GRBM_STATUS2			0x8014
+#define		S_008014_CR_CLEAN(x)			(((x) & 1) << 0)
+#define		S_008014_SMX_CLEAN(x)			(((x) & 1) << 1)
+#define		S_008014_SPI0_BUSY(x)			(((x) & 1) << 8)
+#define		S_008014_SPI1_BUSY(x)			(((x) & 1) << 9)
+#define		S_008014_SPI2_BUSY(x)			(((x) & 1) << 10)
+#define		S_008014_SPI3_BUSY(x)			(((x) & 1) << 11)
+#define		S_008014_TA0_BUSY(x)			(((x) & 1) << 12)
+#define		S_008014_TA1_BUSY(x)			(((x) & 1) << 13)
+#define		S_008014_TA2_BUSY(x)			(((x) & 1) << 14)
+#define		S_008014_TA3_BUSY(x)			(((x) & 1) << 15)
+#define		S_008014_DB0_BUSY(x)			(((x) & 1) << 16)
+#define		S_008014_DB1_BUSY(x)			(((x) & 1) << 17)
+#define		S_008014_DB2_BUSY(x)			(((x) & 1) << 18)
+#define		S_008014_DB3_BUSY(x)			(((x) & 1) << 19)
+#define		S_008014_CB0_BUSY(x)			(((x) & 1) << 20)
+#define		S_008014_CB1_BUSY(x)			(((x) & 1) << 21)
+#define		S_008014_CB2_BUSY(x)			(((x) & 1) << 22)
+#define		S_008014_CB3_BUSY(x)			(((x) & 1) << 23)
+#define		G_008014_CR_CLEAN(x)			(((x) >> 0) & 1)
+#define		G_008014_SMX_CLEAN(x)			(((x) >> 1) & 1)
+#define		G_008014_SPI0_BUSY(x)			(((x) >> 8) & 1)
+#define		G_008014_SPI1_BUSY(x)			(((x) >> 9) & 1)
+#define		G_008014_SPI2_BUSY(x)			(((x) >> 10) & 1)
+#define		G_008014_SPI3_BUSY(x)			(((x) >> 11) & 1)
+#define		G_008014_TA0_BUSY(x)			(((x) >> 12) & 1)
+#define		G_008014_TA1_BUSY(x)			(((x) >> 13) & 1)
+#define		G_008014_TA2_BUSY(x)			(((x) >> 14) & 1)
+#define		G_008014_TA3_BUSY(x)			(((x) >> 15) & 1)
+#define		G_008014_DB0_BUSY(x)			(((x) >> 16) & 1)
+#define		G_008014_DB1_BUSY(x)			(((x) >> 17) & 1)
+#define		G_008014_DB2_BUSY(x)			(((x) >> 18) & 1)
+#define		G_008014_DB3_BUSY(x)			(((x) >> 19) & 1)
+#define		G_008014_CB0_BUSY(x)			(((x) >> 20) & 1)
+#define		G_008014_CB1_BUSY(x)			(((x) >> 21) & 1)
+#define		G_008014_CB2_BUSY(x)			(((x) >> 22) & 1)
+#define		G_008014_CB3_BUSY(x)			(((x) >> 23) & 1)
+#define	R_000E50_SRBM_STATUS				0x0E50
+#define		G_000E50_RLC_RQ_PENDING(x)		(((x) >> 3) & 1)
+#define		G_000E50_RCU_RQ_PENDING(x)		(((x) >> 4) & 1)
+#define		G_000E50_GRBM_RQ_PENDING(x)		(((x) >> 5) & 1)
+#define		G_000E50_HI_RQ_PENDING(x)		(((x) >> 6) & 1)
+#define		G_000E50_IO_EXTERN_SIGNAL(x)		(((x) >> 7) & 1)
+#define		G_000E50_VMC_BUSY(x)			(((x) >> 8) & 1)
+#define		G_000E50_MCB_BUSY(x)			(((x) >> 9) & 1)
+#define		G_000E50_MCDZ_BUSY(x)			(((x) >> 10) & 1)
+#define		G_000E50_MCDY_BUSY(x)			(((x) >> 11) & 1)
+#define		G_000E50_MCDX_BUSY(x)			(((x) >> 12) & 1)
+#define		G_000E50_MCDW_BUSY(x)			(((x) >> 13) & 1)
+#define		G_000E50_SEM_BUSY(x)			(((x) >> 14) & 1)
+#define		G_000E50_RLC_BUSY(x)			(((x) >> 15) & 1)
+#define	R_000E60_SRBM_SOFT_RESET			0x0E60
+#define		S_000E60_SOFT_RESET_BIF(x)		(((x) & 1) << 1)
+#define		S_000E60_SOFT_RESET_CG(x)		(((x) & 1) << 2)
+#define		S_000E60_SOFT_RESET_CMC(x)		(((x) & 1) << 3)
+#define		S_000E60_SOFT_RESET_CSC(x)		(((x) & 1) << 4)
+#define		S_000E60_SOFT_RESET_DC(x)		(((x) & 1) << 5)
+#define		S_000E60_SOFT_RESET_GRBM(x)		(((x) & 1) << 8)
+#define		S_000E60_SOFT_RESET_HDP(x)		(((x) & 1) << 9)
+#define		S_000E60_SOFT_RESET_IH(x)		(((x) & 1) << 10)
+#define		S_000E60_SOFT_RESET_MC(x)		(((x) & 1) << 11)
+#define		S_000E60_SOFT_RESET_RLC(x)		(((x) & 1) << 13)
+#define		S_000E60_SOFT_RESET_ROM(x)		(((x) & 1) << 14)
+#define		S_000E60_SOFT_RESET_SEM(x)		(((x) & 1) << 15)
+#define		S_000E60_SOFT_RESET_TSC(x)		(((x) & 1) << 16)
+#define		S_000E60_SOFT_RESET_VMC(x)		(((x) & 1) << 17)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index e47f2fc294ce9250352b894befe0e943df1efa62..3299733ac3003d30c485b9624c5cd706dae5c84d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -50,8 +50,8 @@
 #include <linux/kref.h>
 
 #include "radeon_mode.h"
+#include "radeon_share.h"
 #include "radeon_reg.h"
-#include "r300.h"
 
 /*
  * Modules parameters.
@@ -112,10 +112,11 @@ enum radeon_family {
 	CHIP_RV635,
 	CHIP_RV670,
 	CHIP_RS780,
+	CHIP_RS880,
 	CHIP_RV770,
 	CHIP_RV730,
 	CHIP_RV710,
-	CHIP_RS880,
+	CHIP_RV740,
 	CHIP_LAST,
 };
 
@@ -152,10 +153,21 @@ struct radeon_device;
  */
 bool radeon_get_bios(struct radeon_device *rdev);
 
+
 /*
- * Clocks
+ * Dummy page
  */
+struct radeon_dummy_page {
+	struct page	*page;
+	dma_addr_t	addr;
+};
+int radeon_dummy_page_init(struct radeon_device *rdev);
+void radeon_dummy_page_fini(struct radeon_device *rdev);
+
 
+/*
+ * Clocks
+ */
 struct radeon_clock {
 	struct radeon_pll p1pll;
 	struct radeon_pll p2pll;
@@ -166,6 +178,7 @@ struct radeon_clock {
 	uint32_t default_sclk;
 };
 
+
 /*
  * Fences.
  */
@@ -332,14 +345,18 @@ struct radeon_mc {
 	resource_size_t		aper_size;
 	resource_size_t		aper_base;
 	resource_size_t		agp_base;
-	unsigned		gtt_location;
-	unsigned		gtt_size;
-	unsigned		vram_location;
 	/* for some chips with <= 32MB we need to lie
 	 * about vram size near mc fb location */
-	unsigned		mc_vram_size;
+	u64			mc_vram_size;
+	u64			gtt_location;
+	u64			gtt_size;
+	u64			gtt_start;
+	u64			gtt_end;
+	u64			vram_location;
+	u64			vram_start;
+	u64			vram_end;
 	unsigned		vram_width;
-	unsigned		real_vram_size;
+	u64			real_vram_size;
 	int			vram_mtrr;
 	bool			vram_is_ddr;
 };
@@ -411,6 +428,16 @@ struct radeon_cp {
 	bool			ready;
 };
 
+struct r600_blit {
+	struct radeon_object	*shader_obj;
+	u64 shader_gpu_addr;
+	u32 vs_offset, ps_offset;
+	u32 state_offset;
+	u32 state_len;
+	u32 vb_used, vb_total;
+	struct radeon_ib *vb_ib;
+};
+
 int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -463,6 +490,7 @@ struct radeon_cs_parser {
 	int			chunk_relocs_idx;
 	struct radeon_ib	*ib;
 	void			*track;
+	unsigned		family;
 };
 
 struct radeon_cs_packet {
@@ -559,6 +587,9 @@ int r100_debugfs_cp_init(struct radeon_device *rdev);
  */
 struct radeon_asic {
 	int (*init)(struct radeon_device *rdev);
+	void (*fini)(struct radeon_device *rdev);
+	int (*resume)(struct radeon_device *rdev);
+	int (*suspend)(struct radeon_device *rdev);
 	void (*errata)(struct radeon_device *rdev);
 	void (*vram_info)(struct radeon_device *rdev);
 	int (*gpu_reset)(struct radeon_device *rdev);
@@ -573,7 +604,11 @@ struct radeon_asic {
 	int (*cp_init)(struct radeon_device *rdev, unsigned ring_size);
 	void (*cp_fini)(struct radeon_device *rdev);
 	void (*cp_disable)(struct radeon_device *rdev);
+	void (*cp_commit)(struct radeon_device *rdev);
 	void (*ring_start)(struct radeon_device *rdev);
+	int (*ring_test)(struct radeon_device *rdev);
+	void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+	int (*ib_test)(struct radeon_device *rdev);
 	int (*irq_set)(struct radeon_device *rdev);
 	int (*irq_process)(struct radeon_device *rdev);
 	u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
@@ -613,6 +648,8 @@ struct r100_asic {
 union radeon_asic_config {
 	struct r300_asic	r300;
 	struct r100_asic	r100;
+	struct r600_asic	r600;
+	struct rv770_asic	rv770;
 };
 
 
@@ -698,12 +735,16 @@ struct radeon_device {
 	struct radeon_pm		pm;
 	struct mutex			cs_mutex;
 	struct radeon_wb		wb;
+	struct radeon_dummy_page	dummy_page;
 	bool				gpu_lockup;
 	bool				shutdown;
 	bool				suspend;
 	bool				need_dma32;
+	bool				new_init_path;
 	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
-	const struct firmware *fw;	/* firmware */
+	const struct firmware *me_fw;	/* all family ME firmware */
+	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
+	struct r600_blit r600_blit;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -713,6 +754,13 @@ int radeon_device_init(struct radeon_device *rdev,
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
+/* r600 blit */
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
+void r600_kms_blit_copy(struct radeon_device *rdev,
+			u64 src_gpu_addr, u64 dst_gpu_addr,
+			int size_bytes);
+
 static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
 {
 	if (reg < 0x10000)
@@ -740,6 +788,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32
 #define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg))
 #define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg))
 #define RREG32(reg) r100_mm_rreg(rdev, (reg))
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
 #define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
@@ -763,6 +812,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32
 		tmp_ |= ((val) & ~(mask));			\
 		WREG32_PLL(reg, tmp_);				\
 	} while (0)
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
 
 /*
  * Indirect registers accessor
@@ -827,51 +877,6 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
-#define CP_PACKET0			0x00000000
-#define		PACKET0_BASE_INDEX_SHIFT	0
-#define		PACKET0_BASE_INDEX_MASK		(0x1ffff << 0)
-#define		PACKET0_COUNT_SHIFT		16
-#define		PACKET0_COUNT_MASK		(0x3fff << 16)
-#define CP_PACKET1			0x40000000
-#define CP_PACKET2			0x80000000
-#define		PACKET2_PAD_SHIFT		0
-#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
-#define CP_PACKET3			0xC0000000
-#define		PACKET3_IT_OPCODE_SHIFT		8
-#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
-#define		PACKET3_COUNT_SHIFT		16
-#define		PACKET3_COUNT_MASK		(0x3fff << 16)
-/* PACKET3 op code */
-#define		PACKET3_NOP			0x10
-#define		PACKET3_3D_DRAW_VBUF		0x28
-#define		PACKET3_3D_DRAW_IMMD		0x29
-#define		PACKET3_3D_DRAW_INDX		0x2A
-#define		PACKET3_3D_LOAD_VBPNTR		0x2F
-#define		PACKET3_INDX_BUFFER		0x33
-#define		PACKET3_3D_DRAW_VBUF_2		0x34
-#define		PACKET3_3D_DRAW_IMMD_2		0x35
-#define		PACKET3_3D_DRAW_INDX_2		0x36
-#define		PACKET3_BITBLT_MULTI		0x9B
-
-#define PACKET0(reg, n)	(CP_PACKET0 |					\
-			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
-			 REG_SET(PACKET0_COUNT, (n)))
-#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
-#define PACKET3(op, n)	(CP_PACKET3 |					\
-			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
-			 REG_SET(PACKET3_COUNT, (n)))
-
-#define	PACKET_TYPE0	0
-#define	PACKET_TYPE1	1
-#define	PACKET_TYPE2	2
-#define	PACKET_TYPE3	3
-
-#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
-#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
-#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
-#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
-#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
-
 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 {
 #if DRM_DEBUG_CODE
@@ -890,6 +895,9 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
  * ASICs macro.
  */
 #define radeon_init(rdev) (rdev)->asic->init((rdev))
+#define radeon_fini(rdev) (rdev)->asic->fini((rdev))
+#define radeon_resume(rdev) (rdev)->asic->resume((rdev))
+#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
 #define radeon_cs_parse(p) rdev->asic->cs_parse((p))
 #define radeon_errata(rdev) (rdev)->asic->errata((rdev))
 #define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev))
@@ -905,7 +913,11 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 #define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize))
 #define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev))
 #define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev))
+#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev))
 #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev))
+#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev))
+#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib))
+#define radeon_ib_test(rdev) (rdev)->asic->ib_test((rdev))
 #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c9cbd8ae1f9533ff6be7a44c64bac0734092cc26..e87bb915a6de4ca96fac655d2a5f5c88f6c89d67 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -60,6 +60,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
 void r100_cp_fini(struct radeon_device *rdev);
 void r100_cp_disable(struct radeon_device *rdev);
+void r100_cp_commit(struct radeon_device *rdev);
 void r100_ring_start(struct radeon_device *rdev);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -78,6 +79,9 @@ int r100_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t offset, uint32_t obj_size);
 int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
 void r100_bandwidth_update(struct radeon_device *rdev);
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r100_ib_test(struct radeon_device *rdev);
+int r100_ring_test(struct radeon_device *rdev);
 
 static struct radeon_asic r100_asic = {
 	.init = &r100_init,
@@ -95,7 +99,11 @@ static struct radeon_asic r100_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r100_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -156,7 +164,11 @@ static struct radeon_asic r300_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -197,7 +209,11 @@ static struct radeon_asic r420_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -245,7 +261,11 @@ static struct radeon_asic rs400_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -298,7 +318,11 @@ static struct radeon_asic rs600_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -341,7 +365,11 @@ static struct radeon_asic rs690_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -391,7 +419,11 @@ static struct radeon_asic rv515_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &rv515_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -434,7 +466,11 @@ static struct radeon_asic r520_asic = {
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &rv515_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -453,9 +489,127 @@ static struct radeon_asic r520_asic = {
 };
 
 /*
- * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710
+ * r600,rv610,rv630,rv620,rv635,rv670,rs780,rs880
  */
+int r600_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
+int r600_suspend(struct radeon_device *rdev);
+int r600_resume(struct radeon_device *rdev);
+int r600_wb_init(struct radeon_device *rdev);
+void r600_wb_fini(struct radeon_device *rdev);
+void r600_cp_commit(struct radeon_device *rdev);
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+int r600_cs_parse(struct radeon_cs_parser *p);
+void r600_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence);
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence);
+int r600_irq_process(struct radeon_device *rdev);
+int r600_irq_set(struct radeon_device *rdev);
+int r600_gpu_reset(struct radeon_device *rdev);
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size);
+int r600_clear_surface_reg(struct radeon_device *rdev, int reg);
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r600_ib_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev);
+int r600_copy_blit(struct radeon_device *rdev,
+		   uint64_t src_offset, uint64_t dst_offset,
+		   unsigned num_pages, struct radeon_fence *fence);
+
+static struct radeon_asic r600_asic = {
+	.errata = NULL,
+	.init = &r600_init,
+	.fini = &r600_fini,
+	.suspend = &r600_suspend,
+	.resume = &r600_resume,
+	.cp_commit = &r600_cp_commit,
+	.vram_info = NULL,
+	.gpu_reset = &r600_gpu_reset,
+	.mc_init = NULL,
+	.mc_fini = NULL,
+	.wb_init = &r600_wb_init,
+	.wb_fini = &r600_wb_fini,
+	.gart_enable = NULL,
+	.gart_disable = NULL,
+	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+	.gart_set_page = &rs600_gart_set_page,
+	.cp_init = NULL,
+	.cp_fini = NULL,
+	.cp_disable = NULL,
+	.ring_start = NULL,
+	.ring_test = &r600_ring_test,
+	.ring_ib_execute = &r600_ring_ib_execute,
+	.ib_test = &r600_ib_test,
+	.irq_set = &r600_irq_set,
+	.irq_process = &r600_irq_process,
+	.fence_ring_emit = &r600_fence_ring_emit,
+	.cs_parse = &r600_cs_parse,
+	.copy_blit = &r600_copy_blit,
+	.copy_dma = &r600_copy_blit,
+	.copy = NULL,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r600_set_surface_reg,
+	.clear_surface_reg = r600_clear_surface_reg,
+	.bandwidth_update = &r520_bandwidth_update,
+};
+
+/*
+ * rv770,rv730,rv710,rv740
+ */
+int rv770_init(struct radeon_device *rdev);
+void rv770_fini(struct radeon_device *rdev);
+int rv770_suspend(struct radeon_device *rdev);
+int rv770_resume(struct radeon_device *rdev);
+int rv770_gpu_reset(struct radeon_device *rdev);
+
+static struct radeon_asic rv770_asic = {
+	.errata = NULL,
+	.init = &rv770_init,
+	.fini = &rv770_fini,
+	.suspend = &rv770_suspend,
+	.resume = &rv770_resume,
+	.cp_commit = &r600_cp_commit,
+	.vram_info = NULL,
+	.gpu_reset = &rv770_gpu_reset,
+	.mc_init = NULL,
+	.mc_fini = NULL,
+	.wb_init = &r600_wb_init,
+	.wb_fini = &r600_wb_fini,
+	.gart_enable = NULL,
+	.gart_disable = NULL,
+	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+	.gart_set_page = &rs600_gart_set_page,
+	.cp_init = NULL,
+	.cp_fini = NULL,
+	.cp_disable = NULL,
+	.ring_start = NULL,
+	.ring_test = &r600_ring_test,
+	.ring_ib_execute = &r600_ring_ib_execute,
+	.ib_test = &r600_ib_test,
+	.irq_set = &r600_irq_set,
+	.irq_process = &r600_irq_process,
+	.fence_ring_emit = &r600_fence_ring_emit,
+	.cs_parse = &r600_cs_parse,
+	.copy_blit = &r600_copy_blit,
+	.copy_dma = &r600_copy_blit,
+	.copy = NULL,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r600_set_surface_reg,
+	.clear_surface_reg = r600_clear_surface_reg,
+	.bandwidth_update = &r520_bandwidth_update,
+};
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bba9b4bd8f5cd22d3f46de9a043f81751a324d2d..a8fb392c9cd61c40a2f29e02a0b0b6273d74ba68 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -370,10 +370,6 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
 						       && record->
 						       ucRecordType <=
 						       ATOM_MAX_OBJECT_RECORD_NUMBER) {
-							DRM_ERROR
-							    ("record type %d\n",
-							     record->
-							     ucRecordType);
 							switch (record->
 								ucRecordType) {
 							case ATOM_I2C_RECORD_TYPE:
diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c
index a37cbce531816e7798d1def64f0a9020054a6905..152eef13197a1721602841511467652756e09a62 100644
--- a/drivers/gpu/drm/radeon/radeon_clocks.c
+++ b/drivers/gpu/drm/radeon/radeon_clocks.c
@@ -102,10 +102,12 @@ void radeon_get_clock_info(struct drm_device *dev)
 			p1pll->reference_div = 12;
 		if (p2pll->reference_div < 2)
 			p2pll->reference_div = 12;
-		if (spll->reference_div < 2)
-			spll->reference_div =
-			    RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
-			    RADEON_M_SPLL_REF_DIV_MASK;
+		if (rdev->family < CHIP_RS600) {
+			if (spll->reference_div < 2)
+				spll->reference_div =
+					RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
+					RADEON_M_SPLL_REF_DIV_MASK;
+		}
 		if (mpll->reference_div < 2)
 			mpll->reference_div = spll->reference_div;
 	} else {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 7693f7c67bd3a9ed4740744007cd65ace93936b3..f2469c51178934bf1ac8f516a6a509adc7d3f0ab 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -37,7 +37,7 @@
 /*
  * Clear GPU surface registers.
  */
-static void radeon_surface_init(struct radeon_device *rdev)
+void radeon_surface_init(struct radeon_device *rdev)
 {
 	/* FIXME: check this out */
 	if (rdev->family < CHIP_R600) {
@@ -56,7 +56,7 @@ static void radeon_surface_init(struct radeon_device *rdev)
 /*
  * GPU scratch registers helpers function.
  */
-static void radeon_scratch_init(struct radeon_device *rdev)
+void radeon_scratch_init(struct radeon_device *rdev)
 {
 	int i;
 
@@ -156,16 +156,14 @@ int radeon_mc_setup(struct radeon_device *rdev)
 		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
 		rdev->mc.gtt_location = tmp;
 	}
-	DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20);
+	DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20));
 	DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
-		 rdev->mc.vram_location,
-		 rdev->mc.vram_location + rdev->mc.mc_vram_size - 1);
-	if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size)
-		DRM_INFO("radeon: VRAM less than aperture workaround enabled\n");
-	DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
+		 (unsigned)rdev->mc.vram_location,
+		 (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1));
+	DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20));
 	DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
-		 rdev->mc.gtt_location,
-		 rdev->mc.gtt_location + rdev->mc.gtt_size - 1);
+		 (unsigned)rdev->mc.gtt_location,
+		 (unsigned)(rdev->mc.gtt_location + rdev->mc.gtt_size - 1));
 	return 0;
 }
 
@@ -205,6 +203,31 @@ static bool radeon_card_posted(struct radeon_device *rdev)
 
 }
 
+int radeon_dummy_page_init(struct radeon_device *rdev)
+{
+	rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
+	if (rdev->dummy_page.page == NULL)
+		return -ENOMEM;
+	rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
+					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (!rdev->dummy_page.addr) {
+		__free_page(rdev->dummy_page.page);
+		rdev->dummy_page.page = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void radeon_dummy_page_fini(struct radeon_device *rdev)
+{
+	if (rdev->dummy_page.page == NULL)
+		return;
+	pci_unmap_page(rdev->pdev, rdev->dummy_page.addr,
+			PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	__free_page(rdev->dummy_page.page);
+	rdev->dummy_page.page = NULL;
+}
+
 
 /*
  * Registers accessors functions.
@@ -323,9 +346,15 @@ int radeon_asic_init(struct radeon_device *rdev)
 	case CHIP_RV635:
 	case CHIP_RV670:
 	case CHIP_RS780:
+	case CHIP_RS880:
+		rdev->asic = &r600_asic;
+		break;
 	case CHIP_RV770:
 	case CHIP_RV730:
 	case CHIP_RV710:
+	case CHIP_RV740:
+		rdev->asic = &rv770_asic;
+		break;
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
@@ -448,7 +477,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		       struct pci_dev *pdev,
 		       uint32_t flags)
 {
-	int r, ret;
+	int r, ret = 0;
 	int dma_bits;
 
 	DRM_INFO("radeon: Initializing kernel modesetting.\n");
@@ -487,10 +516,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	if (r) {
 		return r;
 	}
-	r = radeon_init(rdev);
-	if (r) {
-		return r;
-	}
 
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
@@ -521,111 +546,118 @@ int radeon_device_init(struct radeon_device *rdev,
 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base);
 	DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size);
 
-	/* Setup errata flags */
-	radeon_errata(rdev);
-	/* Initialize scratch registers */
-	radeon_scratch_init(rdev);
-	/* Initialize surface registers */
-	radeon_surface_init(rdev);
-
-	/* TODO: disable VGA need to use VGA request */
-	/* BIOS*/
-	if (!radeon_get_bios(rdev)) {
-		if (ASIC_IS_AVIVO(rdev))
-			return -EINVAL;
-	}
-	if (rdev->is_atom_bios) {
-		r = radeon_atombios_init(rdev);
+	rdev->new_init_path = false;
+	r = radeon_init(rdev);
+	if (r) {
+		return r;
+	}
+	if (!rdev->new_init_path) {
+		/* Setup errata flags */
+		radeon_errata(rdev);
+		/* Initialize scratch registers */
+		radeon_scratch_init(rdev);
+		/* Initialize surface registers */
+		radeon_surface_init(rdev);
+
+		/* TODO: disable VGA need to use VGA request */
+		/* BIOS*/
+		if (!radeon_get_bios(rdev)) {
+			if (ASIC_IS_AVIVO(rdev))
+				return -EINVAL;
+		}
+		if (rdev->is_atom_bios) {
+			r = radeon_atombios_init(rdev);
+			if (r) {
+				return r;
+			}
+		} else {
+			r = radeon_combios_init(rdev);
+			if (r) {
+				return r;
+			}
+		}
+		/* Reset gpu before posting otherwise ATOM will enter infinite loop */
+		if (radeon_gpu_reset(rdev)) {
+			/* FIXME: what do we want to do here ? */
+		}
+		/* check if cards are posted or not */
+		if (!radeon_card_posted(rdev) && rdev->bios) {
+			DRM_INFO("GPU not posted. posting now...\n");
+			if (rdev->is_atom_bios) {
+				atom_asic_init(rdev->mode_info.atom_context);
+			} else {
+				radeon_combios_asic_init(rdev->ddev);
+			}
+		}
+		/* Initialize clocks */
+		r = radeon_clocks_init(rdev);
 		if (r) {
 			return r;
 		}
-	} else {
-		r = radeon_combios_init(rdev);
+		/* Get vram informations */
+		radeon_vram_info(rdev);
+
+		/* Add an MTRR for the VRAM */
+		rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
+				MTRR_TYPE_WRCOMB, 1);
+		DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
+				(unsigned)(rdev->mc.mc_vram_size >> 20),
+				(unsigned)(rdev->mc.aper_size >> 20));
+		DRM_INFO("RAM width %dbits %cDR\n",
+				rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
+		/* Initialize memory controller (also test AGP) */
+		r = radeon_mc_init(rdev);
 		if (r) {
 			return r;
 		}
-	}
-	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-	if (radeon_gpu_reset(rdev)) {
-		/* FIXME: what do we want to do here ? */
-	}
-	/* check if cards are posted or not */
-	if (!radeon_card_posted(rdev) && rdev->bios) {
-		DRM_INFO("GPU not posted. posting now...\n");
-		if (rdev->is_atom_bios) {
-			atom_asic_init(rdev->mode_info.atom_context);
-		} else {
-			radeon_combios_asic_init(rdev->ddev);
-		}
-	}
-	/* Initialize clocks */
-	r = radeon_clocks_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Get vram informations */
-	radeon_vram_info(rdev);
-
-	/* Add an MTRR for the VRAM */
-	rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
-				      MTRR_TYPE_WRCOMB, 1);
-	DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
-		 rdev->mc.real_vram_size >> 20,
-		 (unsigned)rdev->mc.aper_size >> 20);
-	DRM_INFO("RAM width %dbits %cDR\n",
-		 rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
-	/* Initialize memory controller (also test AGP) */
-	r = radeon_mc_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Fence driver */
-	r = radeon_fence_driver_init(rdev);
-	if (r) {
-		return r;
-	}
-	r = radeon_irq_kms_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Memory manager */
-	r = radeon_object_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Initialize GART (initialize after TTM so we can allocate
-	 * memory through TTM but finalize after TTM) */
-	r = radeon_gart_enable(rdev);
-	if (!r) {
-		r = radeon_gem_init(rdev);
-	}
-
-	/* 1M ring buffer */
-	if (!r) {
-		r = radeon_cp_init(rdev, 1024 * 1024);
-	}
-	if (!r) {
-		r = radeon_wb_init(rdev);
+		/* Fence driver */
+		r = radeon_fence_driver_init(rdev);
 		if (r) {
-			DRM_ERROR("radeon: failled initializing WB (%d).\n", r);
 			return r;
 		}
-	}
-	if (!r) {
-		r = radeon_ib_pool_init(rdev);
+		r = radeon_irq_kms_init(rdev);
 		if (r) {
-			DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
 			return r;
 		}
-	}
-	if (!r) {
-		r = radeon_ib_test(rdev);
+		/* Memory manager */
+		r = radeon_object_init(rdev);
 		if (r) {
-			DRM_ERROR("radeon: failled testing IB (%d).\n", r);
 			return r;
 		}
+		/* Initialize GART (initialize after TTM so we can allocate
+		 * memory through TTM but finalize after TTM) */
+		r = radeon_gart_enable(rdev);
+		if (!r) {
+			r = radeon_gem_init(rdev);
+		}
+
+		/* 1M ring buffer */
+		if (!r) {
+			r = radeon_cp_init(rdev, 1024 * 1024);
+		}
+		if (!r) {
+			r = radeon_wb_init(rdev);
+			if (r) {
+				DRM_ERROR("radeon: failled initializing WB (%d).\n", r);
+				return r;
+			}
+		}
+		if (!r) {
+			r = radeon_ib_pool_init(rdev);
+			if (r) {
+				DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+				return r;
+			}
+		}
+		if (!r) {
+			r = radeon_ib_test(rdev);
+			if (r) {
+				DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+				return r;
+			}
+		}
+		ret = r;
 	}
-	ret = r;
 	r = radeon_modeset_init(rdev);
 	if (r) {
 		return r;
@@ -651,26 +683,29 @@ void radeon_device_fini(struct radeon_device *rdev)
 	rdev->shutdown = true;
 	/* Order matter so becarefull if you rearrange anythings */
 	radeon_modeset_fini(rdev);
-	radeon_ib_pool_fini(rdev);
-	radeon_cp_fini(rdev);
-	radeon_wb_fini(rdev);
-	radeon_gem_fini(rdev);
-	radeon_object_fini(rdev);
-	/* mc_fini must be after object_fini */
-	radeon_mc_fini(rdev);
+	if (!rdev->new_init_path) {
+		radeon_ib_pool_fini(rdev);
+		radeon_cp_fini(rdev);
+		radeon_wb_fini(rdev);
+		radeon_gem_fini(rdev);
+		radeon_mc_fini(rdev);
 #if __OS_HAS_AGP
-	radeon_agp_fini(rdev);
+		radeon_agp_fini(rdev);
 #endif
-	radeon_irq_kms_fini(rdev);
-	radeon_fence_driver_fini(rdev);
-	radeon_clocks_fini(rdev);
-	if (rdev->is_atom_bios) {
-		radeon_atombios_fini(rdev);
+		radeon_irq_kms_fini(rdev);
+		radeon_fence_driver_fini(rdev);
+		radeon_clocks_fini(rdev);
+		radeon_object_fini(rdev);
+		if (rdev->is_atom_bios) {
+			radeon_atombios_fini(rdev);
+		} else {
+			radeon_combios_fini(rdev);
+		}
+		kfree(rdev->bios);
+		rdev->bios = NULL;
 	} else {
-		radeon_combios_fini(rdev);
+		radeon_fini(rdev);
 	}
-	kfree(rdev->bios);
-	rdev->bios = NULL;
 	iounmap(rdev->rmmio);
 	rdev->rmmio = NULL;
 }
@@ -708,9 +743,12 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	/* wait for gpu to finish processing current batch */
 	radeon_fence_wait_last(rdev);
 
-	radeon_cp_disable(rdev);
-	radeon_gart_disable(rdev);
-
+	if (!rdev->new_init_path) {
+		radeon_cp_disable(rdev);
+		radeon_gart_disable(rdev);
+	} else {
+		radeon_suspend(rdev);
+	}
 	/* evict remaining vram memory */
 	radeon_object_evict_vram(rdev);
 
@@ -746,33 +784,37 @@ int radeon_resume_kms(struct drm_device *dev)
 	if (radeon_gpu_reset(rdev)) {
 		/* FIXME: what do we want to do here ? */
 	}
-	/* post card */
-	if (rdev->is_atom_bios) {
-		atom_asic_init(rdev->mode_info.atom_context);
+	if (!rdev->new_init_path) {
+		/* post card */
+		if (rdev->is_atom_bios) {
+			atom_asic_init(rdev->mode_info.atom_context);
+		} else {
+			radeon_combios_asic_init(rdev->ddev);
+		}
+		/* Initialize clocks */
+		r = radeon_clocks_init(rdev);
+		if (r) {
+			release_console_sem();
+			return r;
+		}
+		/* Enable IRQ */
+		rdev->irq.sw_int = true;
+		radeon_irq_set(rdev);
+		/* Initialize GPU Memory Controller */
+		r = radeon_mc_init(rdev);
+		if (r) {
+			goto out;
+		}
+		r = radeon_gart_enable(rdev);
+		if (r) {
+			goto out;
+		}
+		r = radeon_cp_init(rdev, rdev->cp.ring_size);
+		if (r) {
+			goto out;
+		}
 	} else {
-		radeon_combios_asic_init(rdev->ddev);
-	}
-	/* Initialize clocks */
-	r = radeon_clocks_init(rdev);
-	if (r) {
-		release_console_sem();
-		return r;
-	}
-	/* Enable IRQ */
-	rdev->irq.sw_int = true;
-	radeon_irq_set(rdev);
-	/* Initialize GPU Memory Controller */
-	r = radeon_mc_init(rdev);
-	if (r) {
-		goto out;
-	}
-	r = radeon_gart_enable(rdev);
-	if (r) {
-		goto out;
-	}
-	r = radeon_cp_init(rdev, rdev->cp.ring_size);
-	if (r) {
-		goto out;
+		radeon_resume(rdev);
 	}
 out:
 	fb_set_suspend(rdev->fbdev_info, 0);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 40294a07976f0f8e1ab847222664e826a4a43cd6..c7b185924f6c983af842e6a409924d987353f72c 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -356,6 +356,12 @@ typedef struct drm_radeon_private {
 	int r700_sc_hiz_tile_fifo_size;
 	int r700_sc_earlyz_tile_fifo_fize;
 
+	struct mutex cs_mutex;
+	u32 cs_id_scnt;
+	u32 cs_id_wcnt;
+	/* r6xx/r7xx drm blit vertex buffer */
+	struct drm_buf *blit_vb;
+
 	/* firmware */
 	const struct firmware *me_fw, *pfp_fw;
 } drm_radeon_private_t;
@@ -396,6 +402,9 @@ static __inline__ int radeon_check_offset(drm_radeon_private_t *dev_priv,
 		(off >= gart_start && off <= gart_end));
 }
 
+/* radeon_state.c */
+extern void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf);
+
 				/* radeon_cp.c */
 extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv);
 extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv);
@@ -487,6 +496,22 @@ extern int r600_cp_dispatch_indirect(struct drm_device *dev,
 				     struct drm_buf *buf, int start, int end);
 extern int r600_page_table_init(struct drm_device *dev);
 extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info);
+extern int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv);
+extern void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv);
+extern int r600_cp_dispatch_texture(struct drm_device *dev,
+				    struct drm_file *file_priv,
+				    drm_radeon_texture_t *tex,
+				    drm_radeon_tex_image_t *image);
+/* r600_blit.c */
+extern int r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv);
+extern void r600_done_blit_copy(struct drm_device *dev);
+extern void r600_blit_copy(struct drm_device *dev,
+			   uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+			   int size_bytes);
+extern void r600_blit_swap(struct drm_device *dev,
+			   uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+			   int sx, int sy, int dx, int dy,
+			   int w, int h, int src_pitch, int dst_pitch, int cpp);
 
 /* Flags for stats.boxes
  */
@@ -1114,13 +1139,71 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index);
 #	define RADEON_CNTL_BITBLT_MULTI		0x00009B00
 #	define RADEON_CNTL_SET_SCISSORS		0xC0001E00
 
-#	define R600_IT_INDIRECT_BUFFER		0x00003200
-#	define R600_IT_ME_INITIALIZE		0x00004400
+#       define R600_IT_INDIRECT_BUFFER_END      0x00001700
+#       define R600_IT_SET_PREDICATION          0x00002000
+#       define R600_IT_REG_RMW                  0x00002100
+#       define R600_IT_COND_EXEC                0x00002200
+#       define R600_IT_PRED_EXEC                0x00002300
+#       define R600_IT_START_3D_CMDBUF          0x00002400
+#       define R600_IT_DRAW_INDEX_2             0x00002700
+#       define R600_IT_CONTEXT_CONTROL          0x00002800
+#       define R600_IT_DRAW_INDEX_IMMD_BE       0x00002900
+#       define R600_IT_INDEX_TYPE               0x00002A00
+#       define R600_IT_DRAW_INDEX               0x00002B00
+#       define R600_IT_DRAW_INDEX_AUTO          0x00002D00
+#       define R600_IT_DRAW_INDEX_IMMD          0x00002E00
+#       define R600_IT_NUM_INSTANCES            0x00002F00
+#       define R600_IT_STRMOUT_BUFFER_UPDATE    0x00003400
+#       define R600_IT_INDIRECT_BUFFER_MP       0x00003800
+#       define R600_IT_MEM_SEMAPHORE            0x00003900
+#       define R600_IT_MPEG_INDEX               0x00003A00
+#       define R600_IT_WAIT_REG_MEM             0x00003C00
+#       define R600_IT_MEM_WRITE                0x00003D00
+#       define R600_IT_INDIRECT_BUFFER          0x00003200
+#       define R600_IT_CP_INTERRUPT             0x00004000
+#       define R600_IT_SURFACE_SYNC             0x00004300
+#              define R600_CB0_DEST_BASE_ENA    (1 << 6)
+#              define R600_TC_ACTION_ENA        (1 << 23)
+#              define R600_VC_ACTION_ENA        (1 << 24)
+#              define R600_CB_ACTION_ENA        (1 << 25)
+#              define R600_DB_ACTION_ENA        (1 << 26)
+#              define R600_SH_ACTION_ENA        (1 << 27)
+#              define R600_SMX_ACTION_ENA       (1 << 28)
+#       define R600_IT_ME_INITIALIZE            0x00004400
 #	       define R600_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
-#	define R600_IT_EVENT_WRITE		0x00004600
-#	define R600_IT_SET_CONFIG_REG		0x00006800
-#	define R600_SET_CONFIG_REG_OFFSET       0x00008000
-#	define R600_SET_CONFIG_REG_END          0x0000ac00
+#       define R600_IT_COND_WRITE               0x00004500
+#       define R600_IT_EVENT_WRITE              0x00004600
+#       define R600_IT_EVENT_WRITE_EOP          0x00004700
+#       define R600_IT_ONE_REG_WRITE            0x00005700
+#       define R600_IT_SET_CONFIG_REG           0x00006800
+#              define R600_SET_CONFIG_REG_OFFSET 0x00008000
+#              define R600_SET_CONFIG_REG_END   0x0000ac00
+#       define R600_IT_SET_CONTEXT_REG          0x00006900
+#              define R600_SET_CONTEXT_REG_OFFSET 0x00028000
+#              define R600_SET_CONTEXT_REG_END  0x00029000
+#       define R600_IT_SET_ALU_CONST            0x00006A00
+#              define R600_SET_ALU_CONST_OFFSET 0x00030000
+#              define R600_SET_ALU_CONST_END    0x00032000
+#       define R600_IT_SET_BOOL_CONST           0x00006B00
+#              define R600_SET_BOOL_CONST_OFFSET 0x0003e380
+#              define R600_SET_BOOL_CONST_END   0x00040000
+#       define R600_IT_SET_LOOP_CONST           0x00006C00
+#              define R600_SET_LOOP_CONST_OFFSET 0x0003e200
+#              define R600_SET_LOOP_CONST_END   0x0003e380
+#       define R600_IT_SET_RESOURCE             0x00006D00
+#              define R600_SET_RESOURCE_OFFSET  0x00038000
+#              define R600_SET_RESOURCE_END     0x0003c000
+#              define R600_SQ_TEX_VTX_INVALID_TEXTURE  0x0
+#              define R600_SQ_TEX_VTX_INVALID_BUFFER   0x1
+#              define R600_SQ_TEX_VTX_VALID_TEXTURE    0x2
+#              define R600_SQ_TEX_VTX_VALID_BUFFER     0x3
+#       define R600_IT_SET_SAMPLER              0x00006E00
+#              define R600_SET_SAMPLER_OFFSET   0x0003c000
+#              define R600_SET_SAMPLER_END      0x0003cff0
+#       define R600_IT_SET_CTL_CONST            0x00006F00
+#              define R600_SET_CTL_CONST_OFFSET 0x0003cff0
+#              define R600_SET_CTL_CONST_END    0x0003e200
+#       define R600_IT_SURFACE_BASE_UPDATE      0x00007300
 
 #define RADEON_CP_PACKET_MASK		0xC0000000
 #define RADEON_CP_PACKET_COUNT_MASK	0x3fff0000
@@ -1598,6 +1681,52 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index);
 #define R600_CB_COLOR7_BASE                                    0x2805c
 #define R600_CB_COLOR7_FRAG                                    0x280fc
 
+#define R600_CB_COLOR0_SIZE                                    0x28060
+#define R600_CB_COLOR0_VIEW                                    0x28080
+#define R600_CB_COLOR0_INFO                                    0x280a0
+#define R600_CB_COLOR0_TILE                                    0x280c0
+#define R600_CB_COLOR0_FRAG                                    0x280e0
+#define R600_CB_COLOR0_MASK                                    0x28100
+
+#define AVIVO_D1MODE_VLINE_START_END                           0x6538
+#define AVIVO_D2MODE_VLINE_START_END                           0x6d38
+#define R600_CP_COHER_BASE                                     0x85f8
+#define R600_DB_DEPTH_BASE                                     0x2800c
+#define R600_SQ_PGM_START_FS                                   0x28894
+#define R600_SQ_PGM_START_ES                                   0x28880
+#define R600_SQ_PGM_START_VS                                   0x28858
+#define R600_SQ_PGM_RESOURCES_VS                               0x28868
+#define R600_SQ_PGM_CF_OFFSET_VS                               0x288d0
+#define R600_SQ_PGM_START_GS                                   0x2886c
+#define R600_SQ_PGM_START_PS                                   0x28840
+#define R600_SQ_PGM_RESOURCES_PS                               0x28850
+#define R600_SQ_PGM_EXPORTS_PS                                 0x28854
+#define R600_SQ_PGM_CF_OFFSET_PS                               0x288cc
+#define R600_VGT_DMA_BASE                                      0x287e8
+#define R600_VGT_DMA_BASE_HI                                   0x287e4
+#define R600_VGT_STRMOUT_BASE_OFFSET_0                         0x28b10
+#define R600_VGT_STRMOUT_BASE_OFFSET_1                         0x28b14
+#define R600_VGT_STRMOUT_BASE_OFFSET_2                         0x28b18
+#define R600_VGT_STRMOUT_BASE_OFFSET_3                         0x28b1c
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_0                      0x28b44
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_1                      0x28b48
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_2                      0x28b4c
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_3                      0x28b50
+#define R600_VGT_STRMOUT_BUFFER_BASE_0                         0x28ad8
+#define R600_VGT_STRMOUT_BUFFER_BASE_1                         0x28ae8
+#define R600_VGT_STRMOUT_BUFFER_BASE_2                         0x28af8
+#define R600_VGT_STRMOUT_BUFFER_BASE_3                         0x28b08
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_0                       0x28adc
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_1                       0x28aec
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_2                       0x28afc
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_3                       0x28b0c
+
+#define R600_VGT_PRIMITIVE_TYPE                                0x8958
+
+#define R600_PA_SC_SCREEN_SCISSOR_TL                           0x28030
+#define R600_PA_SC_GENERIC_SCISSOR_TL                          0x28240
+#define R600_PA_SC_WINDOW_SCISSOR_TL                           0x28204
+
 #define R600_TC_CNTL                                           0x9608
 #       define R600_TC_L2_SIZE(x)                              ((x) << 5)
 #       define R600_L2_DISABLE_LATE_HIT                        (1 << 9)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index b4e48dd2e859c056d8280121f44c2e376070747f..506dd4dd3a244dbf173cf5ea526b75a6a471ce6e 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -53,9 +53,9 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 		 * away
 		 */
 		WREG32(rdev->fence_drv.scratch_reg, fence->seq);
-	} else {
+	} else
 		radeon_fence_ring_emit(rdev, fence);
-	}
+
 	fence->emited = true;
 	fence->timeout = jiffies + ((2000 * HZ) / 1000);
 	list_del(&fence->list);
@@ -168,7 +168,47 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 	return signaled;
 }
 
-int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
+int r600_fence_wait(struct radeon_fence *fence,  bool intr, bool lazy)
+{
+	struct radeon_device *rdev;
+	unsigned long cur_jiffies;
+	unsigned long timeout;
+	int ret = 0;
+
+	cur_jiffies = jiffies;
+	timeout = HZ / 100;
+
+	if (time_after(fence->timeout, cur_jiffies)) {
+		timeout = fence->timeout - cur_jiffies;
+	}
+
+	rdev = fence->rdev;
+
+	__set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+
+	while (1) {
+		if (radeon_fence_signaled(fence))
+			break;
+
+		if (time_after_eq(jiffies, timeout)) {
+			ret = -EBUSY;
+			break;
+		}
+
+		if (lazy)
+			schedule_timeout(1);
+
+		if (intr && signal_pending(current)) {
+			ret = -ERESTART;
+			break;
+		}
+	}
+	__set_current_state(TASK_RUNNING);
+	return ret;
+}
+
+
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	struct radeon_device *rdev;
 	unsigned long cur_jiffies;
@@ -176,7 +216,6 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
 	bool expired = false;
 	int r;
 
-
 	if (fence == NULL) {
 		WARN(1, "Querying an invalid fence : %p !\n", fence);
 		return 0;
@@ -185,13 +224,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
 	if (radeon_fence_signaled(fence)) {
 		return 0;
 	}
+
+	if (rdev->family >= CHIP_R600)
+		return r600_fence_wait(fence, intr, 0);
+
 retry:
 	cur_jiffies = jiffies;
 	timeout = HZ / 100;
 	if (time_after(fence->timeout, cur_jiffies)) {
 		timeout = fence->timeout - cur_jiffies;
 	}
-	if (interruptible) {
+
+	if (intr) {
 		r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
 				radeon_fence_signaled(fence), timeout);
 		if (unlikely(r == -ERESTARTSYS)) {
diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h
index 28be2f1165cba5f62b55617c4bc587190f20542e..21da871a793c64aec6ce16ac4b1d820aa0628faa 100644
--- a/drivers/gpu/drm/radeon/radeon_reg.h
+++ b/drivers/gpu/drm/radeon/radeon_reg.h
@@ -3255,6 +3255,24 @@
 #define RADEON_CP_RB_WPTR                   0x0714
 #define RADEON_CP_RB_RPTR_WR                0x071c
 
+#define RADEON_SCRATCH_UMSK		    0x0770
+#define RADEON_SCRATCH_ADDR		    0x0774
+
+#define R600_CP_RB_BASE                     0xc100
+#define R600_CP_RB_CNTL                     0xc104
+#       define R600_RB_BUFSZ(x)             ((x) << 0)
+#       define R600_RB_BLKSZ(x)             ((x) << 8)
+#       define R600_RB_NO_UPDATE            (1 << 27)
+#       define R600_RB_RPTR_WR_ENA          (1 << 31)
+#define R600_CP_RB_RPTR_WR                  0xc108
+#define R600_CP_RB_RPTR_ADDR                0xc10c
+#define R600_CP_RB_RPTR_ADDR_HI             0xc110
+#define R600_CP_RB_WPTR                     0xc114
+#define R600_CP_RB_WPTR_ADDR                0xc118
+#define R600_CP_RB_WPTR_ADDR_HI             0xc11c
+#define R600_CP_RB_RPTR                     0x8700
+#define R600_CP_RB_WPTR_DELAY               0x8704
+
 #define RADEON_CP_IB_BASE                   0x0738
 #define RADEON_CP_IB_BUFSZ                  0x073c
 
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 60d159308b883adddec3628b343a7784c6d22cab..aa9837a6aa75ffa37788180024cf1d4e81f5aa39 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -110,7 +110,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
 		return;
 	}
 	list_del(&tmp->list);
-	INIT_LIST_HEAD(&tmp->list);
 	if (tmp->fence) {
 		radeon_fence_unref(&tmp->fence);
 	}
@@ -119,19 +118,11 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
 	mutex_unlock(&rdev->ib_pool.mutex);
 }
 
-static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib)
-{
-	while ((ib->length_dw & rdev->cp.align_mask)) {
-		ib->ptr[ib->length_dw++] = PACKET2(0);
-	}
-}
-
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	int r = 0;
 
 	mutex_lock(&rdev->ib_pool.mutex);
-	radeon_ib_align(rdev, ib);
 	if (!ib->length_dw || !rdev->cp.ready) {
 		/* TODO: Nothings in the ib we should report. */
 		mutex_unlock(&rdev->ib_pool.mutex);
@@ -145,9 +136,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 		mutex_unlock(&rdev->ib_pool.mutex);
 		return r;
 	}
-	radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
-	radeon_ring_write(rdev, ib->gpu_addr);
-	radeon_ring_write(rdev, ib->length_dw);
+	radeon_ring_ib_execute(rdev, ib);
 	radeon_fence_emit(rdev, ib->fence);
 	radeon_ring_unlock_commit(rdev);
 	list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs);
@@ -215,69 +204,16 @@ void radeon_ib_pool_fini(struct radeon_device *rdev)
 	mutex_unlock(&rdev->ib_pool.mutex);
 }
 
-int radeon_ib_test(struct radeon_device *rdev)
-{
-	struct radeon_ib *ib;
-	uint32_t scratch;
-	uint32_t tmp = 0;
-	unsigned i;
-	int r;
-
-	r = radeon_scratch_get(rdev, &scratch);
-	if (r) {
-		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
-		return r;
-	}
-	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ib_get(rdev, &ib);
-	if (r) {
-		return r;
-	}
-	ib->ptr[0] = PACKET0(scratch, 0);
-	ib->ptr[1] = 0xDEADBEEF;
-	ib->ptr[2] = PACKET2(0);
-	ib->ptr[3] = PACKET2(0);
-	ib->ptr[4] = PACKET2(0);
-	ib->ptr[5] = PACKET2(0);
-	ib->ptr[6] = PACKET2(0);
-	ib->ptr[7] = PACKET2(0);
-	ib->length_dw = 8;
-	r = radeon_ib_schedule(rdev, ib);
-	if (r) {
-		radeon_scratch_free(rdev, scratch);
-		radeon_ib_free(rdev, &ib);
-		return r;
-	}
-	r = radeon_fence_wait(ib->fence, false);
-	if (r) {
-		return r;
-	}
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF) {
-			break;
-		}
-		DRM_UDELAY(1);
-	}
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ib test succeeded in %u usecs\n", i);
-	} else {
-		DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
-		r = -EINVAL;
-	}
-	radeon_scratch_free(rdev, scratch);
-	radeon_ib_free(rdev, &ib);
-	return r;
-}
-
 
 /*
  * Ring.
  */
 void radeon_ring_free_size(struct radeon_device *rdev)
 {
-	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	if (rdev->family >= CHIP_R600)
+		rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
+	else
+		rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
 	/* This works because ring_size is a power of 2 */
 	rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4));
 	rdev->cp.ring_free_dw -= rdev->cp.wptr;
@@ -320,11 +256,10 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev)
 	count_dw_pad = (rdev->cp.align_mask + 1) -
 		       (rdev->cp.wptr & rdev->cp.align_mask);
 	for (i = 0; i < count_dw_pad; i++) {
-		radeon_ring_write(rdev, PACKET2(0));
+		radeon_ring_write(rdev, 2 << 30);
 	}
 	DRM_MEMORYBARRIER();
-	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
-	(void)RREG32(RADEON_CP_RB_WPTR);
+	radeon_cp_commit(rdev);
 	mutex_unlock(&rdev->cp.mutex);
 }
 
@@ -334,46 +269,6 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev)
 	mutex_unlock(&rdev->cp.mutex);
 }
 
-int radeon_ring_test(struct radeon_device *rdev)
-{
-	uint32_t scratch;
-	uint32_t tmp = 0;
-	unsigned i;
-	int r;
-
-	r = radeon_scratch_get(rdev, &scratch);
-	if (r) {
-		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
-		return r;
-	}
-	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ring_lock(rdev, 2);
-	if (r) {
-		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
-		radeon_scratch_free(rdev, scratch);
-		return r;
-	}
-	radeon_ring_write(rdev, PACKET0(scratch, 0));
-	radeon_ring_write(rdev, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev);
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF) {
-			break;
-		}
-		DRM_UDELAY(1);
-	}
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ring test succeeded in %d usecs\n", i);
-	} else {
-		DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
-		r = -EINVAL;
-	}
-	radeon_scratch_free(rdev, scratch);
-	return r;
-}
-
 int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size)
 {
 	int r;
diff --git a/drivers/gpu/drm/radeon/radeon_share.h b/drivers/gpu/drm/radeon/radeon_share.h
index 63a773578f17044189f71f3dbdbf3a7ae3ad73de..5f9e358ab5062103b49d3b5eca6fa15cbcbc29ee 100644
--- a/drivers/gpu/drm/radeon/radeon_share.h
+++ b/drivers/gpu/drm/radeon/radeon_share.h
@@ -28,12 +28,89 @@
 #ifndef __RADEON_SHARE_H__
 #define __RADEON_SHARE_H__
 
+/* Common */
+struct radeon_device;
+struct radeon_cs_parser;
+int radeon_clocks_init(struct radeon_device *rdev);
+void radeon_clocks_fini(struct radeon_device *rdev);
+void radeon_scratch_init(struct radeon_device *rdev);
+void radeon_surface_init(struct radeon_device *rdev);
+int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
+
+
+/* R100, RV100, RS100, RV200, RS200, R200, RV250, RS300, RV280 */
 void r100_vram_init_sizes(struct radeon_device *rdev);
 
+
+/* R300, R350, RV350, RV380 */
+struct r300_asic {
+	const unsigned	*reg_safe_bm;
+	unsigned	reg_safe_bm_size;
+};
+
+
+/* RS690, RS740 */
 void rs690_line_buffer_adjust(struct radeon_device *rdev,
 			      struct drm_display_mode *mode1,
 			      struct drm_display_mode *mode2);
 
+
+/* RV515 */
 void rv515_bandwidth_avivo_update(struct radeon_device *rdev);
 
+
+/* R600, RV610, RV630, RV620, RV635, RV670, RS780, RS880 */
+bool r600_card_posted(struct radeon_device *rdev);
+void r600_cp_stop(struct radeon_device *rdev);
+void r600_ring_init(struct radeon_device *rdev, unsigned ring_size);
+int r600_cp_resume(struct radeon_device *rdev);
+int r600_count_pipe_bits(uint32_t val);
+int r600_gart_clear_page(struct radeon_device *rdev, int i);
+int r600_mc_wait_for_idle(struct radeon_device *rdev);
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
+int r600_ib_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev);
+int r600_wb_init(struct radeon_device *rdev);
+void r600_wb_fini(struct radeon_device *rdev);
+void r600_scratch_init(struct radeon_device *rdev);
+int r600_blit_init(struct radeon_device *rdev);
+void r600_blit_fini(struct radeon_device *rdev);
+int r600_cp_init_microcode(struct radeon_device *rdev);
+struct r600_asic {
+	unsigned max_pipes;
+	unsigned max_tile_pipes;
+	unsigned max_simds;
+	unsigned max_backends;
+	unsigned max_gprs;
+	unsigned max_threads;
+	unsigned max_stack_entries;
+	unsigned max_hw_contexts;
+	unsigned max_gs_threads;
+	unsigned sx_max_export_size;
+	unsigned sx_max_export_pos_size;
+	unsigned sx_max_export_smx_size;
+	unsigned sq_num_cf_insts;
+};
+
+/* RV770, RV7300, RV710 */
+struct rv770_asic {
+	unsigned max_pipes;
+	unsigned max_tile_pipes;
+	unsigned max_simds;
+	unsigned max_backends;
+	unsigned max_gprs;
+	unsigned max_threads;
+	unsigned max_stack_entries;
+	unsigned max_hw_contexts;
+	unsigned max_gs_threads;
+	unsigned sx_max_export_size;
+	unsigned sx_max_export_pos_size;
+	unsigned sx_max_export_smx_size;
+	unsigned sq_num_cf_insts;
+	unsigned sx_num_of_sets;
+	unsigned sc_prim_fifo_size;
+	unsigned sc_hiz_tile_fifo_size;
+	unsigned sc_earlyz_tile_fifo_fize;
+};
+
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 2882f40d5ec563d4ac5760314ca2a8b7b5da8282..aad0c6fafcf44cba3eb4773f558d065d24ce7ec5 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -1546,7 +1546,7 @@ static void radeon_cp_dispatch_vertex(struct drm_device * dev,
 	} while (i < nbox);
 }
 
-static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
+void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	struct drm_radeon_master_private *master_priv = master->driver_priv;
@@ -2213,7 +2213,10 @@ static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *f
 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
-	radeon_cp_dispatch_swap(dev, file_priv->master);
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
+		r600_cp_dispatch_swap(dev, file_priv);
+	else
+		radeon_cp_dispatch_swap(dev, file_priv->master);
 	sarea_priv->ctx_owner = 0;
 
 	COMMIT_RING();
@@ -2412,7 +2415,10 @@ static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file
 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
 	VB_AGE_TEST_WITH_RETURN(dev_priv);
 
-	ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
+		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
+	else
+		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
 
 	return ret;
 }
@@ -2495,8 +2501,9 @@ static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_fil
 		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
 	}
 
-	if (indirect->discard)
+	if (indirect->discard) {
 		radeon_cp_discard_buffer(dev, file_priv->master, buf);
+	}
 
 	COMMIT_RING();
 	return 0;
@@ -3227,7 +3234,8 @@ struct drm_ioctl_desc radeon_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
+	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
 };
 
 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index dc7a44274ea80c359c336f3b4b32865f3bac4fca..acd889c945494b27e0722f6bd37d0afae5b29e95 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -376,9 +376,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 		radeon_move_null(bo, new_mem);
 		return 0;
 	}
-	if (!rdev->cp.ready) {
+	if (!rdev->cp.ready || rdev->asic->copy == NULL) {
 		/* use memcpy */
-		DRM_ERROR("CP is not ready use memcpy.\n");
 		goto memcpy;
 	}
 
@@ -495,7 +494,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 		return r;
 	}
 	DRM_INFO("radeon: %uM of VRAM memory ready\n",
-		 rdev->mc.real_vram_size / (1024 * 1024));
+		 (unsigned)rdev->mc.real_vram_size / (1024 * 1024));
 	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0,
 			   ((rdev->mc.gtt_size) >> PAGE_SHIFT));
 	if (r) {
@@ -503,7 +502,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 		return r;
 	}
 	DRM_INFO("radeon: %uM of GTT memory ready.\n",
-		 rdev->mc.gtt_size / (1024 * 1024));
+		 (unsigned)(rdev->mc.gtt_size / (1024 * 1024)));
 	if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) {
 		rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping;
 	}
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index b29affd9c5d8e87797f34d7afe06b3a03ecc8753..8c3ea7e360605921f9f2621feca17c595d7dc4ff 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -63,7 +63,7 @@ void rs400_gart_adjust_size(struct radeon_device *rdev)
 		break;
 	default:
 		DRM_ERROR("Unable to use IGP GART size %uM\n",
-			  rdev->mc.gtt_size >> 20);
+			  (unsigned)(rdev->mc.gtt_size >> 20));
 		DRM_ERROR("Valid GART size for IGP are 32M,64M,128M,256M,512M,1G,2G\n");
 		DRM_ERROR("Forcing to 32M GART size\n");
 		rdev->mc.gtt_size = 32 * 1024 * 1024;
diff --git a/drivers/gpu/drm/radeon/rs780.c b/drivers/gpu/drm/radeon/rs780.c
deleted file mode 100644
index 0affcff8182509f87f98a14822351354f842d366..0000000000000000000000000000000000000000
--- a/drivers/gpu/drm/radeon/rs780.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2008 Advanced Micro Devices, Inc.
- * Copyright 2008 Red Hat Inc.
- * Copyright 2009 Jerome Glisse.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie
- *          Alex Deucher
- *          Jerome Glisse
- */
-#include "drmP.h"
-#include "radeon_reg.h"
-#include "radeon.h"
-
-/* rs780  depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
-
-/* This files gather functions specifics to:
- * rs780
- *
- * Some of these functions might be used by newer ASICs.
- */
-int rs780_mc_wait_for_idle(struct radeon_device *rdev);
-void rs780_gpu_init(struct radeon_device *rdev);
-
-
-/*
- * MC
- */
-int rs780_mc_init(struct radeon_device *rdev)
-{
-	rs780_gpu_init(rdev);
-	/* FIXME: implement */
-
-	rs600_mc_disable_clients(rdev);
-	if (rs780_mc_wait_for_idle(rdev)) {
-		printk(KERN_WARNING "Failed to wait MC idle while "
-		       "programming pipes. Bad things might happen.\n");
-	}
-	return 0;
-}
-
-void rs780_mc_fini(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-}
-
-
-/*
- * Global GPU functions
- */
-void rs780_errata(struct radeon_device *rdev)
-{
-	rdev->pll_errata = 0;
-}
-
-int rs780_mc_wait_for_idle(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-	return 0;
-}
-
-void rs780_gpu_init(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-}
-
-
-/*
- * VRAM info
- */
-void rs780_vram_get_type(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-}
-
-void rs780_vram_info(struct radeon_device *rdev)
-{
-	rs780_vram_get_type(rdev);
-
-	/* FIXME: implement */
-	/* Could aper size report 0 ? */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
-}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 97965c430c1f251fa1366f51ed6e249998480c31..99e397f1638433dc0d8556460b972ebd39704e9a 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -27,7 +27,7 @@
  */
 #include <linux/seq_file.h>
 #include "drmP.h"
-#include "rv515r.h"
+#include "rv515d.h"
 #include "radeon.h"
 #include "radeon_share.h"
 
diff --git a/drivers/gpu/drm/radeon/rv515r.h b/drivers/gpu/drm/radeon/rv515d.h
similarity index 78%
rename from drivers/gpu/drm/radeon/rv515r.h
rename to drivers/gpu/drm/radeon/rv515d.h
index f3cf8403990690d6568ce45ae5aa225c3ac7de72..a65e17ec1c08cd7dd75b512b4e076db9c10b3bcc 100644
--- a/drivers/gpu/drm/radeon/rv515r.h
+++ b/drivers/gpu/drm/radeon/rv515d.h
@@ -25,10 +25,12 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#ifndef RV515R_H
-#define RV515R_H
+#ifndef __RV515D_H__
+#define __RV515D_H__
 
-/* RV515 registers */
+/*
+ * RV515 registers
+ */
 #define PCIE_INDEX			0x0030
 #define PCIE_DATA			0x0034
 #define	MC_IND_INDEX			0x0070
@@ -166,5 +168,53 @@
 #define		MC_GLOBW_INIT_LAT_MASK			0xF0000000
 
 
+/*
+ * PM4 packet
+ */
+#define CP_PACKET0			0x00000000
+#define		PACKET0_BASE_INDEX_SHIFT	0
+#define		PACKET0_BASE_INDEX_MASK		(0x1ffff << 0)
+#define		PACKET0_COUNT_SHIFT		16
+#define		PACKET0_COUNT_MASK		(0x3fff << 16)
+#define CP_PACKET1			0x40000000
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+#define CP_PACKET3			0xC0000000
+#define		PACKET3_IT_OPCODE_SHIFT		8
+#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
+#define		PACKET3_COUNT_SHIFT		16
+#define		PACKET3_COUNT_MASK		(0x3fff << 16)
+/* PACKET3 op code */
+#define		PACKET3_NOP			0x10
+#define		PACKET3_3D_DRAW_VBUF		0x28
+#define		PACKET3_3D_DRAW_IMMD		0x29
+#define		PACKET3_3D_DRAW_INDX		0x2A
+#define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_INDX_BUFFER		0x33
+#define		PACKET3_3D_DRAW_VBUF_2		0x34
+#define		PACKET3_3D_DRAW_IMMD_2		0x35
+#define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_BITBLT_MULTI		0x9B
+
+#define PACKET0(reg, n)	(CP_PACKET0 |					\
+			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
+			 REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n)	(CP_PACKET3 |					\
+			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
+			 REG_SET(PACKET3_COUNT, (n)))
+
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
 #endif
 
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 21d8ffd57308d1a4f994104f74f795e0a6ff1632..57765f6d5b204ffd1211444c2c5914366af093fc 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -25,100 +25,975 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
 #include "drmP.h"
-#include "radeon_reg.h"
 #include "radeon.h"
+#include "radeon_share.h"
+#include "rv770d.h"
+#include "avivod.h"
+#include "atom.h"
 
-/* rv770,rv730,rv710  depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
+#define R700_PFP_UCODE_SIZE 848
+#define R700_PM4_UCODE_SIZE 1360
 
-/* This files gather functions specifics to:
- * rv770,rv730,rv710
- *
- * Some of these functions might be used by newer ASICs.
- */
-int rv770_mc_wait_for_idle(struct radeon_device *rdev);
-void rv770_gpu_init(struct radeon_device *rdev);
+static void rv770_gpu_init(struct radeon_device *rdev);
+void rv770_fini(struct radeon_device *rdev);
 
 
 /*
- * MC
+ * GART
  */
-int rv770_mc_init(struct radeon_device *rdev)
+int rv770_pcie_gart_enable(struct radeon_device *rdev)
 {
-	uint32_t tmp;
+	u32 tmp;
+	int r, i;
 
-	rv770_gpu_init(rdev);
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
+	r = radeon_gart_table_vram_alloc(rdev);
+	if (r) {
+		return r;
+	}
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		r600_gart_clear_page(rdev, i);
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+	/* Setup TLB control */
+	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
+		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+			(u32)(rdev->dummy_page.addr >> 12));
+	for (i = 1; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
-	/* setup the gart before changing location so we can ask to
-	 * discard unmapped mc request
-	 */
-	/* FIXME: disable out of gart access */
-	tmp = rdev->mc.gtt_location / 4096;
-	tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
-	tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
-	tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
-
-	rs600_mc_disable_clients(rdev);
-	if (rv770_mc_wait_for_idle(rdev)) {
-		printk(KERN_WARNING "Failed to wait MC idle while "
-		       "programming pipes. Bad things might happen.\n");
-	}
-
-	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-	tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24);
-	tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24);
-	WREG32(R700_MC_VM_FB_LOCATION, tmp);
-	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-	tmp = REG_SET(R700_MC_AGP_TOP, tmp >> 22);
-	WREG32(R700_MC_VM_AGP_TOP, tmp);
-	tmp = REG_SET(R700_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
-	WREG32(R700_MC_VM_AGP_BOT, tmp);
+	r600_pcie_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
 	return 0;
 }
 
-void rv770_mc_fini(struct radeon_device *rdev)
+void rv770_pcie_gart_disable(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
+	u32 tmp;
+	int i;
+
+	/* Clear ptes*/
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		r600_gart_clear_page(rdev, i);
+	r600_pcie_gart_tlb_flush(rdev);
+	/* Disable all tables */
+	for (i = 0; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+	/* Setup TLB control */
+	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
 }
 
 
 /*
- * Global GPU functions
+ * MC
  */
-void rv770_errata(struct radeon_device *rdev)
+static void rv770_mc_resume(struct radeon_device *rdev)
 {
-	rdev->pll_errata = 0;
+	u32 d1vga_control, d2vga_control;
+	u32 vga_render_control, vga_hdp_control;
+	u32 d1crtc_control, d2crtc_control;
+	u32 new_d1grph_primary, new_d1grph_secondary;
+	u32 new_d2grph_primary, new_d2grph_secondary;
+	u64 old_vram_start;
+	u32 tmp;
+	int i, j;
+
+	/* Initialize HDP */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+	d1vga_control = RREG32(D1VGA_CONTROL);
+	d2vga_control = RREG32(D2VGA_CONTROL);
+	vga_render_control = RREG32(VGA_RENDER_CONTROL);
+	vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+	d1crtc_control = RREG32(D1CRTC_CONTROL);
+	d2crtc_control = RREG32(D2CRTC_CONTROL);
+	old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
+	new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d1grph_primary += rdev->mc.vram_start - old_vram_start;
+	new_d1grph_secondary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d2grph_primary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_secondary += rdev->mc.vram_start - old_vram_start;
+
+	/* Stop all video */
+	WREG32(D1VGA_CONTROL, 0);
+	WREG32(D2VGA_CONTROL, 0);
+	WREG32(VGA_RENDER_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, 0);
+	WREG32(D2CRTC_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
+	}
+
+	/* Lockout access through VGA aperture*/
+	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+
+	/* Update configuration */
+	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+	tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16;
+	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+	WREG32(MC_VM_FB_LOCATION, tmp);
+	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+	WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+	if (rdev->flags & RADEON_IS_AGP) {
+		WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16);
+		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+	} else {
+		WREG32(MC_VM_AGP_BASE, 0);
+		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+	}
+	WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary);
+	WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary);
+	WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary);
+	WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary);
+	WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
+
+	/* Unlock host access */
+	WREG32(VGA_HDP_CONTROL, vga_hdp_control);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
+	}
+
+	/* Restore video state */
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, d1crtc_control);
+	WREG32(D2CRTC_CONTROL, d2crtc_control);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+	WREG32(D1VGA_CONTROL, d1vga_control);
+	WREG32(D2VGA_CONTROL, d2vga_control);
+	WREG32(VGA_RENDER_CONTROL, vga_render_control);
 }
 
-int rv770_mc_wait_for_idle(struct radeon_device *rdev)
+
+/*
+ * CP.
+ */
+void r700_cp_stop(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
-	return 0;
+	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
 }
 
-void rv770_gpu_init(struct radeon_device *rdev)
+
+static int rv770_cp_load_microcode(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
+	const __be32 *fw_data;
+	int i;
+
+	if (!rdev->me_fw || !rdev->pfp_fw)
+		return -EINVAL;
+
+	r700_cp_stop(rdev);
+	WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0));
+
+	/* Reset cp */
+	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+	RREG32(GRBM_SOFT_RESET);
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+
+	fw_data = (const __be32 *)rdev->pfp_fw->data;
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
+		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+
+	fw_data = (const __be32 *)rdev->me_fw->data;
+	WREG32(CP_ME_RAM_WADDR, 0);
+	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
+		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
+
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	WREG32(CP_ME_RAM_WADDR, 0);
+	WREG32(CP_ME_RAM_RADDR, 0);
+	return 0;
 }
 
 
 /*
- * VRAM info
+ * Core functions
  */
-void rv770_vram_get_type(struct radeon_device *rdev)
+static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
+						u32 num_backends,
+						u32 backend_disable_mask)
 {
-	/* FIXME: implement */
+	u32 backend_map = 0;
+	u32 enabled_backends_mask;
+	u32 enabled_backends_count;
+	u32 cur_pipe;
+	u32 swizzle_pipe[R7XX_MAX_PIPES];
+	u32 cur_backend;
+	u32 i;
+
+	if (num_tile_pipes > R7XX_MAX_PIPES)
+		num_tile_pipes = R7XX_MAX_PIPES;
+	if (num_tile_pipes < 1)
+		num_tile_pipes = 1;
+	if (num_backends > R7XX_MAX_BACKENDS)
+		num_backends = R7XX_MAX_BACKENDS;
+	if (num_backends < 1)
+		num_backends = 1;
+
+	enabled_backends_mask = 0;
+	enabled_backends_count = 0;
+	for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
+		if (((backend_disable_mask >> i) & 1) == 0) {
+			enabled_backends_mask |= (1 << i);
+			++enabled_backends_count;
+		}
+		if (enabled_backends_count == num_backends)
+			break;
+	}
+
+	if (enabled_backends_count == 0) {
+		enabled_backends_mask = 1;
+		enabled_backends_count = 1;
+	}
+
+	if (enabled_backends_count != num_backends)
+		num_backends = enabled_backends_count;
+
+	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
+	switch (num_tile_pipes) {
+	case 1:
+		swizzle_pipe[0] = 0;
+		break;
+	case 2:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		break;
+	case 3:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 1;
+		break;
+	case 4:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 3;
+		swizzle_pipe[3] = 1;
+		break;
+	case 5:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 1;
+		swizzle_pipe[4] = 3;
+		break;
+	case 6:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 5;
+		swizzle_pipe[4] = 3;
+		swizzle_pipe[5] = 1;
+		break;
+	case 7:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 3;
+		swizzle_pipe[5] = 1;
+		swizzle_pipe[6] = 5;
+		break;
+	case 8:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 3;
+		swizzle_pipe[5] = 1;
+		swizzle_pipe[6] = 7;
+		swizzle_pipe[7] = 5;
+		break;
+	}
+
+	cur_backend = 0;
+	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
+		while (((1 << cur_backend) & enabled_backends_mask) == 0)
+			cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
+
+		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
+
+		cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
+	}
+
+	return backend_map;
 }
 
-void rv770_vram_info(struct radeon_device *rdev)
+static void rv770_gpu_init(struct radeon_device *rdev)
 {
-	rv770_vram_get_type(rdev);
+	int i, j, num_qd_pipes;
+	u32 sx_debug_1;
+	u32 smx_dc_ctl0;
+	u32 num_gs_verts_per_thread;
+	u32 vgt_gs_per_es;
+	u32 gs_prim_buffer_depth = 0;
+	u32 sq_ms_fifo_sizes;
+	u32 sq_config;
+	u32 sq_thread_resource_mgmt;
+	u32 hdp_host_path_cntl;
+	u32 sq_dyn_gpr_size_simd_ab_0;
+	u32 backend_map;
+	u32 gb_tiling_config = 0;
+	u32 cc_rb_backend_disable = 0;
+	u32 cc_gc_shader_pipe_config = 0;
+	u32 mc_arb_ramcfg;
+	u32 db_debug4;
 
-	/* FIXME: implement */
+	/* setup chip specs */
+	switch (rdev->family) {
+	case CHIP_RV770:
+		rdev->config.rv770.max_pipes = 4;
+		rdev->config.rv770.max_tile_pipes = 8;
+		rdev->config.rv770.max_simds = 10;
+		rdev->config.rv770.max_backends = 4;
+		rdev->config.rv770.max_gprs = 256;
+		rdev->config.rv770.max_threads = 248;
+		rdev->config.rv770.max_stack_entries = 512;
+		rdev->config.rv770.max_hw_contexts = 8;
+		rdev->config.rv770.max_gs_threads = 16 * 2;
+		rdev->config.rv770.sx_max_export_size = 128;
+		rdev->config.rv770.sx_max_export_pos_size = 16;
+		rdev->config.rv770.sx_max_export_smx_size = 112;
+		rdev->config.rv770.sq_num_cf_insts = 2;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0xF9;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+		break;
+	case CHIP_RV730:
+		rdev->config.rv770.max_pipes = 2;
+		rdev->config.rv770.max_tile_pipes = 4;
+		rdev->config.rv770.max_simds = 8;
+		rdev->config.rv770.max_backends = 2;
+		rdev->config.rv770.max_gprs = 128;
+		rdev->config.rv770.max_threads = 248;
+		rdev->config.rv770.max_stack_entries = 256;
+		rdev->config.rv770.max_hw_contexts = 8;
+		rdev->config.rv770.max_gs_threads = 16 * 2;
+		rdev->config.rv770.sx_max_export_size = 256;
+		rdev->config.rv770.sx_max_export_pos_size = 32;
+		rdev->config.rv770.sx_max_export_smx_size = 224;
+		rdev->config.rv770.sq_num_cf_insts = 2;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0xf9;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
+			rdev->config.rv770.sx_max_export_pos_size -= 16;
+			rdev->config.rv770.sx_max_export_smx_size += 16;
+		}
+		break;
+	case CHIP_RV710:
+		rdev->config.rv770.max_pipes = 2;
+		rdev->config.rv770.max_tile_pipes = 2;
+		rdev->config.rv770.max_simds = 2;
+		rdev->config.rv770.max_backends = 1;
+		rdev->config.rv770.max_gprs = 256;
+		rdev->config.rv770.max_threads = 192;
+		rdev->config.rv770.max_stack_entries = 256;
+		rdev->config.rv770.max_hw_contexts = 4;
+		rdev->config.rv770.max_gs_threads = 8 * 2;
+		rdev->config.rv770.sx_max_export_size = 128;
+		rdev->config.rv770.sx_max_export_pos_size = 16;
+		rdev->config.rv770.sx_max_export_smx_size = 112;
+		rdev->config.rv770.sq_num_cf_insts = 1;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0x40;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+		break;
+	case CHIP_RV740:
+		rdev->config.rv770.max_pipes = 4;
+		rdev->config.rv770.max_tile_pipes = 4;
+		rdev->config.rv770.max_simds = 8;
+		rdev->config.rv770.max_backends = 4;
+		rdev->config.rv770.max_gprs = 256;
+		rdev->config.rv770.max_threads = 248;
+		rdev->config.rv770.max_stack_entries = 512;
+		rdev->config.rv770.max_hw_contexts = 8;
+		rdev->config.rv770.max_gs_threads = 16 * 2;
+		rdev->config.rv770.sx_max_export_size = 256;
+		rdev->config.rv770.sx_max_export_pos_size = 32;
+		rdev->config.rv770.sx_max_export_smx_size = 224;
+		rdev->config.rv770.sq_num_cf_insts = 2;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0x100;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+
+		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
+			rdev->config.rv770.sx_max_export_pos_size -= 16;
+			rdev->config.rv770.sx_max_export_smx_size += 16;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* Initialize HDP */
+	j = 0;
+	for (i = 0; i < 32; i++) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+		j += 0x18;
+	}
+
+	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+	/* setup tiling, simd, pipe config */
+	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
+
+	switch (rdev->config.rv770.max_tile_pipes) {
+	case 1:
+		gb_tiling_config |= PIPE_TILING(0);
+		break;
+	case 2:
+		gb_tiling_config |= PIPE_TILING(1);
+		break;
+	case 4:
+		gb_tiling_config |= PIPE_TILING(2);
+		break;
+	case 8:
+		gb_tiling_config |= PIPE_TILING(3);
+		break;
+	default:
+		break;
+	}
+
+	if (rdev->family == CHIP_RV770)
+		gb_tiling_config |= BANK_TILING(1);
+	else
+		gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_SHIFT) >> NOOFBANK_MASK);
+
+	gb_tiling_config |= GROUP_SIZE(0);
+
+	if (((mc_arb_ramcfg & NOOFROWS_MASK) & NOOFROWS_SHIFT) > 3) {
+		gb_tiling_config |= ROW_TILING(3);
+		gb_tiling_config |= SAMPLE_SPLIT(3);
+	} else {
+		gb_tiling_config |=
+			ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
+		gb_tiling_config |=
+			SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
+	}
+
+	gb_tiling_config |= BANK_SWAPS(1);
+
+	backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes,
+							rdev->config.rv770.max_backends,
+							(0xff << rdev->config.rv770.max_backends) & 0xff);
+	gb_tiling_config |= BACKEND_MAP(backend_map);
+
+	cc_gc_shader_pipe_config =
+		INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK);
+	cc_gc_shader_pipe_config |=
+		INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK);
+
+	cc_rb_backend_disable =
+		BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK);
+
+	WREG32(GB_TILING_CONFIG, gb_tiling_config);
+	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+
+	WREG32(CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
+	WREG32(CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
+	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
+
+	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+	WREG32(CGTS_SYS_TCC_DISABLE, 0);
+	WREG32(CGTS_TCC_DISABLE, 0);
+	WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
+	WREG32(CGTS_USER_TCC_DISABLE, 0);
+
+	num_qd_pipes =
+		R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK);
+	WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
+	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
+
+	/* set HW defaults for 3D engine */
+	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
+						ROQ_IB2_START(0x2b)));
+
+	WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));
+
+	WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO |
+					SYNC_GRADIENT |
+					SYNC_WALKER |
+					SYNC_ALIGNER));
+
+	sx_debug_1 = RREG32(SX_DEBUG_1);
+	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
+	WREG32(SX_DEBUG_1, sx_debug_1);
+
+	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
+	smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);
+	smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
+	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
+
+	WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
+					  GS_FLUSH_CTL(4) |
+					  ACK_FLUSH_CTL(3) |
+					  SYNC_FLUSH_CTL));
+
+	if (rdev->family == CHIP_RV770)
+		WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f));
+	else {
+		db_debug4 = RREG32(DB_DEBUG4);
+		db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
+		WREG32(DB_DEBUG4, db_debug4);
+	}
+
+	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
+						   POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
+						   SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));
+
+	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
+						 SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
+						 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+
+	WREG32(VGT_NUM_INSTANCES, 1);
+
+	WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
+
+	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
+
+	WREG32(CP_PERFMON_CNTL, 0);
+
+	sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
+			    DONE_FIFO_HIWATER(0xe0) |
+			    ALU_UPDATE_FIFO_HIWATER(0x8));
+	switch (rdev->family) {
+	case CHIP_RV770:
+		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
+		break;
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_RV740:
+	default:
+		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
+		break;
+	}
+	WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
+
+	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
+	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
+	 */
+	sq_config = RREG32(SQ_CONFIG);
+	sq_config &= ~(PS_PRIO(3) |
+		       VS_PRIO(3) |
+		       GS_PRIO(3) |
+		       ES_PRIO(3));
+	sq_config |= (DX9_CONSTS |
+		      VC_ENABLE |
+		      EXPORT_SRC_C |
+		      PS_PRIO(0) |
+		      VS_PRIO(1) |
+		      GS_PRIO(2) |
+		      ES_PRIO(3));
+	if (rdev->family == CHIP_RV710)
+		/* no vertex cache */
+		sq_config &= ~VC_ENABLE;
+
+	WREG32(SQ_CONFIG, sq_config);
+
+	WREG32(SQ_GPR_RESOURCE_MGMT_1,  (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
+						    NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
+						    NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));
+
+	WREG32(SQ_GPR_RESOURCE_MGMT_2,  (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
+						    NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));
+
+	sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
+				   NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
+				   NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
+	if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
+		sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
+	else
+		sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);
+	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
+
+	WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
+						     NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
+
+	WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
+						     NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
+
+	sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
+				     SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
+				     SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
+				     SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));
+
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
+
+	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
+						     FORCE_EOV_MAX_REZ_CNT(255)));
+
+	if (rdev->family == CHIP_RV710)
+		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
+							   AUTO_INVLD_EN(ES_AND_GS_AUTO)));
+	else
+		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
+							   AUTO_INVLD_EN(ES_AND_GS_AUTO)));
+
+	switch (rdev->family) {
+	case CHIP_RV770:
+	case CHIP_RV730:
+	case CHIP_RV740:
+		gs_prim_buffer_depth = 384;
+		break;
+	case CHIP_RV710:
+		gs_prim_buffer_depth = 128;
+		break;
+	default:
+		break;
+	}
+
+	num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
+	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
+	/* Max value for this is 256 */
+	if (vgt_gs_per_es > 256)
+		vgt_gs_per_es = 256;
+
+	WREG32(VGT_ES_PER_GS, 128);
+	WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
+	WREG32(VGT_GS_PER_VS, 2);
+
+	/* more default values. 2D/3D driver should adjust as needed */
+	WREG32(VGT_GS_VERTEX_REUSE, 16);
+	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+	WREG32(VGT_STRMOUT_EN, 0);
+	WREG32(SX_MISC, 0);
+	WREG32(PA_SC_MODE_CNTL, 0);
+	WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
+	WREG32(PA_SC_AA_CONFIG, 0);
+	WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
+	WREG32(PA_SC_LINE_STIPPLE, 0);
+	WREG32(SPI_INPUT_Z, 0);
+	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
+	WREG32(CB_COLOR7_FRAG, 0);
+
+	/* clear render buffer base addresses */
+	WREG32(CB_COLOR0_BASE, 0);
+	WREG32(CB_COLOR1_BASE, 0);
+	WREG32(CB_COLOR2_BASE, 0);
+	WREG32(CB_COLOR3_BASE, 0);
+	WREG32(CB_COLOR4_BASE, 0);
+	WREG32(CB_COLOR5_BASE, 0);
+	WREG32(CB_COLOR6_BASE, 0);
+	WREG32(CB_COLOR7_BASE, 0);
+
+	WREG32(TCP_CNTL, 0);
+
+	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
+	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+
+	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
+					  NUM_CLIP_SEQ(3)));
+
+}
+
+int rv770_mc_init(struct radeon_device *rdev)
+{
+	fixed20_12 a;
+	u32 tmp;
+	int r;
+
+	/* Get VRAM informations */
+	/* FIXME: Don't know how to determine vram width, need to check
+	 * vram_width usage
+	 */
+	rdev->mc.vram_width = 128;
+	rdev->mc.vram_is_ddr = true;
 	/* Could aper size report 0 ? */
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	/* Setup GPU memory space */
+	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
+	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r)
+			return r;
+		/* gtt_size is setup by radeon_agp_init */
+		rdev->mc.gtt_location = rdev->mc.agp_base;
+		tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
+		/* Try to put vram before or after AGP because we
+		 * we want SYSTEM_APERTURE to cover both VRAM and
+		 * AGP so that GPU can catch out of VRAM/AGP access
+		 */
+		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
+			/* Enought place before */
+			rdev->mc.vram_location = rdev->mc.gtt_location -
+							rdev->mc.mc_vram_size;
+		} else if (tmp > rdev->mc.mc_vram_size) {
+			/* Enought place after */
+			rdev->mc.vram_location = rdev->mc.gtt_location +
+							rdev->mc.gtt_size;
+		} else {
+			/* Try to setup VRAM then AGP might not
+			 * not work on some card
+			 */
+			rdev->mc.vram_location = 0x00000000UL;
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		}
+	} else {
+		rdev->mc.vram_location = 0x00000000UL;
+		rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+	}
+	rdev->mc.vram_start = rdev->mc.vram_location;
+	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+	rdev->mc.gtt_start = rdev->mc.gtt_location;
+	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size;
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+	return 0;
+}
+int rv770_gpu_reset(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int rv770_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	rv770_mc_resume(rdev);
+	r = rv770_pcie_gart_enable(rdev);
+	if (r)
+		return r;
+	rv770_gpu_init(rdev);
+	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	if (r)
+		return r;
+	r = rv770_cp_load_microcode(rdev);
+	if (r)
+		return r;
+	r = r600_cp_resume(rdev);
+	if (r)
+		return r;
+	r = r600_wb_init(rdev);
+	if (r)
+		return r;
+	return 0;
+}
+
+int rv770_suspend(struct radeon_device *rdev)
+{
+	/* FIXME: we should wait for ring to be empty */
+	r700_cp_stop(rdev);
+	return 0;
+}
+
+/* Plan is to move initialization in that function and use
+ * helper function so that radeon_device_init pretty much
+ * do nothing more than calling asic specific function. This
+ * should also allow to remove a bunch of callback function
+ * like vram_info.
+ */
+int rv770_init(struct radeon_device *rdev)
+{
+	int r;
+
+	rdev->new_init_path = true;
+	r = radeon_dummy_page_init(rdev);
+	if (r)
+		return r;
+	/* This don't do much */
+	r = radeon_gem_init(rdev);
+	if (r)
+		return r;
+	/* Read BIOS */
+	if (!radeon_get_bios(rdev)) {
+		if (ASIC_IS_AVIVO(rdev))
+			return -EINVAL;
+	}
+	/* Must be an ATOMBIOS */
+	if (!rdev->is_atom_bios)
+		return -EINVAL;
+	r = radeon_atombios_init(rdev);
+	if (r)
+		return r;
+	/* Post card if necessary */
+	if (!r600_card_posted(rdev) && rdev->bios) {
+		DRM_INFO("GPU not posted. posting now...\n");
+		atom_asic_init(rdev->mode_info.atom_context);
+	}
+	/* Initialize scratch registers */
+	r600_scratch_init(rdev);
+	/* Initialize surface registers */
+	radeon_surface_init(rdev);
+	r = radeon_clocks_init(rdev);
+	if (r)
+		return r;
+	/* Fence driver */
+	r = radeon_fence_driver_init(rdev);
+	if (r)
+		return r;
+	r = rv770_mc_init(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with disabling AGP */
+			rv770_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return rv770_init(rdev);
+		}
+		return r;
+	}
+	/* Memory manager */
+	r = radeon_object_init(rdev);
+	if (r)
+		return r;
+	rdev->cp.ring_obj = NULL;
+	r600_ring_init(rdev, 1024 * 1024);
+
+	if (!rdev->me_fw || !rdev->pfp_fw) {
+		r = r600_cp_init_microcode(rdev);
+		if (r) {
+			DRM_ERROR("Failed to load firmware!\n");
+			return r;
+		}
+	}
+
+	r = rv770_resume(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with disabling AGP */
+			rv770_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return rv770_init(rdev);
+		}
+		return r;
+	}
+	r = r600_blit_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled blitter (%d).\n", r);
+		return r;
+	}
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+		return r;
+	}
+	r = radeon_ib_test(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+			return r;
+	}
+	return 0;
+}
+
+void rv770_fini(struct radeon_device *rdev)
+{
+	r600_blit_fini(rdev);
+	radeon_ring_fini(rdev);
+	rv770_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+	radeon_gem_fini(rdev);
+	radeon_fence_driver_fini(rdev);
+	radeon_clocks_fini(rdev);
+#if __OS_HAS_AGP
+	if (rdev->flags & RADEON_IS_AGP)
+		radeon_agp_fini(rdev);
+#endif
+	radeon_object_fini(rdev);
+	if (rdev->is_atom_bios) {
+		radeon_atombios_fini(rdev);
+	} else {
+		radeon_combios_fini(rdev);
+	}
+	kfree(rdev->bios);
+	rdev->bios = NULL;
+	radeon_dummy_page_fini(rdev);
 }
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
new file mode 100644
index 0000000000000000000000000000000000000000..4b9c3d6396ff5239653e2b96fc9a54a2eb0a51d1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef RV770_H
+#define RV770_H
+
+#define R7XX_MAX_SH_GPRS           256
+#define R7XX_MAX_TEMP_GPRS         16
+#define R7XX_MAX_SH_THREADS        256
+#define R7XX_MAX_SH_STACK_ENTRIES  4096
+#define R7XX_MAX_BACKENDS          8
+#define R7XX_MAX_BACKENDS_MASK     0xff
+#define R7XX_MAX_SIMDS             16
+#define R7XX_MAX_SIMDS_MASK        0xffff
+#define R7XX_MAX_PIPES             8
+#define R7XX_MAX_PIPES_MASK        0xff
+
+/* Registers */
+#define	CB_COLOR0_BASE					0x28040
+#define	CB_COLOR1_BASE					0x28044
+#define	CB_COLOR2_BASE					0x28048
+#define	CB_COLOR3_BASE					0x2804C
+#define	CB_COLOR4_BASE					0x28050
+#define	CB_COLOR5_BASE					0x28054
+#define	CB_COLOR6_BASE					0x28058
+#define	CB_COLOR7_BASE					0x2805C
+#define	CB_COLOR7_FRAG					0x280FC
+
+#define	CC_GC_SHADER_PIPE_CONFIG			0x8950
+#define	CC_RB_BACKEND_DISABLE				0x98F4
+#define		BACKEND_DISABLE(x)				((x) << 16)
+#define	CC_SYS_RB_BACKEND_DISABLE			0x3F88
+
+#define	CGTS_SYS_TCC_DISABLE				0x3F90
+#define	CGTS_TCC_DISABLE				0x9148
+#define	CGTS_USER_SYS_TCC_DISABLE			0x3F94
+#define	CGTS_USER_TCC_DISABLE				0x914C
+
+#define	CONFIG_MEMSIZE					0x5428
+
+#define	CP_ME_CNTL					0x86D8
+#define		CP_ME_HALT					(1<<28)
+#define		CP_PFP_HALT					(1<<26)
+#define	CP_ME_RAM_DATA					0xC160
+#define	CP_ME_RAM_RADDR					0xC158
+#define	CP_ME_RAM_WADDR					0xC15C
+#define CP_MEQ_THRESHOLDS				0x8764
+#define		STQ_SPLIT(x)					((x) << 0)
+#define	CP_PERFMON_CNTL					0x87FC
+#define	CP_PFP_UCODE_ADDR				0xC150
+#define	CP_PFP_UCODE_DATA				0xC154
+#define	CP_QUEUE_THRESHOLDS				0x8760
+#define		ROQ_IB1_START(x)				((x) << 0)
+#define		ROQ_IB2_START(x)				((x) << 8)
+#define	CP_RB_CNTL					0xC104
+#define		RB_BUFSZ(x)					((x)<<0)
+#define		RB_BLKSZ(x)					((x)<<8)
+#define		RB_NO_UPDATE					(1<<27)
+#define		RB_RPTR_WR_ENA					(1<<31)
+#define		BUF_SWAP_32BIT					(2 << 16)
+#define	CP_RB_RPTR					0x8700
+#define	CP_RB_RPTR_ADDR					0xC10C
+#define	CP_RB_RPTR_ADDR_HI				0xC110
+#define	CP_RB_RPTR_WR					0xC108
+#define	CP_RB_WPTR					0xC114
+#define	CP_RB_WPTR_ADDR					0xC118
+#define	CP_RB_WPTR_ADDR_HI				0xC11C
+#define	CP_RB_WPTR_DELAY				0x8704
+#define	CP_SEM_WAIT_TIMER				0x85BC
+
+#define	DB_DEBUG3					0x98B0
+#define		DB_CLK_OFF_DELAY(x)				((x) << 11)
+#define DB_DEBUG4					0x9B8C
+#define		DISABLE_TILE_COVERED_FOR_PS_ITER		(1 << 6)
+
+#define	DCP_TILING_CONFIG				0x6CA0
+#define		PIPE_TILING(x)					((x) << 1)
+#define 	BANK_TILING(x)					((x) << 4)
+#define		GROUP_SIZE(x)					((x) << 6)
+#define		ROW_TILING(x)					((x) << 8)
+#define		BANK_SWAPS(x)					((x) << 11)
+#define		SAMPLE_SPLIT(x)					((x) << 14)
+#define		BACKEND_MAP(x)					((x) << 16)
+
+#define GB_TILING_CONFIG				0x98F0
+
+#define	GC_USER_SHADER_PIPE_CONFIG			0x8954
+#define		INACTIVE_QD_PIPES(x)				((x) << 8)
+#define		INACTIVE_QD_PIPES_MASK				0x0000FF00
+#define		INACTIVE_SIMDS(x)				((x) << 16)
+#define		INACTIVE_SIMDS_MASK				0x00FF0000
+
+#define	GRBM_CNTL					0x8000
+#define		GRBM_READ_TIMEOUT(x)				((x) << 0)
+#define	GRBM_SOFT_RESET					0x8020
+#define		SOFT_RESET_CP					(1<<0)
+#define	GRBM_STATUS					0x8010
+#define		CMDFIFO_AVAIL_MASK				0x0000000F
+#define		GUI_ACTIVE					(1<<31)
+#define	GRBM_STATUS2					0x8014
+
+#define	HDP_HOST_PATH_CNTL				0x2C00
+#define	HDP_NONSURFACE_BASE				0x2C04
+#define	HDP_NONSURFACE_INFO				0x2C08
+#define	HDP_NONSURFACE_SIZE				0x2C0C
+#define HDP_REG_COHERENCY_FLUSH_CNTL			0x54A0
+#define	HDP_TILING_CONFIG				0x2F3C
+
+#define	MC_ARB_RAMCFG					0x2760
+#define		NOOFBANK_SHIFT					0
+#define		NOOFBANK_MASK					0x00000003
+#define		NOOFRANK_SHIFT					2
+#define		NOOFRANK_MASK					0x00000004
+#define		NOOFROWS_SHIFT					3
+#define		NOOFROWS_MASK					0x00000038
+#define		NOOFCOLS_SHIFT					6
+#define		NOOFCOLS_MASK					0x000000C0
+#define		CHANSIZE_SHIFT					8
+#define		CHANSIZE_MASK					0x00000100
+#define		BURSTLENGTH_SHIFT				9
+#define		BURSTLENGTH_MASK				0x00000200
+#define	MC_VM_AGP_TOP					0x2028
+#define	MC_VM_AGP_BOT					0x202C
+#define	MC_VM_AGP_BASE					0x2030
+#define	MC_VM_FB_LOCATION				0x2024
+#define	MC_VM_MB_L1_TLB0_CNTL				0x2234
+#define	MC_VM_MB_L1_TLB1_CNTL				0x2238
+#define	MC_VM_MB_L1_TLB2_CNTL				0x223C
+#define	MC_VM_MB_L1_TLB3_CNTL				0x2240
+#define		ENABLE_L1_TLB					(1 << 0)
+#define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
+#define		SYSTEM_ACCESS_MODE_PA_ONLY			(0 << 3)
+#define		SYSTEM_ACCESS_MODE_USE_SYS_MAP			(1 << 3)
+#define		SYSTEM_ACCESS_MODE_IN_SYS			(2 << 3)
+#define		SYSTEM_ACCESS_MODE_NOT_IN_SYS			(3 << 3)
+#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU	(0 << 5)
+#define		EFFECTIVE_L1_TLB_SIZE(x)			((x)<<15)
+#define		EFFECTIVE_L1_QUEUE_SIZE(x)			((x)<<18)
+#define	MC_VM_MD_L1_TLB0_CNTL				0x2654
+#define	MC_VM_MD_L1_TLB1_CNTL				0x2658
+#define	MC_VM_MD_L1_TLB2_CNTL				0x265C
+#define	MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x203C
+#define	MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2038
+#define	MC_VM_SYSTEM_APERTURE_LOW_ADDR			0x2034
+
+#define	PA_CL_ENHANCE					0x8A14
+#define		CLIP_VTX_REORDER_ENA				(1 << 0)
+#define		NUM_CLIP_SEQ(x)					((x) << 1)
+#define PA_SC_AA_CONFIG					0x28C04
+#define PA_SC_CLIPRECT_RULE				0x2820C
+#define	PA_SC_EDGERULE					0x28230
+#define	PA_SC_FIFO_SIZE					0x8BCC
+#define		SC_PRIM_FIFO_SIZE(x)				((x) << 0)
+#define		SC_HIZ_TILE_FIFO_SIZE(x)			((x) << 12)
+#define	PA_SC_FORCE_EOV_MAX_CNTS			0x8B24
+#define		FORCE_EOV_MAX_CLK_CNT(x)			((x)<<0)
+#define		FORCE_EOV_MAX_REZ_CNT(x)			((x)<<16)
+#define PA_SC_LINE_STIPPLE				0x28A0C
+#define	PA_SC_LINE_STIPPLE_STATE			0x8B10
+#define PA_SC_MODE_CNTL					0x28A4C
+#define	PA_SC_MULTI_CHIP_CNTL				0x8B20
+#define		SC_EARLYZ_TILE_FIFO_SIZE(x)			((x) << 20)
+
+#define	SCRATCH_REG0					0x8500
+#define	SCRATCH_REG1					0x8504
+#define	SCRATCH_REG2					0x8508
+#define	SCRATCH_REG3					0x850C
+#define	SCRATCH_REG4					0x8510
+#define	SCRATCH_REG5					0x8514
+#define	SCRATCH_REG6					0x8518
+#define	SCRATCH_REG7					0x851C
+#define	SCRATCH_UMSK					0x8540
+#define	SCRATCH_ADDR					0x8544
+
+#define	SMX_DC_CTL0					0xA020
+#define		USE_HASH_FUNCTION				(1 << 0)
+#define		CACHE_DEPTH(x)					((x) << 1)
+#define		FLUSH_ALL_ON_EVENT				(1 << 10)
+#define		STALL_ON_EVENT					(1 << 11)
+#define	SMX_EVENT_CTL					0xA02C
+#define		ES_FLUSH_CTL(x)					((x) << 0)
+#define		GS_FLUSH_CTL(x)					((x) << 3)
+#define		ACK_FLUSH_CTL(x)				((x) << 6)
+#define		SYNC_FLUSH_CTL					(1 << 8)
+
+#define	SPI_CONFIG_CNTL					0x9100
+#define		GPR_WRITE_PRIORITY(x)				((x) << 0)
+#define		DISABLE_INTERP_1				(1 << 5)
+#define	SPI_CONFIG_CNTL_1				0x913C
+#define		VTX_DONE_DELAY(x)				((x) << 0)
+#define		INTERP_ONE_PRIM_PER_ROW				(1 << 4)
+#define	SPI_INPUT_Z					0x286D8
+#define	SPI_PS_IN_CONTROL_0				0x286CC
+#define		NUM_INTERP(x)					((x)<<0)
+#define		POSITION_ENA					(1<<8)
+#define		POSITION_CENTROID				(1<<9)
+#define		POSITION_ADDR(x)				((x)<<10)
+#define		PARAM_GEN(x)					((x)<<15)
+#define		PARAM_GEN_ADDR(x)				((x)<<19)
+#define		BARYC_SAMPLE_CNTL(x)				((x)<<26)
+#define		PERSP_GRADIENT_ENA				(1<<28)
+#define		LINEAR_GRADIENT_ENA				(1<<29)
+#define		POSITION_SAMPLE					(1<<30)
+#define		BARYC_AT_SAMPLE_ENA				(1<<31)
+
+#define	SQ_CONFIG					0x8C00
+#define		VC_ENABLE					(1 << 0)
+#define		EXPORT_SRC_C					(1 << 1)
+#define		DX9_CONSTS					(1 << 2)
+#define		ALU_INST_PREFER_VECTOR				(1 << 3)
+#define		DX10_CLAMP					(1 << 4)
+#define		CLAUSE_SEQ_PRIO(x)				((x) << 8)
+#define		PS_PRIO(x)					((x) << 24)
+#define		VS_PRIO(x)					((x) << 26)
+#define		GS_PRIO(x)					((x) << 28)
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_0			0x8DB0
+#define		SIMDA_RING0(x)					((x)<<0)
+#define		SIMDA_RING1(x)					((x)<<8)
+#define		SIMDB_RING0(x)					((x)<<16)
+#define		SIMDB_RING1(x)					((x)<<24)
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_1			0x8DB4
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_2			0x8DB8
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_3			0x8DBC
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_4			0x8DC0
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_5			0x8DC4
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_6			0x8DC8
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_7			0x8DCC
+#define		ES_PRIO(x)					((x) << 30)
+#define	SQ_GPR_RESOURCE_MGMT_1				0x8C04
+#define		NUM_PS_GPRS(x)					((x) << 0)
+#define		NUM_VS_GPRS(x)					((x) << 16)
+#define		DYN_GPR_ENABLE					(1 << 27)
+#define		NUM_CLAUSE_TEMP_GPRS(x)				((x) << 28)
+#define	SQ_GPR_RESOURCE_MGMT_2				0x8C08
+#define		NUM_GS_GPRS(x)					((x) << 0)
+#define		NUM_ES_GPRS(x)					((x) << 16)
+#define	SQ_MS_FIFO_SIZES				0x8CF0
+#define		CACHE_FIFO_SIZE(x)				((x) << 0)
+#define		FETCH_FIFO_HIWATER(x)				((x) << 8)
+#define		DONE_FIFO_HIWATER(x)				((x) << 16)
+#define		ALU_UPDATE_FIFO_HIWATER(x)			((x) << 24)
+#define	SQ_STACK_RESOURCE_MGMT_1			0x8C10
+#define		NUM_PS_STACK_ENTRIES(x)				((x) << 0)
+#define		NUM_VS_STACK_ENTRIES(x)				((x) << 16)
+#define	SQ_STACK_RESOURCE_MGMT_2			0x8C14
+#define		NUM_GS_STACK_ENTRIES(x)				((x) << 0)
+#define		NUM_ES_STACK_ENTRIES(x)				((x) << 16)
+#define	SQ_THREAD_RESOURCE_MGMT				0x8C0C
+#define		NUM_PS_THREADS(x)				((x) << 0)
+#define		NUM_VS_THREADS(x)				((x) << 8)
+#define		NUM_GS_THREADS(x)				((x) << 16)
+#define		NUM_ES_THREADS(x)				((x) << 24)
+
+#define	SX_DEBUG_1					0x9058
+#define		ENABLE_NEW_SMX_ADDRESS				(1 << 16)
+#define	SX_EXPORT_BUFFER_SIZES				0x900C
+#define		COLOR_BUFFER_SIZE(x)				((x) << 0)
+#define		POSITION_BUFFER_SIZE(x)				((x) << 8)
+#define		SMX_BUFFER_SIZE(x)				((x) << 16)
+#define	SX_MISC						0x28350
+
+#define	TA_CNTL_AUX					0x9508
+#define		DISABLE_CUBE_WRAP				(1 << 0)
+#define		DISABLE_CUBE_ANISO				(1 << 1)
+#define		SYNC_GRADIENT					(1 << 24)
+#define		SYNC_WALKER					(1 << 25)
+#define		SYNC_ALIGNER					(1 << 26)
+#define		BILINEAR_PRECISION_6_BIT			(0 << 31)
+#define		BILINEAR_PRECISION_8_BIT			(1 << 31)
+
+#define	TCP_CNTL					0x9610
+
+#define	VGT_CACHE_INVALIDATION				0x88C4
+#define		CACHE_INVALIDATION(x)				((x)<<0)
+#define			VC_ONLY						0
+#define			TC_ONLY						1
+#define			VC_AND_TC					2
+#define		AUTO_INVLD_EN(x)				((x) << 6)
+#define			NO_AUTO						0
+#define			ES_AUTO						1
+#define			GS_AUTO						2
+#define			ES_AND_GS_AUTO					3
+#define	VGT_ES_PER_GS					0x88CC
+#define	VGT_GS_PER_ES					0x88C8
+#define	VGT_GS_PER_VS					0x88E8
+#define	VGT_GS_VERTEX_REUSE				0x88D4
+#define	VGT_NUM_INSTANCES				0x8974
+#define	VGT_OUT_DEALLOC_CNTL				0x28C5C
+#define		DEALLOC_DIST_MASK				0x0000007F
+#define	VGT_STRMOUT_EN					0x28AB0
+#define	VGT_VERTEX_REUSE_BLOCK_CNTL			0x28C58
+#define		VTX_REUSE_DEPTH_MASK				0x000000FF
+
+#define VM_CONTEXT0_CNTL				0x1410
+#define		ENABLE_CONTEXT					(1 << 0)
+#define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
+#define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
+#define	VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x153C
+#define	VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x157C
+#define	VM_CONTEXT0_PAGE_TABLE_START_ADDR		0x155C
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR	0x1518
+#define VM_L2_CNTL					0x1400
+#define		ENABLE_L2_CACHE					(1 << 0)
+#define		ENABLE_L2_FRAGMENT_PROCESSING			(1 << 1)
+#define		ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE		(1 << 9)
+#define		EFFECTIVE_L2_QUEUE_SIZE(x)			(((x) & 7) << 14)
+#define VM_L2_CNTL2					0x1404
+#define		INVALIDATE_ALL_L1_TLBS				(1 << 0)
+#define		INVALIDATE_L2_CACHE				(1 << 1)
+#define VM_L2_CNTL3					0x1408
+#define		BANK_SELECT(x)					((x) << 0)
+#define		CACHE_UPDATE_MODE(x)				((x) << 6)
+#define	VM_L2_STATUS					0x140C
+#define		L2_BUSY						(1 << 0)
+
+#define	WAIT_UNTIL					0x8040
+
+#endif