summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2005-11-07 11:06:55 +1100
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-06 16:56:47 -0800
commit3c726f8dee6f55e96475574e9f645327e461884c (patch)
treef67c381e8f57959aa4a94bda4c68e24253cd8171 /include
parentf912696ab330bf539231d1f8032320f2a08b850f (diff)
downloadlinux-3c726f8dee6f55e96475574e9f645327e461884c.tar.gz
[PATCH] ppc64: support 64k pages
Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include')
-rw-r--r--include/asm-powerpc/cputable.h3
-rw-r--r--include/asm-powerpc/iommu.h5
-rw-r--r--include/asm-powerpc/machdep.h10
-rw-r--r--include/asm-powerpc/prom.h8
-rw-r--r--include/asm-powerpc/system.h2
-rw-r--r--include/asm-powerpc/thread_info.h20
-rw-r--r--include/asm-powerpc/tlbflush.h9
-rw-r--r--include/asm-ppc64/mmu.h208
-rw-r--r--include/asm-ppc64/mmu_context.h15
-rw-r--r--include/asm-ppc64/paca.h13
-rw-r--r--include/asm-ppc64/page.h147
-rw-r--r--include/asm-ppc64/pgalloc.h47
-rw-r--r--include/asm-ppc64/pgtable-4k.h88
-rw-r--r--include/asm-ppc64/pgtable-64k.h87
-rw-r--r--include/asm-ppc64/pgtable.h160
-rw-r--r--include/asm-ppc64/prom.h8
-rw-r--r--include/asm-ppc64/system.h2
17 files changed, 572 insertions, 260 deletions
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index c019501daceb..79a0556a0ab8 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -101,6 +101,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000020000000000)
#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0000040000000000)
#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000)
+#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0000100000000000)
#else
/* ensure on 32b processors the flags are available for compiling but
* don't do anything */
@@ -116,6 +117,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0)
#define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0)
#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0)
+#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0)
#endif
#ifndef __ASSEMBLY__
@@ -339,6 +341,7 @@ enum {
#ifdef __powerpc64__
CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |
CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_CELL |
+ CPU_FTR_CI_LARGE_PAGE |
#endif
0,
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 9d91bdd667ae..6a35e6570ccd 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -74,6 +74,11 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn);
/* Creates table for an individual device node */
extern void iommu_devnode_init_iSeries(struct device_node *dn);
+/* Get table parameters from HV */
+extern void iommu_table_getparms_iSeries(unsigned long busno,
+ unsigned char slotno,
+ unsigned char virtbus,
+ struct iommu_table* tbl);
#endif /* CONFIG_PPC_ISERIES */
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index 629ca964b974..fa03864d06eb 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -47,20 +47,22 @@ struct machdep_calls {
#ifdef CONFIG_PPC64
void (*hpte_invalidate)(unsigned long slot,
unsigned long va,
- int large,
+ int psize,
int local);
long (*hpte_updatepp)(unsigned long slot,
unsigned long newpp,
unsigned long va,
- int large,
+ int psize,
int local);
void (*hpte_updateboltedpp)(unsigned long newpp,
- unsigned long ea);
+ unsigned long ea,
+ int psize);
long (*hpte_insert)(unsigned long hpte_group,
unsigned long va,
unsigned long prpn,
+ unsigned long rflags,
unsigned long vflags,
- unsigned long rflags);
+ int psize);
long (*hpte_remove)(unsigned long hpte_group);
void (*flush_hash_range)(unsigned long number, int local);
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index 3a0104fa0462..7587bf5f38c6 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -178,6 +178,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
extern struct device_node *of_node_get(struct device_node *node);
extern void of_node_put(struct device_node *node);
+/* For scanning the flat device-tree at boot time */
+int __init of_scan_flat_dt(int (*it)(unsigned long node,
+ const char *uname, int depth,
+ void *data),
+ void *data);
+void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
+ unsigned long *size);
+
/* For updating the device tree at runtime */
extern void of_attach_node(struct device_node *);
extern void of_detach_node(const struct device_node *);
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index b5da0b851e02..3536a5cd7a2d 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -289,7 +289,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
#ifdef CONFIG_PPC64
static __inline__ unsigned long
-__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new)
+__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
{
unsigned long prev;
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index ab17db79f69d..e525f49bd179 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -65,23 +65,27 @@ struct thread_info {
/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_INFO_GFP GFP_KERNEL | __GFP_ZERO
-#else
-#define THREAD_INFO_GFP GFP_KERNEL
-#endif
-
#if THREAD_SHIFT >= PAGE_SHIFT
#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT)
+#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(THREAD_INFO_GFP, THREAD_ORDER))
+ ((struct thread_info *)__get_free_pages(GFP_KERNEL | \
+ __GFP_ZERO, THREAD_ORDER))
+#else
+#define alloc_thread_info(tsk) \
+ ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER))
+#endif
#define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER)
#else /* THREAD_SHIFT < PAGE_SHIFT */
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, THREAD_INFO_GFP)
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
+#else
+#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#endif
#define free_thread_info(ti) kfree(ti)
#endif /* THREAD_SHIFT < PAGE_SHIFT */
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index ca3655672bbc..a2998eee37bb 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -31,9 +31,9 @@ struct mm_struct;
struct ppc64_tlb_batch {
unsigned long index;
struct mm_struct *mm;
- pte_t pte[PPC64_TLB_BATCH_NR];
+ real_pte_t pte[PPC64_TLB_BATCH_NR];
unsigned long vaddr[PPC64_TLB_BATCH_NR];
- unsigned int large;
+ unsigned int psize;
};
DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
@@ -48,8 +48,9 @@ static inline void flush_tlb_pending(void)
put_cpu_var(ppc64_tlb_batch);
}
-extern void flush_hash_page(unsigned long va, pte_t pte, int local);
-void flush_hash_range(unsigned long number, int local);
+extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
+ int local);
+extern void flush_hash_range(unsigned long number, int local);
#else /* CONFIG_PPC64 */
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index e0505acb77d9..4c18a5cb69f5 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -48,13 +48,21 @@ extern char initial_stab[];
/* Bits in the SLB VSID word */
#define SLB_VSID_SHIFT 12
+#define SLB_VSID_B ASM_CONST(0xc000000000000000)
+#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000)
+#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000)
#define SLB_VSID_KS ASM_CONST(0x0000000000000800)
#define SLB_VSID_KP ASM_CONST(0x0000000000000400)
#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */
-#define SLB_VSID_L ASM_CONST(0x0000000000000100) /* largepage */
+#define SLB_VSID_L ASM_CONST(0x0000000000000100)
#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */
-#define SLB_VSID_LS ASM_CONST(0x0000000000000070) /* size of largepage */
-
+#define SLB_VSID_LP ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000)
+#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010)
+#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020)
+#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP)
+
#define SLB_VSID_KERNEL (SLB_VSID_KP)
#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
@@ -69,6 +77,7 @@ extern char initial_stab[];
#define HPTE_V_AVPN_SHIFT 7
#define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80)
#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
+#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN))
#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
#define HPTE_V_LOCK ASM_CONST(0x0000000000000008)
#define HPTE_V_LARGE ASM_CONST(0x0000000000000004)
@@ -81,6 +90,7 @@ extern char initial_stab[];
#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000)
#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff)
#define HPTE_R_PP ASM_CONST(0x0000000000000003)
+#define HPTE_R_N ASM_CONST(0x0000000000000004)
/* Values for PP (assumes Ks=0, Kp=1) */
/* pp0 will always be 0 for linux */
@@ -99,100 +109,120 @@ typedef struct {
extern hpte_t *htab_address;
extern unsigned long htab_hash_mask;
-static inline unsigned long hpt_hash(unsigned long vpn, int large)
+/*
+ * Page size definition
+ *
+ * shift : is the "PAGE_SHIFT" value for that page size
+ * sllp : is a bit mask with the value of SLB L || LP to be or'ed
+ * directly to a slbmte "vsid" value
+ * penc : is the HPTE encoding mask for the "LP" field:
+ *
+ */
+struct mmu_psize_def
{
- unsigned long vsid;
- unsigned long page;
-
- if (large) {
- vsid = vpn >> 4;
- page = vpn & 0xf;
- } else {
- vsid = vpn >> 16;
- page = vpn & 0xffff;
- }
+ unsigned int shift; /* number of bits */
+ unsigned int penc; /* HPTE encoding */
+ unsigned int tlbiel; /* tlbiel supported for that page size */
+ unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
+ unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
+};
- return (vsid & 0x7fffffffffUL) ^ page;
-}
-
-static inline void __tlbie(unsigned long va, int large)
-{
- /* clear top 16 bits, non SLS segment */
- va &= ~(0xffffULL << 48);
-
- if (large) {
- va &= HPAGE_MASK;
- asm volatile("tlbie %0,1" : : "r"(va) : "memory");
- } else {
- va &= PAGE_MASK;
- asm volatile("tlbie %0,0" : : "r"(va) : "memory");
- }
-}
+#endif /* __ASSEMBLY__ */
-static inline void tlbie(unsigned long va, int large)
-{
- asm volatile("ptesync": : :"memory");
- __tlbie(va, large);
- asm volatile("eieio; tlbsync; ptesync": : :"memory");
-}
+/*
+ * The kernel uses the constants below to index into the page sizes array.
+ * The use of fixed constants for this purpose is better for the performance
+ * of the low level hash refill handlers.
+ *
+ * A non supported page size has a "shift" field set to 0
+ *
+ * Any new page size being implemented can get a new entry in here. Whether
+ * the kernel will use it or not is a different matter though. The actual page
+ * size used by hugetlbfs is not defined here and may be made variable
+ */
-static inline void __tlbiel(unsigned long va)
-{
- /* clear top 16 bits, non SLS segment */
- va &= ~(0xffffULL << 48);
- va &= PAGE_MASK;
-
- /*
- * Thanks to Alan Modra we are now able to use machine specific
- * assembly instructions (like tlbiel) by using the gas -many flag.
- * However we have to support older toolchains so for the moment
- * we hardwire it.
- */
-#if 0
- asm volatile("tlbiel %0" : : "r"(va) : "memory");
-#else
- asm volatile(".long 0x7c000224 | (%0 << 11)" : : "r"(va) : "memory");
-#endif
-}
+#define MMU_PAGE_4K 0 /* 4K */
+#define MMU_PAGE_64K 1 /* 64K */
+#define MMU_PAGE_64K_AP 2 /* 64K Admixed (in a 4K segment) */
+#define MMU_PAGE_1M 3 /* 1M */
+#define MMU_PAGE_16M 4 /* 16M */
+#define MMU_PAGE_16G 5 /* 16G */
+#define MMU_PAGE_COUNT 6
-static inline void tlbiel(unsigned long va)
-{
- asm volatile("ptesync": : :"memory");
- __tlbiel(va);
- asm volatile("ptesync": : :"memory");
-}
+#ifndef __ASSEMBLY__
-static inline unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
-{
- unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
- unsigned long va;
+/*
+ * The current system page sizes
+ */
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+extern int mmu_linear_psize;
+extern int mmu_virtual_psize;
- va = avpn << 23;
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * The page size index of the huge pages for use by hugetlbfs
+ */
+extern int mmu_huge_psize;
- if (! (hpte_v & HPTE_V_LARGE)) {
- unsigned long vpi, pteg;
+#endif /* CONFIG_HUGETLB_PAGE */
- pteg = slot / HPTES_PER_GROUP;
- if (hpte_v & HPTE_V_SECONDARY)
- pteg = ~pteg;
+/*
+ * This function sets the AVPN and L fields of the HPTE appropriately
+ * for the page size: AVPN bits in avpnm are dropped, L is set if not 4K
+ */
+static inline unsigned long hpte_encode_v(unsigned long va, int psize)
+{
+	unsigned long v;
+	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
+	v <<= HPTE_V_AVPN_SHIFT;
+	if (psize != MMU_PAGE_4K)
+		v |= HPTE_V_LARGE;
+	return v;
+}
- vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+/*
+ * This function sets the ARPN and LP fields of the HPTE appropriately
+ * for the page size. We assume the pa is already "clean", that is,
+ * properly aligned for the requested page size. For anything other
+ * than 4K, the low "shift" bits of pa are cleared and the page size
+ * encoding (penc) is or'ed in starting at bit 12
+ */
+static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
+{
- va |= vpi << PAGE_SHIFT;
+	/* A 4K page needs no special encoding */
+	if (psize == MMU_PAGE_4K)
+		return pa & HPTE_R_RPN;
+	else {
+		unsigned int penc = mmu_psize_defs[psize].penc;
+		unsigned int shift = mmu_psize_defs[psize].shift;
+		return (pa & ~((1ul << shift) - 1)) | (penc << 12);
}
-
- return va;
}
/*
- * Handle a fault by adding an HPTE. If the address can't be determined
- * to be valid via Linux page tables, return 1. If handled return 0
+ * This hashes a virtual address for a 256MB segment only for now
*/
-extern int __hash_page(unsigned long ea, unsigned long access,
- unsigned long vsid, pte_t *ptep, unsigned long trap,
- int local);
+
+static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
+{
+ return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift);
+}
+
+extern int __hash_page_4K(unsigned long ea, unsigned long access,
+ unsigned long vsid, pte_t *ptep, unsigned long trap,
+ unsigned int local);
+extern int __hash_page_64K(unsigned long ea, unsigned long access,
+ unsigned long vsid, pte_t *ptep, unsigned long trap,
+ unsigned int local);
+struct mm_struct;
+extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
+ unsigned long ea, unsigned long vsid, int local);
extern void htab_finish_init(void);
+extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long mode,
+ int psize);
extern void hpte_init_native(void);
extern void hpte_init_lpar(void);
@@ -200,17 +230,21 @@ extern void hpte_init_iSeries(void);
extern long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long va, unsigned long prpn,
- unsigned long vflags,
- unsigned long rflags);
-extern long native_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn,
- unsigned long vflags, unsigned long rflags);
+ unsigned long rflags,
+ unsigned long vflags, int psize);
+
+extern long native_hpte_insert(unsigned long hpte_group,
+ unsigned long va, unsigned long prpn,
+ unsigned long rflags,
+ unsigned long vflags, int psize);
-extern long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn,
- unsigned long vflags, unsigned long rflags);
+extern long iSeries_hpte_insert(unsigned long hpte_group,
+ unsigned long va, unsigned long prpn,
+ unsigned long rflags,
+ unsigned long vflags, int psize);
extern void stabs_alloc(void);
+extern void slb_initialize(void);
#endif /* __ASSEMBLY__ */
diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h
index 820dd729b895..4f512e9fa6b8 100644
--- a/include/asm-ppc64/mmu_context.h
+++ b/include/asm-ppc64/mmu_context.h
@@ -16,8 +16,16 @@
* 2 of the License, or (at your option) any later version.
*/
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+/*
+ * Getting into a kernel thread, there is no valid user segment, mark
+ * paca->pgdir NULL so that SLB miss on user addresses will fault
+ */
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+ struct task_struct *tsk)
{
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = NULL;
+#endif /* CONFIG_PPC_64K_PAGES */
}
#define NO_CONTEXT 0
@@ -40,8 +48,13 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
cpu_set(smp_processor_id(), next->cpu_vm_mask);
/* No need to flush userspace segments if the mm doesnt change */
+#ifdef CONFIG_PPC_64K_PAGES
+ if (prev == next && get_paca()->pgdir == next->pgd)
+ return;
+#else
if (prev == next)
return;
+#endif /* CONFIG_PPC_64K_PAGES */
#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC))
diff --git a/include/asm-ppc64/paca.h b/include/asm-ppc64/paca.h
index f68fe91debaf..bccacd6aa93a 100644
--- a/include/asm-ppc64/paca.h
+++ b/include/asm-ppc64/paca.h
@@ -72,10 +72,15 @@ struct paca_struct {
/*
* Now, starting in cacheline 2, the exception save areas
*/
- u64 exgen[8] __attribute__((aligned(0x80))); /* used for most interrupts/exceptions */
- u64 exmc[8]; /* used for machine checks */
- u64 exslb[8]; /* used for SLB/segment table misses
- * on the linear mapping */
+ /* used for most interrupts/exceptions */
+ u64 exgen[10] __attribute__((aligned(0x80)));
+ u64 exmc[10]; /* used for machine checks */
+ u64 exslb[10]; /* used for SLB/segment table misses
+ * on the linear mapping */
+#ifdef CONFIG_PPC_64K_PAGES
+ pgd_t *pgdir;
+#endif /* CONFIG_PPC_64K_PAGES */
+
mm_context_t context;
u16 slb_cache[SLB_CACHE_ENTRIES];
u16 slb_cache_ptr;
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index d404431f0a9a..82ce187e5be8 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -13,32 +13,59 @@
#include <linux/config.h>
#include <asm/ppc_asm.h> /* for ASM_CONST */
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+/*
+ * We support either 4k or 64k software page size. When using 64k pages
+ * however, whether we are really supporting 64k pages in HW or not is
+ * irrelevant to those definitions. We always define HW_PAGE_SHIFT to 12
+ * as use of 64k pages remains Linux kernel specific: every notion of
+ * page number shared with the firmware, TCEs, iommu, etc... still assumes
+ * a page size of 4096.
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+#define PAGE_SHIFT 16
+#else
+#define PAGE_SHIFT 12
+#endif
-#define SID_SHIFT 28
-#define SID_MASK 0xfffffffffUL
-#define ESID_MASK 0xfffffffff0000000UL
-#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
+#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
-#define HPAGE_SHIFT 24
-#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
-#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+/* HW_PAGE_SHIFT is always 4k pages */
+#define HW_PAGE_SHIFT 12
+#define HW_PAGE_SIZE (ASM_CONST(1) << HW_PAGE_SHIFT)
+#define HW_PAGE_MASK (~(HW_PAGE_SIZE-1))
-#ifdef CONFIG_HUGETLB_PAGE
+/* PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and
+ * HW_PAGE_SHIFT, that is 4k pages
+ */
+#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT)
+
+/* Segment size */
+#define SID_SHIFT 28
+#define SID_MASK 0xfffffffffUL
+#define ESID_MASK 0xfffffffff0000000UL
+#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
+/* Large pages size */
+
+#ifndef __ASSEMBLY__
+extern unsigned int HPAGE_SHIFT;
+#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_HUGETLB_PAGE
+
#define HTLB_AREA_SHIFT 40
#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT)
#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT)
-#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \
- - (1U << GET_ESID(addr))) & 0xffff)
-#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
- - (1U << GET_HTLB_AREA(addr))) & 0xffff)
+#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \
+ - (1U << GET_ESID(addr))) & 0xffff)
+#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
+ - (1U << GET_HTLB_AREA(addr))) & 0xffff)
#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
@@ -114,7 +141,25 @@ static __inline__ void clear_page(void *addr)
: "ctr", "memory");
}
-extern void copy_page(void *to, void *from);
+extern void copy_4K_page(void *to, void *from);
+
+#ifdef CONFIG_PPC_64K_PAGES
+static inline void copy_page(void *to, void *from)
+{
+ unsigned int i;
+ for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
+ copy_4K_page(to, from);
+ to += 4096;
+ from += 4096;
+ }
+}
+#else /* CONFIG_PPC_64K_PAGES */
+static inline void copy_page(void *to, void *from)
+{
+ copy_4K_page(to, from);
+}
+#endif /* CONFIG_PPC_64K_PAGES */
+
struct page;
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p);
@@ -124,43 +169,75 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
* These are used to make use of C type-checking.
* Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
*/
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pmd; } pmd_t;
-typedef struct { unsigned long pud; } pud_t;
-typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
+/* PTE level */
+typedef struct { unsigned long pte; } pte_t;
#define pte_val(x) ((x).pte)
-#define pmd_val(x) ((x).pmd)
-#define pud_val(x) ((x).pud)
-#define pgd_val(x) ((x).pgd)
-#define pgprot_val(x) ((x).pgprot)
-
#define __pte(x) ((pte_t) { (x) })
+
+/* 64k pages additionally define a bigger "real PTE" type that gathers
+ * the "second half" part of the PTE for pseudo 64k pages
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+/* PMD level */
+typedef struct { unsigned long pmd; } pmd_t;
+#define pmd_val(x) ((x).pmd)
#define __pmd(x) ((pmd_t) { (x) })
+
+/* PUD level exists only on 4k pages */
+#ifndef CONFIG_PPC_64K_PAGES
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x) ((x).pud)
#define __pud(x) ((pud_t) { (x) })
+#endif
+
+/* PGD level */
+typedef struct { unsigned long pgd; } pgd_t;
+#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) })
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) })
#else
+
/*
* .. while these make it easier on the compiler
*/
-typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pud_t;
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
+typedef unsigned long pte_t;
#define pte_val(x) (x)
+#define __pte(x) (x)
+
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef unsigned long real_pte_t;
+#endif
+
+
+typedef unsigned long pmd_t;
#define pmd_val(x) (x)
+#define __pmd(x) (x)
+
+#ifndef CONFIG_PPC_64K_PAGES
+typedef unsigned long pud_t;
#define pud_val(x) (x)
+#define __pud(x) (x)
+#endif
+
+typedef unsigned long pgd_t;
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
-#define __pte(x) (x)
-#define __pmd(x) (x)
-#define __pud(x) (x)
+typedef unsigned long pgprot_t;
#define __pgd(x) (x)
#define __pgprot(x) (x)
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h
index 26bc49c1108d..98da0e4262bd 100644
--- a/include/asm-ppc64/pgalloc.h
+++ b/include/asm-ppc64/pgalloc.h
@@ -8,10 +8,16 @@
extern kmem_cache_t *pgtable_cache[];
+#ifdef CONFIG_PPC_64K_PAGES
+#define PTE_CACHE_NUM 0
+#define PMD_CACHE_NUM 0
+#define PGD_CACHE_NUM 1
+#else
#define PTE_CACHE_NUM 0
#define PMD_CACHE_NUM 1
#define PUD_CACHE_NUM 1
#define PGD_CACHE_NUM 0
+#endif
/*
* This program is free software; you can redistribute it and/or
@@ -30,6 +36,8 @@ static inline void pgd_free(pgd_t *pgd)
kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
}
+#ifndef CONFIG_PPC_64K_PAGES
+
#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -43,7 +51,30 @@ static inline void pud_free(pud_t *pud)
kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
}
-#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, (unsigned long)pmd);
+}
+
+#define pmd_populate(mm, pmd, pte_page) \
+ pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
+
+
+#else /* CONFIG_PPC_64K_PAGES */
+
+#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+{
+ pmd_set(pmd, (unsigned long)pte);
+}
+
+#define pmd_populate(mm, pmd, pte_page) \
+ pmd_populate_kernel(mm, pmd, page_address(pte_page))
+
+#endif /* CONFIG_PPC_64K_PAGES */
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@@ -56,17 +87,15 @@ static inline void pmd_free(pmd_t *pmd)
kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
}
-#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
-#define pmd_populate(mm, pmd, pte_page) \
- pmd_populate_kernel(mm, pmd, page_address(pte_page))
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
{
return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
GFP_KERNEL|__GFP_REPEAT);
}
-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
return virt_to_page(pte_alloc_one_kernel(mm, address));
}
@@ -103,7 +132,7 @@ static inline void pgtable_free(pgtable_free_t pgf)
kmem_cache_free(pgtable_cache[cachenum], p);
}
-void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
#define __pte_free_tlb(tlb, ptepage) \
pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
@@ -111,9 +140,11 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
#define __pmd_free_tlb(tlb, pmd) \
pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
+#ifndef CONFIG_PPC_64K_PAGES
#define __pud_free_tlb(tlb, pmd) \
pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
+#endif /* CONFIG_PPC_64K_PAGES */
#define check_pgt_cache() do { } while (0)
diff --git a/include/asm-ppc64/pgtable-4k.h b/include/asm-ppc64/pgtable-4k.h
new file mode 100644
index 000000000000..c883a2748558
--- /dev/null
+++ b/include/asm-ppc64/pgtable-4k.h
@@ -0,0 +1,88 @@
+/*
+ * Entries per page directory level. The PTE level must use a 64b record
+ * for each page table entry. The PMD and PGD level use a 32b record for
+ * each entry by assuming that each entry is page aligned.
+ */
+#define PTE_INDEX_SIZE 9
+#define PMD_INDEX_SIZE 7
+#define PUD_INDEX_SIZE 7
+#define PGD_INDEX_SIZE 9
+
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/* PTE bits */
+#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
+#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
+#define _PAGE_F_SECOND _PAGE_SECONDARY
+#define _PAGE_F_GIX _PAGE_GROUP_IX
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
+ _PAGE_SECONDARY | _PAGE_GROUP_IX)
+
+/* PAGE_MASK gives the right answer below, but only by accident */
+/* It should be preserving the high 48 bits and then specifically */
+/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
+ _PAGE_HPTEFLAGS)
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS 0
+
+/* shift to put page number into pte */
+#define PTE_RPN_SHIFT (17)
+
+#define __real_pte(e,p) ((real_pte_t)(e))
+#define __rpte_to_pte(r) (r)
+#define __rpte_to_hidx(r,index) (pte_val((r)) >> 12)
+
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
+ do { \
+ index = 0; \
+ shift = mmu_psize_defs[psize].shift; \
+
+#define pte_iterate_hashed_end() } while(0)
+
+/*
+ * 4-level page tables related bits
+ */
+
+#define pgd_none(pgd) (!pgd_val(pgd))
+#define pgd_bad(pgd) (pgd_val(pgd) == 0)
+#define pgd_present(pgd) (pgd_val(pgd) != 0)
+#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
+#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
+
+#define pud_offset(pgdp, addr) \
+ (((pud_t *) pgd_page(*(pgdp))) + \
+ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+
+#define pud_ERROR(e) \
+	printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
diff --git a/include/asm-ppc64/pgtable-64k.h b/include/asm-ppc64/pgtable-64k.h
new file mode 100644
index 000000000000..c5f437c86b3c
--- /dev/null
+++ b/include/asm-ppc64/pgtable-64k.h
@@ -0,0 +1,87 @@
+#include <asm-generic/pgtable-nopud.h>
+
+
+#define PTE_INDEX_SIZE 12
+#define PMD_INDEX_SIZE 12
+#define PUD_INDEX_SIZE 0
+#define PGD_INDEX_SIZE 4
+
+#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a third-level page table entry can map */
+#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/* Additional PTE bits (don't change without checking asm in hash_low.S) */
+#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */
+#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
+#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
+#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
+#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_HPTE_SUB |\
+ _PAGE_COMBO)
+
+/* Shift to put page number into pte.
+ *
+ * That gives us a max RPN of 32 bits, which means a max of 48 bits
+ * of addressable physical space.
+ * We could get 3 more bits here by setting PTE_RPN_SHIFT to 29 but
+ * 32 makes PTEs more readable for debugging for now :)
+ */
+#define PTE_RPN_SHIFT (32)
+#define PTE_RPN_MAX (1UL << (64 - PTE_RPN_SHIFT))
+#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
+
+/* _PAGE_CHG_MASK is the mask of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+ _PAGE_ACCESSED)
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0x1ff
+/* Bits to mask out from a PGD/PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0x1ff
+
+#ifndef __ASSEMBLY__
+
+/* Manipulate "rpte" values */
+#define __real_pte(e,p) ((real_pte_t) { \
+ (e), pte_val(*((p) + PTRS_PER_PTE)) })
+#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
+ (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
+#define __rpte_to_pte(r) ((r).pte)
+#define __rpte_sub_valid(rpte, index) /* _PAGE_HPTE_SUB bit of sub page */ \
+ (pte_val((rpte).pte) & (_PAGE_HPTE_SUB0 >> (index)))
+
+
+/* Trick: __end = va + 64k also works for a 16M page, where we want only
+ * one iteration. The step is 1UL << shift so it is computed in va's type.
+ */
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
+ do { \
+ unsigned long __end = va + PAGE_SIZE; \
+ unsigned __split = (psize == MMU_PAGE_4K || \
+ psize == MMU_PAGE_64K_AP); \
+ shift = mmu_psize_defs[psize].shift; \
+ for (index = 0; va < __end; index++, va += (1UL << shift)) { \
+ if (!__split || __rpte_sub_valid(rpte, index)) do { \
+
+#define pte_iterate_hashed_end() } while(0); } } while(0)
+
+
+#endif /* __ASSEMBLY__ */
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 8c3f574046b6..fde93ec36abc 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -15,40 +15,11 @@
#include <asm/tlbflush.h>
#endif /* __ASSEMBLY__ */
-/*
- * Entries per page directory level. The PTE level must use a 64b record
- * for each page table entry. The PMD and PGD level use a 32b record for
- * each entry by assuming that each entry is page aligned.
- */
-#define PTE_INDEX_SIZE 9
-#define PMD_INDEX_SIZE 7
-#define PUD_INDEX_SIZE 7
-#define PGD_INDEX_SIZE 9
-
-#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
-#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
-#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
-#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
-
-#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
-
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-
-/* PUD_SHIFT determines what a third-level page table entry can map */
-#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
-#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/pgtable-64k.h>
+#else
+#include <asm/pgtable-4k.h>
+#endif
#define FIRST_USER_ADDRESS 0
@@ -75,8 +46,9 @@
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
/*
- * Bits in a linux-style PTE. These match the bits in the
- * (hardware-defined) PowerPC PTE as closely as possible.
+ * Common bits in a linux-style PTE. These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible. Additional
+ * bits may be defined in pgtable-*.h
*/
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
@@ -91,15 +63,6 @@
#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
-#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
-#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
-#define _PAGE_HUGE 0x10000 /* 16MB page */
-/* Bits 0x7000 identify the index within an HPT Group */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
-/* PAGE_MASK gives the right answer below, but only by accident */
-/* It should be preserving the high 48 bits and then specifically */
-/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS)
#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)
@@ -122,10 +85,10 @@
#define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE)
#define HAVE_PAGE_AGP
-/*
- * This bit in a hardware PTE indicates that the page is *not* executable.
- */
-#define HW_NO_EXEC _PAGE_EXEC
+/* PTEIDX nibble */
+#define _PTEIDX_SECONDARY 0x8
+#define _PTEIDX_GROUP_IX 0x7
+
/*
* POWER4 and newer have per page execute protection, older chips can only
@@ -164,21 +127,10 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
#endif /* __ASSEMBLY__ */
-/* shift to put page number into pte */
-#define PTE_SHIFT (17)
-
#ifdef CONFIG_HUGETLB_PAGE
-#ifndef __ASSEMBLY__
-int hash_huge_page(struct mm_struct *mm, unsigned long access,
- unsigned long ea, unsigned long vsid, int local);
-#endif /* __ASSEMBLY__ */
-
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#else
-
-#define hash_huge_page(mm,a,ea,vsid,local) -1
#endif
@@ -197,7 +149,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
pte_t pte;
- pte_val(pte) = (pfn << PTE_SHIFT) | pgprot_val(pgprot);
+ pte_val(pte) = (pfn << PTE_RPN_SHIFT) | pgprot_val(pgprot);
return pte;
}
@@ -209,30 +161,25 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
/* pte_clear moved to later in this file */
-#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT)))
+#define pte_pfn(x) ((unsigned long)((pte_val(x)>>PTE_RPN_SHIFT)))
#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);})
+#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval))
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (pmd_val(pmd) == 0)
#define pmd_present(pmd) (pmd_val(pmd) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
-#define pmd_page_kernel(pmd) (pmd_val(pmd))
+#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
-#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp))
+#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) ((pud_val(pud)) == 0)
#define pud_present(pud) (pud_val(pud) != 0)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
-#define pud_page(pud) (pud_val(pud))
+#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
-#define pgd_none(pgd) (!pgd_val(pgd))
-#define pgd_bad(pgd) (pgd_val(pgd) == 0)
-#define pgd_present(pgd) (pgd_val(pgd) != 0)
-#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
-#define pgd_page(pgd) (pgd_val(pgd))
/*
* Find an entry in a page-table-directory. We combine the address region
@@ -243,9 +190,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-#define pud_offset(pgdp, addr) \
- (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
-
#define pmd_offset(pudp,addr) \
(((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
@@ -271,7 +215,6 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;}
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;}
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;}
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
-static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE;}
static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -286,7 +229,6 @@ static inline pte_t pte_mkclean(pte_t pte) {
pte_val(pte) &= ~(_PAGE_DIRTY); return pte; }
static inline pte_t pte_mkold(pte_t pte) {
pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-
static inline pte_t pte_mkread(pte_t pte) {
pte_val(pte) |= _PAGE_USER; return pte; }
static inline pte_t pte_mkexec(pte_t pte) {
@@ -298,7 +240,7 @@ static inline pte_t pte_mkdirty(pte_t pte) {
static inline pte_t pte_mkyoung(pte_t pte) {
pte_val(pte) |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkhuge(pte_t pte) {
- pte_val(pte) |= _PAGE_HUGE; return pte; }
+ return pte; }
/* Atomic PTE updates */
static inline unsigned long pte_update(pte_t *p, unsigned long clr)
@@ -321,11 +263,13 @@ static inline unsigned long pte_update(pte_t *p, unsigned long clr)
/* PTE updating functions, this function puts the PTE in the
* batch, doesn't actually trigger the hash flush immediately,
* you need to call flush_tlb_pending() to do that.
+ * Pass 0 in "huge" for "normal" size (4K or 64K), as the callers here do
*/
-extern void hpte_update(struct mm_struct *mm, unsigned long addr, unsigned long pte,
- int wrprot);
+extern void hpte_update(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long pte, int huge);
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
unsigned long old;
@@ -333,7 +277,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon
return 0;
old = pte_update(ptep, _PAGE_ACCESSED);
if (old & _PAGE_HASHPTE) {
- hpte_update(mm, addr, old, 0);
+ hpte_update(mm, addr, ptep, old, 0);
flush_tlb_pending();
}
return (old & _PAGE_ACCESSED) != 0;
@@ -351,7 +295,8 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon
* moment we always flush but we need to fix hpte_update and test if the
* optimisation is worth it.
*/
-static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
unsigned long old;
@@ -359,7 +304,7 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon
return 0;
old = pte_update(ptep, _PAGE_DIRTY);
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
+ hpte_update(mm, addr, ptep, old, 0);
return (old & _PAGE_DIRTY) != 0;
}
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
@@ -371,7 +316,8 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon
})
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
unsigned long old;
@@ -379,7 +325,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
return;
old = pte_update(ptep, _PAGE_RW);
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
+ hpte_update(mm, addr, ptep, old, 0);
}
/*
@@ -408,21 +354,23 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
})
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
+ hpte_update(mm, addr, ptep, old, 0);
return __pte(old);
}
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t * ptep)
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t * ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
+ hpte_update(mm, addr, ptep, old, 0);
}
/*
@@ -435,7 +383,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_clear(mm, addr, ptep);
flush_tlb_pending();
}
- *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+ pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mmu_virtual_psize != MMU_PAGE_64K)
+ pte = __pte(pte_val(pte) | _PAGE_COMBO);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ *ptep = pte;
}
/* Set the dirty and/or accessed bits atomically in a linux PTE, this
@@ -482,8 +437,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
-#define pud_ERROR(e) \
- printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
@@ -509,12 +462,12 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
/* Encode and de-code a swap entry */
#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
#define __swp_offset(entry) ((entry).val >> 8)
-#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> PTE_SHIFT })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_SHIFT })
-#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_SHIFT)|_PAGE_FILE})
-#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_SHIFT)
+#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
+#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
+#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
+#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
+#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
/*
* kern_addr_valid is intended to indicate whether an address is a valid
@@ -532,29 +485,22 @@ void pgtable_cache_init(void);
/*
* find_linux_pte returns the address of a linux pte for a given
* effective address and directory. If not found, it returns zero.
- */
-static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
+ */static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
{
pgd_t *pg;
pud_t *pu;
pmd_t *pm;
pte_t *pt = NULL;
- pte_t pte;
pg = pgdir + pgd_index(ea);
if (!pgd_none(*pg)) {
pu = pud_offset(pg, ea);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, ea);
- if (pmd_present(*pm)) {
+ if (pmd_present(*pm))
pt = pte_offset_kernel(pm, ea);
- pte = *pt;
- if (!pte_present(pte))
- pt = NULL;
- }
}
}
-
return pt;
}
diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h
index e8d0d2ab4c0f..bdb47174ff0e 100644
--- a/include/asm-ppc64/prom.h
+++ b/include/asm-ppc64/prom.h
@@ -188,6 +188,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
extern struct device_node *of_node_get(struct device_node *node);
extern void of_node_put(struct device_node *node);
+/* For scanning the flat device-tree at boot time */
+int __init of_scan_flat_dt(int (*it)(unsigned long node,
+ const char *uname, int depth,
+ void *data),
+ void *data);
+void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
+ unsigned long *size);
+
/* For updating the device tree at runtime */
extern void of_attach_node(struct device_node *);
extern void of_detach_node(const struct device_node *);
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h
index 99b8ca52f101..0cdd66c9f4b7 100644
--- a/include/asm-ppc64/system.h
+++ b/include/asm-ppc64/system.h
@@ -248,7 +248,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
}
static __inline__ unsigned long
-__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new)
+__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
{
unsigned long prev;