summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/mmiotrace/kmmio.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/mmiotrace/kmmio.c')
-rw-r--r--arch/x86/kernel/mmiotrace/kmmio.c349
1 files changed, 240 insertions, 109 deletions
diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
index 5e239d0b8467..539a9b19588f 100644
--- a/arch/x86/kernel/mmiotrace/kmmio.c
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -6,6 +6,7 @@
*/
#include <linux/version.h>
+#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
@@ -17,70 +18,119 @@
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
+#include <linux/kdebug.h>
#include <asm/io.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
-#include "kmmio.h"
+#include <linux/mmiotrace.h>
-#define KMMIO_HASH_BITS 6
-#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
+struct kmmio_fault_page {
+ struct list_head list;
+ struct kmmio_fault_page *release_next;
+ unsigned long page; /* location of the fault page */
+
+ /*
+ * Number of times this page has been registered as a part
+ * of a probe. If zero, page is disarmed and this may be freed.
+ * Used only by writers (RCU).
+ */
+ int count;
+};
+
+struct kmmio_delayed_release {
+ struct rcu_head rcu;
+ struct kmmio_fault_page *release_list;
+};
+
struct kmmio_context {
struct kmmio_fault_page *fpage;
struct kmmio_probe *probe;
unsigned long saved_flags;
+ unsigned long addr;
int active;
};
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
- unsigned long address);
static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
void *args);
+static DECLARE_MUTEX(kmmio_init_mutex);
static DEFINE_SPINLOCK(kmmio_lock);
/* These are protected by kmmio_lock */
+static int kmmio_initialized;
unsigned int kmmio_count;
-static unsigned int handler_registered;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+ return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+}
+
/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
+/* protected by kmmio_init_mutex */
static struct notifier_block nb_die = {
.notifier_call = kmmio_die_notifier
};
-int init_kmmio(void)
+/**
+ * Makes sure kmmio is initialized and usable.
+ * This must be called before any other kmmio function defined here.
+ * May sleep.
+ */
+void reference_kmmio(void)
{
- int i;
- for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
- INIT_LIST_HEAD(&kmmio_page_table[i]);
-
- register_die_notifier(&nb_die);
- return 0;
+ down(&kmmio_init_mutex);
+ spin_lock_irq(&kmmio_lock);
+ if (!kmmio_initialized) {
+ int i;
+ for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+ INIT_LIST_HEAD(&kmmio_page_table[i]);
+ if (register_die_notifier(&nb_die))
+ BUG();
+ }
+ kmmio_initialized++;
+ spin_unlock_irq(&kmmio_lock);
+ up(&kmmio_init_mutex);
}
+EXPORT_SYMBOL_GPL(reference_kmmio);
-void cleanup_kmmio(void)
+/**
+ * Clean up kmmio after use. This must be called for every call to
+ * reference_kmmio(). All probes registered after the corresponding
+ * reference_kmmio() must have been unregistered when calling this.
+ * May sleep.
+ */
+void unreference_kmmio(void)
{
- /*
- * Assume the following have been already cleaned by calling
- * unregister_kmmio_probe() appropriately:
- * kmmio_page_table, kmmio_probes
- */
- if (handler_registered) {
- if (mmiotrace_unregister_pf(&kmmio_page_fault))
- BUG();
- synchronize_rcu();
+ bool unreg = false;
+
+ down(&kmmio_init_mutex);
+ spin_lock_irq(&kmmio_lock);
+
+ if (kmmio_initialized == 1) {
+ BUG_ON(is_kmmio_active());
+ unreg = true;
}
- unregister_die_notifier(&nb_die);
+ kmmio_initialized--;
+ BUG_ON(kmmio_initialized < 0);
+ spin_unlock_irq(&kmmio_lock);
+
+ if (unreg)
+ unregister_die_notifier(&nb_die); /* calls sync_rcu() */
+ up(&kmmio_init_mutex);
}
+EXPORT_SYMBOL(unreference_kmmio);
/*
* this is basically a dynamic stabbing problem:
@@ -90,33 +140,33 @@ void cleanup_kmmio(void)
* Overlap a Point (might be simple)
* Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
*/
-/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
struct kmmio_probe *p;
- list_for_each_entry(p, &kmmio_probes, list) {
+ list_for_each_entry_rcu(p, &kmmio_probes, list) {
if (addr >= p->addr && addr <= (p->addr + p->len))
return p;
}
return NULL;
}
+/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
- struct list_head *head, *tmp;
+ struct list_head *head;
+ struct kmmio_fault_page *p;
page &= PAGE_MASK;
- head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
- list_for_each(tmp, head) {
- struct kmmio_fault_page *p
- = list_entry(tmp, struct kmmio_fault_page, list);
+ head = kmmio_page_list(page);
+ list_for_each_entry_rcu(p, head, list) {
if (p->page == page)
return p;
}
-
return NULL;
}
+/** Mark the given page as not present. Access to it will trigger a fault. */
static void arm_kmmio_fault_page(unsigned long page, int *page_level)
{
unsigned long address = page & PAGE_MASK;
@@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
pte_t *pte = lookup_address(address, &level);
if (!pte) {
- printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
- __FUNCTION__, page);
+ pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+ __func__, page);
return;
}
@@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
__flush_tlb_one(page);
}
+/** Mark the given page as present. */
static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
{
unsigned long address = page & PAGE_MASK;
@@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
pte_t *pte = lookup_address(address, &level);
if (!pte) {
- printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
- __FUNCTION__, page);
+ pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+ __func__, page);
return;
}
@@ -170,12 +221,24 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
}
/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefecthing may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
* Interrupts are disabled on entry as trap3 is an interrupt gate
* and they remain disabled thorough out this function.
*/
-static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
- struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+ struct kmmio_context *ctx;
+ struct kmmio_fault_page *faultpage;
/*
* Preemption is now disabled to prevent process switch during
@@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
* XXX what if an interrupt occurs between returning from
* do_page_fault() and entering the single-step exception handler?
* And that interrupt triggers a kmmio trap?
+ * XXX If we tracing an interrupt service routine or whatever, is
+ * this enough to keep it on the current cpu?
*/
preempt_disable();
- /* interrupts disabled and CPU-local data => atomicity guaranteed. */
+ rcu_read_lock();
+ faultpage = get_kmmio_fault_page(addr);
+ if (!faultpage) {
+ /*
+ * Either this page fault is not caused by kmmio, or
+ * another CPU just pulled the kmmio probe from under
+ * our feet. In the latter case all hell breaks loose.
+ */
+ goto no_kmmio;
+ }
+
+ ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) {
/*
- * This avoids a deadlock with kmmio_lock.
+ * Prevent overwriting already in-flight context.
* If this page fault really was due to kmmio trap,
* all hell breaks loose.
*/
- printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, "
- "for address %lu. Ignoring.\n",
+ pr_emerg("kmmio: recursive probe hit on CPU %d, "
+ "for address 0x%08lx. Ignoring.\n",
smp_processor_id(), addr);
- goto no_kmmio;
+ goto no_kmmio_ctx;
}
ctx->active++;
- /*
- * Acquire the kmmio lock to prevent changes affecting
- * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
- * returned pointers.
- * The lock is released in post_kmmio_handler().
- * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
- */
- spin_lock(&kmmio_lock);
-
- ctx->fpage = get_kmmio_fault_page(addr);
- if (!ctx->fpage) {
- /* this page fault is not caused by kmmio */
- goto no_kmmio_locked;
- }
-
+ ctx->fpage = faultpage;
ctx->probe = get_kmmio_probe(addr);
ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
+ ctx->addr = addr;
if (ctx->probe && ctx->probe->pre_handler)
ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
regs->flags |= TF_MASK;
regs->flags &= ~IF_MASK;
- /* We hold lock, now we set present bit in PTE and single step. */
+ /* Now we set present bit in PTE and single step. */
disarm_kmmio_fault_page(ctx->fpage->page, NULL);
put_cpu_var(kmmio_ctx);
+ rcu_read_unlock();
return 1;
-no_kmmio_locked:
- spin_unlock(&kmmio_lock);
- ctx->active--;
+no_kmmio_ctx:
+ put_cpu_var(kmmio_ctx);
no_kmmio:
+ rcu_read_unlock();
preempt_enable_no_resched();
- put_cpu_var(kmmio_ctx);
- /* page fault not handled by kmmio */
- return 0;
+ return 0; /* page fault not handled by kmmio */
}
/*
* Interrupts are disabled on entry as trap1 is an interrupt gate
* and they remain disabled thorough out this function.
- * And we hold kmmio lock.
+ * This must always get called as the pair to kmmio_handler().
*/
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
int ret = 0;
+ struct kmmio_probe *probe;
+ struct kmmio_fault_page *faultpage;
struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
if (!ctx->active)
goto out;
+ rcu_read_lock();
+
+ faultpage = get_kmmio_fault_page(ctx->addr);
+ probe = get_kmmio_probe(ctx->addr);
+ if (faultpage != ctx->fpage || probe != ctx->probe) {
+ /*
+ * The trace setup changed after kmmio_handler() and before
+ * running this respective post handler. User does not want
+ * the result anymore.
+ */
+ ctx->probe = NULL;
+ ctx->fpage = NULL;
+ }
+
if (ctx->probe && ctx->probe->post_handler)
ctx->probe->post_handler(ctx->probe, condition, regs);
- arm_kmmio_fault_page(ctx->fpage->page, NULL);
+ if (ctx->fpage)
+ arm_kmmio_fault_page(ctx->fpage->page, NULL);
regs->flags &= ~TF_MASK;
regs->flags |= ctx->saved_flags;
/* These were acquired in kmmio_handler(). */
ctx->active--;
- spin_unlock(&kmmio_lock);
+ BUG_ON(ctx->active);
preempt_enable_no_resched();
/*
@@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
if (!(regs->flags & TF_MASK))
ret = 1;
+ rcu_read_unlock();
out:
put_cpu_var(kmmio_ctx);
return ret;
}
+/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page)
{
struct kmmio_fault_page *f;
@@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page)
page &= PAGE_MASK;
f = get_kmmio_fault_page(page);
if (f) {
+ if (!f->count)
+ arm_kmmio_fault_page(f->page, NULL);
f->count++;
return 0;
}
@@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page)
f->count = 1;
f->page = page;
- list_add(&f->list,
- &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);
+ list_add_rcu(&f->list, kmmio_page_list(f->page));
arm_kmmio_fault_page(f->page, NULL);
return 0;
}
-static void release_kmmio_fault_page(unsigned long page)
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long page,
+ struct kmmio_fault_page **release_list)
{
struct kmmio_fault_page *f;
@@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page)
return;
f->count--;
+ BUG_ON(f->count < 0);
if (!f->count) {
disarm_kmmio_fault_page(f->page, NULL);
- list_del(&f->list);
+ f->release_next = *release_list;
+ *release_list = f;
}
}
@@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p)
ret = -EEXIST;
goto out;
}
- list_add(&p->list, &kmmio_probes);
- /*printk("adding fault pages...\n");*/
+ list_add_rcu(&p->list, &kmmio_probes);
while (size < p->len) {
if (add_kmmio_fault_page(p->addr + size))
- printk(KERN_ERR "mmio: Unable to set page fault.\n");
+ pr_err("kmmio: Unable to set page fault.\n");
size += PAGE_SIZE;
}
-
- if (!handler_registered) {
- if (mmiotrace_register_pf(&kmmio_page_fault))
- printk(KERN_ERR "mmiotrace: Cannot register page "
- "fault handler.\n");
- else
- handler_registered++;
- }
-
out:
spin_unlock_irq(&kmmio_lock);
/*
* XXX: What should I do here?
* Here was a call to global_flush_tlb(), but it does not exist
- * anymore.
+ * anymore. It seems it's not needed after all.
*/
return ret;
}
+EXPORT_SYMBOL(register_kmmio_probe);
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+ struct kmmio_delayed_release *dr = container_of(
+ head,
+ struct kmmio_delayed_release,
+ rcu);
+ struct kmmio_fault_page *p = dr->release_list;
+ while (p) {
+ struct kmmio_fault_page *next = p->release_next;
+ BUG_ON(p->count);
+ kfree(p);
+ p = next;
+ }
+ kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+ struct kmmio_delayed_release *dr = container_of(
+ head,
+ struct kmmio_delayed_release,
+ rcu);
+ struct kmmio_fault_page *p = dr->release_list;
+ struct kmmio_fault_page **prevp = &dr->release_list;
+ unsigned long flags;
+ spin_lock_irqsave(&kmmio_lock, flags);
+ while (p) {
+ if (!p->count)
+ list_del_rcu(&p->list);
+ else
+ *prevp = p->release_next;
+ prevp = &p->release_next;
+ p = p->release_next;
+ }
+ spin_unlock_irqrestore(&kmmio_lock, flags);
+ /* This is the real RCU destroy call. */
+ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ * Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ * Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ * Actally free the kmmio_fault_page structs as with RCU.
+ */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
unsigned long size = 0;
+ struct kmmio_fault_page *release_list = NULL;
+ struct kmmio_delayed_release *drelease;
spin_lock_irq(&kmmio_lock);
while (size < p->len) {
- release_kmmio_fault_page(p->addr + size);
+ release_kmmio_fault_page(p->addr + size, &release_list);
size += PAGE_SIZE;
}
- list_del(&p->list);
+ list_del_rcu(&p->list);
kmmio_count--;
spin_unlock_irq(&kmmio_lock);
-}
-/*
- * According to 2.6.20, mainly x86_64 arch:
- * This is being called from do_page_fault(), via the page fault notifier
- * chain. The chain is called for both user space faults and kernel space
- * faults (address >= TASK_SIZE64), except not on faults serviced by
- * vmalloc_fault().
- *
- * We may be in an interrupt or a critical section. Also prefecthing may
- * trigger a page fault. We may be in the middle of process switch.
- * The page fault hook functionality has put us inside RCU read lock.
- *
- * Local interrupts are disabled, so preemption cannot happen.
- * Do not enable interrupts, do not sleep, and watch out for other CPUs.
- */
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
-{
- if (is_kmmio_active())
- if (kmmio_handler(regs, address) == 1)
- return -1;
- return 0;
+ drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+ if (!drelease) {
+ pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+ return;
+ }
+ drelease->release_list = release_list;
+
+ /*
+ * This is not really RCU here. We have just disarmed a set of
+ * pages so that they cannot trigger page faults anymore. However,
+ * we cannot remove the pages from kmmio_page_table,
+ * because a probe hit might be in flight on another CPU. The
+ * pages are collected into a list, and they will be removed from
+ * kmmio_page_table when it is certain that no probe hit related to
+ * these pages can be in flight. RCU grace period sounds like a
+ * good choice.
+ *
+ * If we removed the pages too early, kmmio page fault handler might
+ * not find the respective kmmio_fault_page and determine it's not
+ * a kmmio fault, when it actually is. This would lead to madness.
+ */
+ call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
+EXPORT_SYMBOL(unregister_kmmio_probe);
static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
void *args)