From 19770b32609b6bf97a3dece2529089494cbfc549 Mon Sep 17 00:00:00 2001 From: Mel Gorman <mel@csn.ul.ie> Date: Mon, 28 Apr 2008 02:12:18 -0700 Subject: mm: filter based on a nodemask as well as a gfp_mask The MPOL_BIND policy creates a zonelist that is used for allocations controlled by that mempolicy. As the per-node zonelist is already being filtered based on a zone id, this patch adds a version of __alloc_pages() that takes a nodemask for further filtering. This eliminates the need for MPOL_BIND to create a custom zonelist. A positive benefit of this is that allocations using MPOL_BIND now use the local node's distance-ordered zonelist instead of a custom node-id-ordered zonelist. I.e., pages will be allocated from the closest allowed node with available memory. [Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments] [Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask] [Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework] Signed-off-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Hugh Dickins <hugh@veritas.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/mmzone.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'mm/mmzone.c') diff --git a/mm/mmzone.c b/mm/mmzone.c index eb5838634f18..486ed595ee6f 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -42,3 +42,33 @@ struct zone *next_zone(struct zone *zone) return zone; } +static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) +{ +#ifdef CONFIG_NUMA + return node_isset(zonelist_node_idx(zref), *nodes); +#else + return 1; +#endif /* CONFIG_NUMA */ +} + +/* Returns the next zone at or below highest_zoneidx in a zonelist */ +struct zoneref *next_zones_zonelist(struct zoneref *z, + enum zone_type highest_zoneidx, + nodemask_t *nodes, + struct zone **zone) +{ + /* + * Find the next suitable zone to use for the allocation. + * Only filter based on nodemask if it's set + */ + if (likely(nodes == NULL)) + while (zonelist_zone_idx(z) > highest_zoneidx) + z++; + else + while (zonelist_zone_idx(z) > highest_zoneidx || + (z->zone && !zref_in_nodemask(z, nodes))) + z++; + + *zone = zonelist_zone(z++); + return z; +} -- cgit v1.2.1