#define PTE_UPDATE_WITH_CMPXCHG
#endif
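+/* Non-zero once the SRAT reports hot-pluggable memory ranges. */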
+int mem_hotplug = 0;
+
/* Private domain structs for DOMID_XEN and DOMID_IO. */
struct domain *dom_xen, *dom_io;
struct node_data node_data[MAX_NUMNODES];
+/* Mapping from pdx to node id */
int memnode_shift;
u8 memnodemap[NODEMAPSIZE];
* 0 if memnodemap[] too small (or shift too small)
* -1 if node overlap or lost ram (shift too big)
*/
-static int __devinit
-populate_memnodemap(const struct node *nodes, int numnodes, int shift)
+static int __init populate_memnodemap(const struct node *nodes,
+ int numnodes, int shift, int *nodeids)
{
- int i;
- int res = -1;
- paddr_t addr, end;
+ unsigned long spdx, epdx;
+ int i, res = -1;
- if (shift >= 64)
- return -1;
- memset(memnodemap, 0xff, sizeof(memnodemap));
+ memset(memnodemap, NUMA_NO_NODE, sizeof(memnodemap));
for (i = 0; i < numnodes; i++) {
- addr = nodes[i].start;
- end = nodes[i].end;
- if (addr >= end)
+ spdx = paddr_to_pdx(nodes[i].start);
+ epdx = paddr_to_pdx(nodes[i].end);
+ if (spdx >= epdx)
continue;
- if ((end >> shift) >= NODEMAPSIZE)
+ if ((epdx >> shift) >= NODEMAPSIZE)
return 0;
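+ /* A slot claimed by two nodes means overlap, or the shift is too big. */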
do {
- if (memnodemap[addr >> shift] != 0xff)
+ if (memnodemap[spdx >> shift] != NUMA_NO_NODE)
return -1;
- memnodemap[addr >> shift] = i;
- addr += (1ULL << shift);
- } while (addr < end);
+
+ if (!nodeids)
+ memnodemap[spdx >> shift] = i;
+ else
+ memnodemap[spdx >> shift] = nodeids[i];
+
+ spdx += (1UL << shift);
+ } while (spdx < epdx);
res = 1;
- }
+ }
return res;
}
-int __init compute_hash_shift(struct node *nodes, int numnodes)
+/*
+ * The position of the lowest set bit across all node start addresses (in
+ * pdx space) is the maximum possible shift: any larger shift would leave
+ * some node start unaligned to a memnodemap[] slot boundary.
+ */
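+/*
+ * Worked example with hypothetical pdx values: starts of 0x40000 and
+ * 0xc0000 OR to 0xc0000, whose lowest set bit is 18, so shift 18 is the
+ * coarsest granularity at which each memnodemap[] slot maps to a single
+ * node.
+ */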
+static int __init extract_lsb_from_nodes(const struct node *nodes,
+ int numnodes)
{
- int shift = 20;
+ int i, nodes_used = 0;
+ unsigned long spdx, epdx;
+ unsigned long bitfield = 0, memtop = 0;
- while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
- shift++;
+ for (i = 0; i < numnodes; i++) {
+ spdx = paddr_to_pdx(nodes[i].start);
+ epdx = paddr_to_pdx(nodes[i].end);
+ if (spdx >= epdx)
+ continue;
+ bitfield |= spdx;
+ nodes_used++;
+ if (epdx > memtop)
+ memtop = epdx;
+ }
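+ /* With fewer than two usable nodes, any shift works; use the maximum. */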
+ if (nodes_used <= 1)
+ i = 63;
+ else
+ i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
+ return i;
+}
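+
+/*
+ * Pick the coarsest conflict-free shift and fill memnodemap[]; returns
+ * the shift on success or -1 if no usable mapping could be built.
+ */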
+int __init compute_hash_shift(struct node *nodes, int numnodes,
+ int *nodeids)
+{
+ int shift;
+
+ shift = extract_lsb_from_nodes(nodes, numnodes);
printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
shift);
- if (populate_memnodemap(nodes, numnodes, shift) != 1) {
- printk(KERN_INFO
- "Your memory is not aligned you need to rebuild your kernel "
- "with a bigger NODEMAPSIZE shift=%d\n",
- shift);
+ if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
+ printk(KERN_INFO "Your memory is not aligned; you need to "
+ "rebuild your kernel with a bigger NODEMAPSIZE, "
+ "shift=%d\n", shift);
return -1;
}
return shift;
}
-
/* initialize NODE_DATA given nodeid and start/end */
void __init setup_node_bootmem(int nodeid, u64 start, u64 end)
{
(nodes[i].end - nodes[i].start) >> 20);
node_set_online(i);
}
- memnode_shift = compute_hash_shift(nodes, numa_fake);
+ memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
if (memnode_shift < 0) {
memnode_shift = 0;
printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
static struct node nodes[MAX_NUMNODES] __initdata;
static u8 __read_mostly pxm2node[256] = { [0 ... 255] = 0xff };
+
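+/* Memory ranges from the SRAT; a node may consist of several memblks. */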
+static int num_node_memblks;
+static struct node node_memblk_range[NR_NODE_MEMBLKS];
+static int memblk_nodeid[NR_NODE_MEMBLKS];
+
/* Too small nodes confuse the VM badly. Usually they result
from BIOS bugs. */
#define NODE_MIN_SIZE (4*1024*1024)
return pxm2node[pxm];
}
-static __init int conflicting_nodes(u64 start, u64 end)
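+/*
+ * Return 1 if [start, end) is covered by a memblk recorded for @node,
+ * 0 otherwise.
+ */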
+int valid_numa_range(unsigned long start, unsigned long end, int node)
+{
+ int i;
+
+ for (i = 0; i < num_node_memblks; i++) {
+ struct node *nd = &node_memblk_range[i];
+
+ if (nd->start <= start && nd->end > end &&
+ memblk_nodeid[i] == node)
+ return 1;
+ }
+
+ return 0;
+}
+
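+/* Return the node id of a recorded memblk overlapping [start, end), or -1. */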
+static __init int conflicting_memblks(unsigned long start, unsigned long end)
{
int i;
- for_each_node_mask(i, nodes_parsed) {
- struct node *nd = &nodes[i];
+
+ for (i = 0; i < num_node_memblks; i++) {
+ struct node *nd = &node_memblk_range[i];
if (nd->start == nd->end)
continue;
if (nd->end > start && nd->start < end)
- return i;
+ return memblk_nodeid[i];
if (nd->end == end && nd->start == start)
- return i;
+ return memblk_nodeid[i];
}
return -1;
}
}
if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
return;
+
+ if (num_node_memblks >= NR_NODE_MEMBLKS) {
+ dprintk(XENLOG_WARNING,
+ "Too many NUMA entries; try a bigger NR_NODE_MEMBLKS\n");
+ bad_srat();
+ return;
+ }
+
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
}
/* It is fine to add this area to the nodes data it will be used later*/
if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
+ {
printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
start, end);
- i = conflicting_nodes(start, end);
+#ifdef CONFIG_X86_64
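+ /* Memory hot-add is only handled on x86-64. */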
+ mem_hotplug = 1;
+#endif
+ }
+
+ i = conflicting_memblks(start, end);
if (i == node) {
printk(KERN_WARNING
"SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%"
nd->end = end;
}
printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
- nd->start, nd->end);
+ start, end);
+
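+ /* Record the memblk so valid_numa_range() can check against it. */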
+ node_memblk_range[num_node_memblks].start = start;
+ node_memblk_range[num_node_memblks].end = end;
+ memblk_nodeid[num_node_memblks] = node;
+ num_node_memblks++;
}
/* Sanity check to catch more bad SRATs (they are amazingly common).
return 1;
}
-static void unparse_node(int node)
-{
- int i;
- node_clear(node, nodes_parsed);
- for (i = 0; i < MAX_LOCAL_APIC; i++) {
- if (apicid_to_node[i] == node)
- apicid_to_node[i] = NUMA_NO_NODE;
- }
-}
-
void __init acpi_numa_arch_fixup(void) {}
#ifdef __x86_64__
int i;
/* First clean up the node list */
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < MAX_NUMNODES; i++)
cutoff_node(i, start, end);
- if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
- unparse_node(i);
- }
if (acpi_numa <= 0)
return -1;
return -1;
}
- memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
+ memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
+ memblk_nodeid);
+
if (memnode_shift < 0) {
printk(KERN_ERR
"SRAT: No NUMA node hash function found. Contact maintainer\n");
/* Finally register nodes */
for_each_node_mask(i, nodes_parsed)
+ {
+ if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
+ continue;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ }
for (i = 0; i < NR_CPUS; i++) {
if (cpu_to_node[i] == NUMA_NO_NODE)
continue;
int check_descriptor(const struct domain *, struct desc_struct *d);
extern int opt_allow_hugepage;
+extern int mem_hotplug;
/******************************************************************************
* With shadow pagetables, the different kinds of address start
u64 start,end;
};
-extern int compute_hash_shift(struct node *nodes, int numnodes);
+extern int __init compute_hash_shift(struct node *nodes, int numnodes,
+ int *nodeids);
extern int pxm_to_node(int nid);
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
cpu_clear(cpu, node_to_cpumask[cpu_to_node(cpu)]);
}
-/* Simple perfect hash to map physical addresses to node numbers */
+/* Simple perfect hash to map pdx to node numbers */
extern int memnode_shift;
extern u8 memnodemap[NODEMAPSIZE];
static inline __attribute__((pure)) int phys_to_nid(paddr_t addr)
{
- unsigned nid;
- VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
- nid = memnodemap[addr >> memnode_shift];
+ unsigned nid;
+ VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= NODEMAPSIZE);
+ nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
return nid;
}
#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
NODE_DATA(nid)->node_spanned_pages)
-
+extern int valid_numa_range(unsigned long start, unsigned long end, int node);
#else
#define init_cpu_to_node() do {} while (0)
#define clear_node_cpumask(cpu) do {} while (0)
+#define valid_numa_range(start, end, node) (1)
#endif
void srat_parse_regions(u64 addr);
#define page_to_virt(pg) __page_to_virt(pg)
#define pfn_to_paddr(pfn) __pfn_to_paddr(pfn)
#define paddr_to_pfn(pa) __paddr_to_pfn(pa)
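+/* Physical address to pdx (page index with unused address bits compressed). */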
+#define paddr_to_pdx(pa) pfn_to_pdx(paddr_to_pfn(pa))
#endif /* !defined(__ASSEMBLY__) */