/* * capabilities.c: hypervisor capabilities * * Copyright (C) 2006-2015 Red Hat, Inc. * Copyright (C) 2006-2008 Daniel P. Berrange * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see * . */ #include #include #include "capabilities.h" #include "cpu_conf.h" #include "domain_conf.h" #include "storage_conf.h" #include "viralloc.h" #include "virarch.h" #include "virbuffer.h" #include "virerror.h" #include "virfile.h" #include "virhostcpu.h" #include "virhostmem.h" #include "virlog.h" #include "virnuma.h" #include "virstring.h" #include "viruuid.h" #include "virenum.h" #include "virutil.h" #define VIR_FROM_THIS VIR_FROM_CAPABILITIES #define SYSFS_SYSTEM_PATH "/sys/devices/system" VIR_LOG_INIT("conf.capabilities"); VIR_ENUM_DECL(virCapsHostPMTarget); VIR_ENUM_IMPL(virCapsHostPMTarget, VIR_NODE_SUSPEND_TARGET_LAST, "suspend_mem", "suspend_disk", "suspend_hybrid", ); static virClass *virCapsClass; static void virCapsDispose(void *obj); static int virCapabilitiesOnceInit(void) { if (!VIR_CLASS_NEW(virCaps, virClassForObject())) return -1; return 0; } VIR_ONCE_GLOBAL_INIT(virCapabilities); /** * virCapabilitiesNew: * @hostarch: host machine architecture * @offlineMigrate: true if offline migration is available * @liveMigrate: true if live migration is available * * Allocate a new capabilities object */ virCaps * virCapabilitiesNew(virArch hostarch, bool offlineMigrate, bool liveMigrate) { virCaps *caps; if (virCapabilitiesInitialize() < 0) return NULL; if (!(caps = virObjectNew(virCapsClass))) return NULL; caps->host.arch = hostarch; caps->host.offlineMigrate = offlineMigrate; caps->host.liveMigrate = liveMigrate; return caps; } void virCapabilitiesClearHostNUMACellCPUTopology(virCapsHostNUMACellCPU *cpus, size_t ncpus) { size_t i; if (!cpus) return; for (i = 0; i < ncpus; i++) g_clear_pointer(&cpus[i].siblings, virBitmapFree); } static void virCapabilitiesFreeHostNUMACell(virCapsHostNUMACell *cell) { if (cell == NULL) return; virCapabilitiesClearHostNUMACellCPUTopology(cell->cpus, cell->ncpus); g_free(cell->cpus); g_free(cell->distances); g_free(cell->pageinfo); if (cell->caches) g_array_unref(cell->caches); g_free(cell); } static void virCapabilitiesFreeGuestMachine(virCapsGuestMachine *machine) { if (machine == NULL) return; g_free(machine->name); g_free(machine->canonical); g_free(machine); } static void virCapabilitiesFreeGuestDomain(virCapsGuestDomain *dom) { size_t i; if (dom == NULL) return; g_free(dom->info.emulator); g_free(dom->info.loader); for (i = 0; i < dom->info.nmachines; i++) virCapabilitiesFreeGuestMachine(dom->info.machines[i]); g_free(dom->info.machines); g_free(dom); } void virCapabilitiesFreeGuest(virCapsGuest *guest) { size_t i; if (guest == NULL) return; g_free(guest->arch.defaultInfo.emulator); g_free(guest->arch.defaultInfo.loader); for (i = 0; i < guest->arch.defaultInfo.nmachines; i++) virCapabilitiesFreeGuestMachine(guest->arch.defaultInfo.machines[i]); g_free(guest->arch.defaultInfo.machines); for (i = 0; i < guest->arch.ndomains; i++) virCapabilitiesFreeGuestDomain(guest->arch.domains[i]); g_free(guest->arch.domains); g_free(guest); } static void virCapabilitiesFreeStoragePool(virCapsStoragePool *pool) { if (!pool) return; g_free(pool); } void virCapabilitiesHostNUMAUnref(virCapsHostNUMA *caps) { if (!caps) return; if (g_atomic_int_dec_and_test(&caps->refs)) { g_ptr_array_unref(caps->cells); if (caps->interconnects) g_array_unref(caps->interconnects); g_free(caps); } } void virCapabilitiesHostNUMARef(virCapsHostNUMA *caps) { g_atomic_int_inc(&caps->refs); } static void virCapsHostMemBWNodeFree(virCapsHostMemBWNode *ptr) { if (!ptr) return; virBitmapFree(ptr->cpus); g_free(ptr); } static void virCapabilitiesClearSecModel(virCapsHostSecModel *secmodel) { size_t i; for (i = 0; i < secmodel->nlabels; i++) { VIR_FREE(secmodel->labels[i].type); VIR_FREE(secmodel->labels[i].label); } VIR_FREE(secmodel->labels); VIR_FREE(secmodel->model); VIR_FREE(secmodel->doi); } static void virCapsDispose(void *object) { virCaps *caps = object; size_t i; for (i = 0; i < caps->npools; i++) virCapabilitiesFreeStoragePool(caps->pools[i]); g_free(caps->pools); for (i = 0; i < caps->nguests; i++) virCapabilitiesFreeGuest(caps->guests[i]); g_free(caps->guests); for (i = 0; i < caps->host.nfeatures; i++) g_free(caps->host.features[i]); g_free(caps->host.features); if (caps->host.numa) virCapabilitiesHostNUMAUnref(caps->host.numa); for (i = 0; i < caps->host.nmigrateTrans; i++) g_free(caps->host.migrateTrans[i]); g_free(caps->host.migrateTrans); for (i = 0; i < caps->host.nsecModels; i++) virCapabilitiesClearSecModel(&caps->host.secModels[i]); g_free(caps->host.secModels); for (i = 0; i < caps->host.cache.nbanks; i++) virCapsHostCacheBankFree(caps->host.cache.banks[i]); virResctrlInfoMonFree(caps->host.cache.monitor); g_free(caps->host.cache.banks); for (i = 0; i < caps->host.memBW.nnodes; i++) virCapsHostMemBWNodeFree(caps->host.memBW.nodes[i]); virResctrlInfoMonFree(caps->host.memBW.monitor); g_free(caps->host.memBW.nodes); g_free(caps->host.netprefix); g_free(caps->host.pagesSize); virCPUDefFree(caps->host.cpu); virObjectUnref(caps->host.resctrl); } /** * virCapabilitiesAddHostFeature: * @caps: capabilities to extend * @name: name of new feature * * Registers a new host CPU feature, eg 'pae', or 'vmx' */ int virCapabilitiesAddHostFeature(virCaps *caps, const char *name) { VIR_RESIZE_N(caps->host.features, caps->host.nfeatures_max, caps->host.nfeatures, 1); caps->host.features[caps->host.nfeatures] = g_strdup(name); caps->host.nfeatures++; return 0; } /** * virCapabilitiesAddHostMigrateTransport: * @caps: capabilities to extend * @name: name of migration transport * * Registers a new domain migration transport URI */ int virCapabilitiesAddHostMigrateTransport(virCaps *caps, const char *name) { VIR_RESIZE_N(caps->host.migrateTrans, caps->host.nmigrateTrans_max, caps->host.nmigrateTrans, 1); caps->host.migrateTrans[caps->host.nmigrateTrans] = g_strdup(name); caps->host.nmigrateTrans++; return 0; } /** * virCapabilitiesSetNetPrefix: * @caps: capabilities to extend * @name: prefix for host generated network interfaces * * Registers the prefix that is used for generated network interfaces */ int virCapabilitiesSetNetPrefix(virCaps *caps, const char *prefix) { caps->host.netprefix = g_strdup(prefix); return 0; } /** * virCapabilitiesHostNUMAAddCell: * @caps: capabilities to extend * @num: ID number of NUMA cell * @mem: Total size of memory in the NUMA node (in KiB) * @ncpus: number of CPUs in cell * @cpus: array of CPU definition structures * @ndistances: number of sibling NUMA nodes * @distances: NUMA distances to other nodes * @npageinfo: number of pages at node @num * @pageinfo: info on each single memory page * @caches: info on memory side caches * * Registers a new NUMA cell for a host, passing in a array of * CPU IDs belonging to the cell, distances to other NUMA nodes * and info on hugepages on the node. * * All pointers are stolen. */ void virCapabilitiesHostNUMAAddCell(virCapsHostNUMA *caps, int num, unsigned long long mem, int ncpus, virCapsHostNUMACellCPU **cpus, int ndistances, virNumaDistance **distances, int npageinfo, virCapsHostNUMACellPageInfo **pageinfo, GArray **caches) { virCapsHostNUMACell *cell = g_new0(virCapsHostNUMACell, 1); cell->num = num; cell->mem = mem; if (cpus) { cell->ncpus = ncpus; cell->cpus = g_steal_pointer(cpus); } if (distances) { cell->ndistances = ndistances; cell->distances = g_steal_pointer(distances); } if (pageinfo) { cell->npageinfo = npageinfo; cell->pageinfo = g_steal_pointer(pageinfo); } if (caches) { cell->caches = g_steal_pointer(caches); } g_ptr_array_add(caps->cells, cell); } /** * virCapabilitiesAllocMachines: * @machines: NULL-terminated list of machine variants for emulator ('pc', or 'isapc', etc) * @nmachines: filled with number of machine variants for emulator * * Allocate a table of virCapsGuestMachine *from the supplied table * of machine names. */ virCapsGuestMachine ** virCapabilitiesAllocMachines(const char *const *names, int *nnames) { virCapsGuestMachine **machines; size_t i; *nnames = g_strv_length((gchar **)names); machines = g_new0(virCapsGuestMachine *, *nnames); for (i = 0; i < *nnames; i++) { machines[i] = g_new0(virCapsGuestMachine, 1); machines[i]->name = g_strdup(names[i]); } return machines; } /** * virCapabilitiesAddGuest: * @caps: capabilities to extend * @ostype: guest operating system type, of enum VIR_DOMAIN_OSTYPE * @arch: guest CPU architecture * @wordsize: number of bits in CPU word * @emulator: path to default device emulator for arch/ostype * @loader: path to default BIOS loader for arch/ostype * @nmachines: number of machine variants for emulator * @machines: machine variants for emulator ('pc', or 'isapc', etc) * * Registers a new guest operating system. This should be * followed by registration of at least one domain for * running the guest */ virCapsGuest * virCapabilitiesAddGuest(virCaps *caps, int ostype, virArch arch, const char *emulator, const char *loader, int nmachines, virCapsGuestMachine **machines) { virCapsGuest *guest; guest = g_new0(virCapsGuest, 1); guest->ostype = ostype; guest->arch.id = arch; guest->arch.wordsize = virArchGetWordSize(arch); guest->arch.defaultInfo.emulator = g_strdup(emulator); guest->arch.defaultInfo.loader = g_strdup(loader); VIR_RESIZE_N(caps->guests, caps->nguests_max, caps->nguests, 1); caps->guests[caps->nguests++] = guest; if (nmachines) { guest->arch.defaultInfo.nmachines = nmachines; guest->arch.defaultInfo.machines = machines; } return guest; } /** * virCapabilitiesAddGuestDomain: * @guest: guest to support * @hvtype: hypervisor type ('xen', 'qemu', 'kvm') * @emulator: specialized device emulator for domain * @loader: specialized BIOS loader for domain * @nmachines: number of machine variants for emulator * @machines: specialized machine variants for emulator * * Registers a virtual domain capable of running a * guest operating system */ virCapsGuestDomain * virCapabilitiesAddGuestDomain(virCapsGuest *guest, int hvtype, const char *emulator, const char *loader, int nmachines, virCapsGuestMachine **machines) { virCapsGuestDomain *dom; dom = g_new0(virCapsGuestDomain, 1); dom->type = hvtype; dom->info.emulator = g_strdup(emulator); dom->info.loader = g_strdup(loader); VIR_RESIZE_N(guest->arch.domains, guest->arch.ndomains_max, guest->arch.ndomains, 1); guest->arch.domains[guest->arch.ndomains] = dom; guest->arch.ndomains++; if (nmachines) { dom->info.nmachines = nmachines; dom->info.machines = machines; } return dom; } struct virCapsGuestFeatureInfo { const char *name; bool togglesRequired; }; static const struct virCapsGuestFeatureInfo virCapsGuestFeatureInfos[VIR_CAPS_GUEST_FEATURE_TYPE_LAST] = { [VIR_CAPS_GUEST_FEATURE_TYPE_PAE] = { "pae", false }, [VIR_CAPS_GUEST_FEATURE_TYPE_NONPAE] = { "nonpae", false }, [VIR_CAPS_GUEST_FEATURE_TYPE_IA64_BE] = { "ia64_be", false }, [VIR_CAPS_GUEST_FEATURE_TYPE_ACPI] = { "acpi", true }, [VIR_CAPS_GUEST_FEATURE_TYPE_APIC] = { "apic", true }, [VIR_CAPS_GUEST_FEATURE_TYPE_CPUSELECTION] = { "cpuselection", false }, [VIR_CAPS_GUEST_FEATURE_TYPE_DEVICEBOOT] = { "deviceboot", false }, [VIR_CAPS_GUEST_FEATURE_TYPE_DISKSNAPSHOT] = { "disksnapshot", true }, [VIR_CAPS_GUEST_FEATURE_TYPE_HAP] = { "hap", true }, }; static void virCapabilitiesAddGuestFeatureInternal(virCapsGuest *guest, virCapsGuestFeatureType feature, bool defaultOn, bool toggle) { guest->features[feature].present = true; if (virCapsGuestFeatureInfos[feature].togglesRequired) { guest->features[feature].defaultOn = virTristateSwitchFromBool(defaultOn); guest->features[feature].toggle = virTristateBoolFromBool(toggle); } } /** * virCapabilitiesAddGuestFeature: * @guest: guest to associate feature with * @feature: feature to add * * Registers a feature for a guest domain. */ void virCapabilitiesAddGuestFeature(virCapsGuest *guest, virCapsGuestFeatureType feature) { virCapabilitiesAddGuestFeatureInternal(guest, feature, false, false); } /** * virCapabilitiesAddGuestFeatureWithToggle: * @guest: guest to associate feature with * @feature: feature to add * @defaultOn: true if it defaults to on * @toggle: true if its state can be toggled * * Registers a feature with toggles for a guest domain. */ void virCapabilitiesAddGuestFeatureWithToggle(virCapsGuest *guest, virCapsGuestFeatureType feature, bool defaultOn, bool toggle) { virCapabilitiesAddGuestFeatureInternal(guest, feature, defaultOn, toggle); } /** * virCapabilitiesHostSecModelAddBaseLabel * @secmodel: Security model to add a base label for * @type: virtualization type * @label: base label * * Returns non-zero on error. */ extern int virCapabilitiesHostSecModelAddBaseLabel(virCapsHostSecModel *secmodel, const char *type, const char *label) { if (type == NULL || label == NULL) return -1; VIR_EXPAND_N(secmodel->labels, secmodel->nlabels, 1); secmodel->labels[secmodel->nlabels - 1].type = g_strdup(type); secmodel->labels[secmodel->nlabels - 1].label = g_strdup(label); return 0; } static virCapsDomainData * virCapabilitiesDomainDataLookupInternal(virCaps *caps, int ostype, virArch arch, virDomainVirtType domaintype, const char *emulator, const char *machinetype) { virCapsGuest *foundguest = NULL; virCapsGuestDomain *founddomain = NULL; virCapsGuestMachine *foundmachine = NULL; virCapsDomainData *ret = NULL; size_t i, j, k; VIR_DEBUG("Lookup ostype=%d arch=%d domaintype=%d emulator=%s machine=%s", ostype, arch, domaintype, NULLSTR(emulator), NULLSTR(machinetype)); for (i = 0; i < caps->nguests; i++) { virCapsGuest *guest = caps->guests[i]; if (ostype != -1 && guest->ostype != ostype) { VIR_DEBUG("Skip os type want=%d vs got=%d", ostype, guest->ostype); continue; } VIR_DEBUG("Match os type %d", ostype); if ((arch != VIR_ARCH_NONE) && (guest->arch.id != arch)) { VIR_DEBUG("Skip arch want=%d vs got=%d", arch, guest->arch.id); continue; } VIR_DEBUG("Match arch %d", arch); for (j = 0; j < guest->arch.ndomains; j++) { virCapsGuestDomain *domain = guest->arch.domains[j]; virCapsGuestMachine **machinelist; int nmachines; const char *check_emulator = NULL; if (domaintype != VIR_DOMAIN_VIRT_NONE && (domain->type != domaintype)) { VIR_DEBUG("Skip domain type want=%d vs got=%d", domaintype, domain->type); continue; } VIR_DEBUG("Match domain type %d", domaintype); check_emulator = domain->info.emulator; if (!check_emulator) check_emulator = guest->arch.defaultInfo.emulator; if (emulator && STRNEQ_NULLABLE(check_emulator, emulator)) { VIR_DEBUG("Skip emulator got=%s vs want=%s", emulator, NULLSTR(check_emulator)); continue; } VIR_DEBUG("Match emulator %s", NULLSTR(emulator)); if (domain->info.nmachines) { nmachines = domain->info.nmachines; machinelist = domain->info.machines; } else { nmachines = guest->arch.defaultInfo.nmachines; machinelist = guest->arch.defaultInfo.machines; } for (k = 0; k < nmachines; k++) { virCapsGuestMachine *machine = machinelist[k]; if (machinetype && STRNEQ(machine->name, machinetype) && STRNEQ_NULLABLE(machine->canonical, machinetype)) { VIR_DEBUG("Skip machine type want=%s vs got=%s got=%s", machinetype, machine->name, NULLSTR(machine->canonical)); continue; } VIR_DEBUG("Match machine type machine %s", NULLSTR(machinetype)); foundmachine = machine; break; } if (!foundmachine && nmachines) continue; founddomain = domain; break; } if (!founddomain) continue; foundguest = guest; break; } /* XXX check default_emulator, see how it uses this */ if (!foundguest) { g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; if (ostype) virBufferAsprintf(&buf, "ostype=%s ", virDomainOSTypeToString(ostype)); if (arch) virBufferAsprintf(&buf, "arch=%s ", virArchToString(arch)); if (domaintype > VIR_DOMAIN_VIRT_NONE) virBufferAsprintf(&buf, "domaintype=%s ", virDomainVirtTypeToString(domaintype)); if (emulator) virBufferEscapeString(&buf, "emulator=%s ", emulator); if (machinetype) virBufferEscapeString(&buf, "machine=%s ", machinetype); if (virBufferCurrentContent(&buf) && !virBufferCurrentContent(&buf)[0]) virBufferAsprintf(&buf, "%s", _("any configuration")); virReportError(VIR_ERR_INVALID_ARG, _("could not find capabilities for %1$s"), virBufferCurrentContent(&buf)); return ret; } ret = g_new0(virCapsDomainData, 1); ret->ostype = foundguest->ostype; ret->arch = foundguest->arch.id; if (founddomain) { ret->domaintype = founddomain->type; ret->emulator = founddomain->info.emulator; } if (!ret->emulator) ret->emulator = foundguest->arch.defaultInfo.emulator; if (foundmachine) ret->machinetype = foundmachine->name; return ret; } /** * virCapabilitiesDomainDataLookup: * @caps: capabilities to query * @ostype: guest operating system type, of enum VIR_DOMAIN_OSTYPE * @arch: Architecture to search for * @domaintype: domain type to search for, of enum virDomainVirtType * @emulator: Emulator path to search for * @machinetype: Machine type to search for * * Search capabilities for the passed values, and if found return * virCapabilitiesDomainDataLookup filled in with the default values */ virCapsDomainData * virCapabilitiesDomainDataLookup(virCaps *caps, int ostype, virArch arch, int domaintype, const char *emulator, const char *machinetype) { virCapsDomainData *ret; if (arch == VIR_ARCH_NONE) { /* Prefer host arch if its available */ ret = virCapabilitiesDomainDataLookupInternal(caps, ostype, caps->host.arch, domaintype, emulator, machinetype); if (ret) return ret; } return virCapabilitiesDomainDataLookupInternal(caps, ostype, arch, domaintype, emulator, machinetype); } bool virCapabilitiesDomainSupported(virCaps *caps, int ostype, virArch arch, int virttype) { g_autofree virCapsDomainData *capsdata = NULL; capsdata = virCapabilitiesDomainDataLookup(caps, ostype, arch, virttype, NULL, NULL); return capsdata != NULL; } int virCapabilitiesAddStoragePool(virCaps *caps, int poolType) { virCapsStoragePool *pool; pool = g_new0(virCapsStoragePool, 1); pool->type = poolType; VIR_RESIZE_N(caps->pools, caps->npools_max, caps->npools, 1); caps->pools[caps->npools++] = pool; return 0; } static int virCapsHostNUMACellCPUFormat(virBuffer *buf, const virCapsHostNUMACellCPU *cpus, int ncpus) { g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER; g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf); size_t j; virBufferAsprintf(&attrBuf, " num='%d'", ncpus); for (j = 0; j < ncpus; j++) { virBufferAsprintf(&childBuf, "\n"); } virXMLFormatElement(buf, "cpus", &attrBuf, &childBuf); return 0; } static int virCapabilitiesHostNUMAFormat(virBuffer *buf, virCapsHostNUMA *caps) { size_t i; if (!caps) return 0; virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); virBufferAsprintf(buf, "\n", caps->cells->len); virBufferAdjustIndent(buf, 2); for (i = 0; i < caps->cells->len; i++) { virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, i); size_t j; virBufferAsprintf(buf, "\n", cell->num); virBufferAdjustIndent(buf, 2); /* Print out the numacell memory total if it is available */ if (cell->mem) virBufferAsprintf(buf, "%llu\n", cell->mem); for (j = 0; j < cell->npageinfo; j++) { virBufferAsprintf(buf, "%llu\n", cell->pageinfo[j].size, cell->pageinfo[j].avail); } virNumaDistanceFormat(buf, cell->distances, cell->ndistances); if (cell->caches) { virNumaCache *caches = &g_array_index(cell->caches, virNumaCache, 0); virNumaCacheFormat(buf, caches, cell->caches->len); } if (virCapsHostNUMACellCPUFormat(buf, cell->cpus, cell->ncpus) < 0) return -1; virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); if (caps->interconnects) { const virNumaInterconnect *interconnects; interconnects = &g_array_index(caps->interconnects, virNumaInterconnect, 0); virNumaInterconnectFormat(buf, interconnects, caps->interconnects->len); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); return 0; } static int virCapabilitiesFormatResctrlMonitor(virBuffer *buf, virResctrlInfoMon *monitor) { size_t i = 0; g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf); /* monitor not supported, no capability */ if (!monitor) return 0; /* no feature found in monitor means no capability, return */ if (monitor->nfeatures == 0) return 0; virBufferAddLit(buf, "type == VIR_RESCTRL_MONITOR_TYPE_CACHE) virBufferAsprintf(buf, "level='%u' reuseThreshold='%u' ", monitor->cache_level, monitor->cache_reuse_threshold); virBufferAsprintf(buf, "maxMonitors='%u'>\n", monitor->max_monitor); for (i = 0; i < monitor->nfeatures; i++) { virBufferAsprintf(&childrenBuf, "\n", monitor->features[i]); } virBufferAddBuffer(buf, &childrenBuf); virBufferAddLit(buf, "\n"); return 0; } static int virCapabilitiesFormatCaches(virBuffer *buf, virCapsHostCache *cache) { size_t i = 0; size_t j = 0; if (!cache->nbanks && !cache->monitor) return 0; virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); for (i = 0; i < cache->nbanks; i++) { g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER; g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf); virCapsHostCacheBank *bank = cache->banks[i]; g_autofree char *cpus_str = virBitmapFormat(bank->cpus); const char *unit = NULL; unsigned long long short_size = virFormatIntPretty(bank->size, &unit); if (!cpus_str) return -1; /* * Let's just *hope* the size is aligned to KiBs so that it does not * bite is back in the future */ virBufferAsprintf(&attrBuf, " id='%u' level='%u' type='%s' " "size='%llu' unit='%s' cpus='%s'", bank->id, bank->level, virCacheTypeToString(bank->type), short_size, unit, cpus_str); for (j = 0; j < bank->ncontrols; j++) { const char *min_unit; virResctrlInfoPerCache *controls = bank->controls[j]; unsigned long long gran_short_size = controls->granularity; unsigned long long min_short_size = controls->min; gran_short_size = virFormatIntPretty(gran_short_size, &unit); min_short_size = virFormatIntPretty(min_short_size, &min_unit); /* Only use the smaller unit if they are different */ if (min_short_size) { unsigned long long gran_div; unsigned long long min_div; gran_div = controls->granularity / gran_short_size; min_div = controls->min / min_short_size; if (min_div > gran_div) { min_short_size *= min_div / gran_div; } else if (min_div < gran_div) { unit = min_unit; gran_short_size *= gran_div / min_div; } } virBufferAsprintf(&childrenBuf, "\n", unit, virCacheTypeToString(controls->scope), controls->max_allocation); } virXMLFormatElement(buf, "bank", &attrBuf, &childrenBuf); } if (virCapabilitiesFormatResctrlMonitor(buf, cache->monitor) < 0) return -1; virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); return 0; } static int virCapabilitiesFormatMemoryBandwidth(virBuffer *buf, virCapsHostMemBW *memBW) { size_t i = 0; if (!memBW->nnodes && !memBW->monitor) return 0; virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); for (i = 0; i < memBW->nnodes; i++) { g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER; g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf); virCapsHostMemBWNode *node = memBW->nodes[i]; virResctrlInfoMemBWPerNode *control = &node->control; g_autofree char *cpus_str = virBitmapFormat(node->cpus); if (!cpus_str) return -1; virBufferAsprintf(&attrBuf, " id='%u' cpus='%s'", node->id, cpus_str); virBufferAsprintf(&childrenBuf, "\n", control->granularity, control->min, control->max_allocation); virXMLFormatElement(buf, "node", &attrBuf, &childrenBuf); } if (virCapabilitiesFormatResctrlMonitor(buf, memBW->monitor) < 0) return -1; virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); return 0; } static int virCapabilitiesFormatHostXML(virCapsHost *host, virBuffer *buf) { size_t i, j; char host_uuid[VIR_UUID_STRING_BUFLEN]; /* The lack of some data means we have nothing * minimally to format, so just return. */ if (!virUUIDIsValid(host->host_uuid) && !host->arch && !host->powerMgmt && !host->iommu) return 0; virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); if (virUUIDIsValid(host->host_uuid)) { virUUIDFormat(host->host_uuid, host_uuid); virBufferAsprintf(buf, "%s\n", host_uuid); } virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); if (host->arch) virBufferAsprintf(buf, "%s\n", virArchToString(host->arch)); if (host->nfeatures) { virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); for (i = 0; i < host->nfeatures; i++) { virBufferAsprintf(buf, "<%s/>\n", host->features[i]); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); } virCPUDefFormatBuf(buf, host->cpu); for (i = 0; i < host->nPagesSize; i++) { virBufferAsprintf(buf, "\n", host->pagesSize[i]); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); /* The PM query was successful. */ if (host->powerMgmt) { /* The host supports some PM features. */ unsigned int pm = host->powerMgmt; virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); while (pm) { int bit = __builtin_ffs(pm) - 1; virBufferAsprintf(buf, "<%s/>\n", virCapsHostPMTargetTypeToString(bit)); pm &= ~(1U << bit); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); } else { /* The host does not support any PM feature. */ virBufferAddLit(buf, "\n"); } virBufferAsprintf(buf, "\n", host->iommu ? "yes" : "no"); if (host->offlineMigrate) { virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); if (host->liveMigrate) virBufferAddLit(buf, "\n"); if (host->nmigrateTrans) { virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); for (i = 0; i < host->nmigrateTrans; i++) { virBufferAsprintf(buf, "%s\n", host->migrateTrans[i]); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); } if (host->netprefix) virBufferAsprintf(buf, "%s\n", host->netprefix); if (virCapabilitiesHostNUMAFormat(buf, host->numa) < 0) return -1; if (virCapabilitiesFormatCaches(buf, &host->cache) < 0) return -1; if (virCapabilitiesFormatMemoryBandwidth(buf, &host->memBW) < 0) return -1; for (i = 0; i < host->nsecModels; i++) { virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); virBufferAsprintf(buf, "%s\n", host->secModels[i].model); virBufferAsprintf(buf, "%s\n", host->secModels[i].doi); for (j = 0; j < host->secModels[i].nlabels; j++) { virBufferAsprintf(buf, "%s\n", host->secModels[i].labels[j].type, host->secModels[i].labels[j].label); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n\n"); return 0; } static void virCapabilitiesFormatGuestFeatures(virCapsGuest *guest, virBuffer *buf) { g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf); size_t i; for (i = 0; i < VIR_CAPS_GUEST_FEATURE_TYPE_LAST; i++) { virCapsGuestFeature *feature = guest->features + i; if (!feature->present) continue; virBufferAsprintf(&childBuf, "<%s", virCapsGuestFeatureInfos[i].name); if (feature->defaultOn) { virBufferAsprintf(&childBuf, " default='%s'", virTristateSwitchTypeToString(feature->defaultOn)); } if (feature->toggle) { virBufferAsprintf(&childBuf, " toggle='%s'", virTristateBoolTypeToString(feature->toggle)); } virBufferAddLit(&childBuf, "/>\n"); } virXMLFormatElement(buf, "features", NULL, &childBuf); } static void virCapabilitiesFormatGuestXML(virCapsGuest **guests, size_t nguests, virBuffer *buf) { size_t i, j, k; for (i = 0; i < nguests; i++) { virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); virBufferAsprintf(buf, "%s\n", virDomainOSTypeToString(guests[i]->ostype)); if (guests[i]->arch.id) virBufferAsprintf(buf, "\n", virArchToString(guests[i]->arch.id)); virBufferAdjustIndent(buf, 2); virBufferAsprintf(buf, "%d\n", guests[i]->arch.wordsize); if (guests[i]->arch.defaultInfo.emulator) virBufferAsprintf(buf, "%s\n", guests[i]->arch.defaultInfo.emulator); if (guests[i]->arch.defaultInfo.loader) virBufferAsprintf(buf, "%s\n", guests[i]->arch.defaultInfo.loader); for (j = 0; j < guests[i]->arch.defaultInfo.nmachines; j++) { virCapsGuestMachine *machine = guests[i]->arch.defaultInfo.machines[j]; virBufferAddLit(buf, "canonical) virBufferAsprintf(buf, " canonical='%s'", machine->canonical); if (machine->maxCpus > 0) virBufferAsprintf(buf, " maxCpus='%d'", machine->maxCpus); if (machine->deprecated) virBufferAddLit(buf, " deprecated='yes'"); virBufferAsprintf(buf, ">%s\n", machine->name); } for (j = 0; j < guests[i]->arch.ndomains; j++) { virBufferAsprintf(buf, "arch.domains[j]->type)); if (!guests[i]->arch.domains[j]->info.emulator && !guests[i]->arch.domains[j]->info.loader && !guests[i]->arch.domains[j]->info.nmachines) { virBufferAddLit(buf, "/>\n"); continue; } virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2); if (guests[i]->arch.domains[j]->info.emulator) virBufferAsprintf(buf, "%s\n", guests[i]->arch.domains[j]->info.emulator); if (guests[i]->arch.domains[j]->info.loader) virBufferAsprintf(buf, "%s\n", guests[i]->arch.domains[j]->info.loader); for (k = 0; k < guests[i]->arch.domains[j]->info.nmachines; k++) { virCapsGuestMachine *machine = guests[i]->arch.domains[j]->info.machines[k]; virBufferAddLit(buf, "canonical) virBufferAsprintf(buf, " canonical='%s'", machine->canonical); if (machine->maxCpus > 0) virBufferAsprintf(buf, " maxCpus='%d'", machine->maxCpus); virBufferAsprintf(buf, ">%s\n", machine->name); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); virCapabilitiesFormatGuestFeatures(guests[i], buf); virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n\n"); } } static void virCapabilitiesFormatStoragePoolXML(virCapsStoragePool **pools, size_t npools, virBuffer *buf) { size_t i; if (npools == 0) return; virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, 2); for (i = 0; i < npools; i++) virBufferAsprintf(buf, "%s\n", virStoragePoolTypeToString(pools[i]->type)); virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n"); virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "\n\n"); } /** * virCapabilitiesFormatXML: * @caps: capabilities to format * * Convert the capabilities object into an XML representation * * Returns the XML document as a string */ char * virCapabilitiesFormatXML(virCaps *caps) { g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; virBufferAddLit(&buf, "\n\n"); virBufferAdjustIndent(&buf, 2); if (virCapabilitiesFormatHostXML(&caps->host, &buf) < 0) return NULL; virCapabilitiesFormatGuestXML(caps->guests, caps->nguests, &buf); virCapabilitiesFormatStoragePoolXML(caps->pools, caps->npools, &buf); virBufferAdjustIndent(&buf, -2); virBufferAddLit(&buf, "\n"); return virBufferContentAndReset(&buf); } /* get the maximum ID of cpus in the host */ static unsigned int virCapabilitiesHostNUMAGetMaxcpu(virCapsHostNUMA *caps) { unsigned int maxcpu = 0; size_t node; size_t cpu; for (node = 0; node < caps->cells->len; node++) { virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, node); for (cpu = 0; cpu < cell->ncpus; cpu++) { if (cell->cpus[cpu].id > maxcpu) maxcpu = cell->cpus[cpu].id; } } return maxcpu; } /* set cpus of a numa node in the bitmask */ static int virCapabilitiesHostNUMAGetCellCpus(virCapsHostNUMA *caps, size_t node, virBitmap *cpumask) { virCapsHostNUMACell *cell = NULL; size_t cpu; size_t i; /* The numa node numbers can be non-contiguous. Ex: 0,1,16,17. */ for (i = 0; i < caps->cells->len; i++) { cell = g_ptr_array_index(caps->cells, i); if (cell->num == node) break; cell = NULL; } for (cpu = 0; cell && cpu < cell->ncpus; cpu++) { if (virBitmapSetBit(cpumask, cell->cpus[cpu].id) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Cpu '%1$u' in node '%2$zu' is out of range of the provided bitmap"), cell->cpus[cpu].id, node); return -1; } } return 0; } virBitmap * virCapabilitiesHostNUMAGetCpus(virCapsHostNUMA *caps, virBitmap *nodemask) { g_autoptr(virBitmap) ret = NULL; unsigned int maxcpu = virCapabilitiesHostNUMAGetMaxcpu(caps); ssize_t node = -1; ret = virBitmapNew(maxcpu + 1); while ((node = virBitmapNextSetBit(nodemask, node)) >= 0) { if (virCapabilitiesHostNUMAGetCellCpus(caps, node, ret) < 0) return NULL; } return g_steal_pointer(&ret); } int virCapabilitiesHostNUMAGetMaxNode(virCapsHostNUMA *caps) { virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, caps->cells->len - 1); return cell->num; } int virCapabilitiesGetNodeInfo(virNodeInfoPtr nodeinfo) { virArch hostarch = virArchFromHost(); unsigned long long memorybytes; memset(nodeinfo, 0, sizeof(*nodeinfo)); if (virStrcpyStatic(nodeinfo->model, virArchToString(hostarch)) < 0) return -1; if (virHostMemGetInfo(&memorybytes, NULL) < 0) return -1; nodeinfo->memory = memorybytes / 1024; if (virHostCPUGetInfo(hostarch, &nodeinfo->cpus, &nodeinfo->mhz, &nodeinfo->nodes, &nodeinfo->sockets, &nodeinfo->cores, &nodeinfo->threads) < 0) return -1; return 0; } /* returns 1 on success, 0 if the detection failed and -1 on hard error */ static int virCapabilitiesFillCPUInfo(int cpu_id G_GNUC_UNUSED, virCapsHostNUMACellCPU *cpu G_GNUC_UNUSED) { #ifdef __linux__ cpu->id = cpu_id; if (virHostCPUGetSocket(cpu_id, &cpu->socket_id) < 0 || virHostCPUGetDie(cpu_id, &cpu->die_id) < 0 || virHostCPUGetCore(cpu_id, &cpu->core_id) < 0) return -1; if (!(cpu->siblings = virHostCPUGetSiblingsList(cpu_id))) return -1; return 0; #else virReportError(VIR_ERR_NO_SUPPORT, "%s", _("node cpu info not implemented on this platform")); return -1; #endif } static int virCapabilitiesGetNUMADistances(int node, virNumaDistance **distancesRet, int *ndistancesRet) { g_autofree virNumaDistance *tmp = NULL; int tmp_size = 0; g_autofree int *distances = NULL; int ndistances = 0; size_t i; if (virNumaGetDistances(node, &distances, &ndistances) < 0) return -1; if (!distances) { *distancesRet = NULL; *ndistancesRet = 0; return 0; } tmp = g_new0(virNumaDistance, ndistances); for (i = 0; i < ndistances; i++) { if (!distances[i]) continue; tmp[tmp_size].cellid = i; tmp[tmp_size].value = distances[i]; tmp_size++; } VIR_REALLOC_N(tmp, tmp_size); *ndistancesRet = tmp_size; *distancesRet = g_steal_pointer(&tmp); tmp_size = 0; return 0; } static int virCapabilitiesGetNUMAPagesInfo(int node, virCapsHostNUMACellPageInfo **pageinfo, int *npageinfo) { g_autofree unsigned int *pages_size = NULL; g_autofree unsigned long long *pages_avail = NULL; size_t npages, i; if (virNumaGetPages(node, &pages_size, &pages_avail, NULL, &npages) < 0) return -1; *pageinfo = g_new0(virCapsHostNUMACellPageInfo, npages); *npageinfo = npages; for (i = 0; i < npages; i++) { (*pageinfo)[i].size = pages_size[i]; (*pageinfo)[i].avail = pages_avail[i]; } return 0; } static int virCapabilitiesGetNodeCacheReadFileUint(const char *prefix, const char *dir, const char *file, unsigned int *value) { g_autofree char *path = g_build_filename(prefix, dir, file, NULL); int rv = virFileReadValueUint(value, "%s", path); if (rv < 0) { if (rv == -2) { virReportError(VIR_ERR_INTERNAL_ERROR, _("File '%1$s' does not exist"), path); } return -1; } return 0; } static int virCapabilitiesGetNodeCacheReadFileUllong(const char *prefix, const char *dir, const char *file, unsigned long long *value) { g_autofree char *path = g_build_filename(prefix, dir, file, NULL); int rv = virFileReadValueUllong(value, "%s", path); if (rv < 0) { if (rv == -2) { virReportError(VIR_ERR_INTERNAL_ERROR, _("File '%1$s' does not exist"), path); } return -1; } return 0; } static int virCapsHostNUMACellCacheComparator(const void *a, const void *b) { const virNumaCache *aa = a; const virNumaCache *bb = b; return aa->level - bb->level; } static int virCapabilitiesGetNodeCache(int node, GArray **cachesRet) { g_autoptr(DIR) dir = NULL; int direrr = 0; struct dirent *entry; g_autofree char *path = NULL; g_autoptr(GArray) caches = g_array_new(FALSE, FALSE, sizeof(virNumaCache)); path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/memory_side_cache", node); if (virDirOpenIfExists(&dir, path) < 0) return -1; while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) { const char *dname = STRSKIP(entry->d_name, "index"); virNumaCache cache = { 0 }; unsigned int indexing; unsigned int write_policy; if (!dname) continue; if (virStrToLong_ui(dname, NULL, 10, &cache.level) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("unable to parse %1$s"), entry->d_name); return -1; } if (virCapabilitiesGetNodeCacheReadFileUllong(path, entry->d_name, "size", &cache.size) < 0) return -1; cache.size >>= 10; /* read in bytes but stored in kibibytes */ if (virCapabilitiesGetNodeCacheReadFileUint(path, entry->d_name, "line_size", &cache.line) < 0) return -1; if (virCapabilitiesGetNodeCacheReadFileUint(path, entry->d_name, "indexing", &indexing) < 0) return -1; /* see enum cache_indexing in kernel */ switch (indexing) { case 0: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_DIRECT; break; case 1: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_FULL; break; case 2: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_NONE; break; default: virReportError(VIR_ERR_INTERNAL_ERROR, _("unknown indexing value '%1$u'"), indexing); return -1; } if (virCapabilitiesGetNodeCacheReadFileUint(path, entry->d_name, "write_policy", &write_policy) < 0) return -1; /* see enum cache_write_policy in kernel */ switch (write_policy) { case 0: cache.policy = VIR_NUMA_CACHE_POLICY_WRITEBACK; break; case 1: cache.policy = VIR_NUMA_CACHE_POLICY_WRITETHROUGH; break; case 2: cache.policy = VIR_NUMA_CACHE_POLICY_NONE; break; default: virReportError(VIR_ERR_INTERNAL_ERROR, _("unknown write_policy value '%1$u'"), write_policy); return -1; } g_array_append_val(caches, cache); } if (direrr < 0) return -1; if (caches->len > 0) { g_array_sort(caches, virCapsHostNUMACellCacheComparator); *cachesRet = g_steal_pointer(&caches); } else { *cachesRet = NULL; } return 0; } static int virCapabilitiesHostNUMAInitFake(virCapsHostNUMA *caps) { virNodeInfo nodeinfo; virCapsHostNUMACellCPU *cpus; int ncpus; int n, s, c, t; int id, cid; int onlinecpus G_GNUC_UNUSED; bool tmp; if (virCapabilitiesGetNodeInfo(&nodeinfo) < 0) return -1; ncpus = VIR_NODEINFO_MAXCPUS(nodeinfo); id = 0; for (n = 0; n < nodeinfo.nodes; n++) { int nodecpus = nodeinfo.sockets * nodeinfo.cores * nodeinfo.threads; cid = 0; cpus = g_new0(virCapsHostNUMACellCPU, nodecpus); for (s = 0; s < nodeinfo.sockets; s++) { for (c = 0; c < nodeinfo.cores; c++) { g_autoptr(virBitmap) siblings = virBitmapNew(ncpus); for (t = 0; t < nodeinfo.threads; t++) ignore_value(virBitmapSetBit(siblings, id + t)); for (t = 0; t < nodeinfo.threads; t++) { if (virHostCPUGetOnline(id, &tmp) < 0) goto error; if (tmp) { cpus[cid].id = id; cpus[cid].die_id = 0; cpus[cid].socket_id = s; cpus[cid].core_id = c; cpus[cid].siblings = virBitmapNewCopy(siblings); cid++; } id++; } } } virCapabilitiesHostNUMAAddCell(caps, 0, nodeinfo.memory, cid, &cpus, 0, NULL, 0, NULL, NULL); } return 0; error: for (; cid >= 0; cid--) virBitmapFree(cpus[cid].siblings); VIR_FREE(cpus); return -1; } static void virCapabilitiesHostInsertHMAT(GArray *interconnects, unsigned int initiator, unsigned int target, unsigned int read_bandwidth, unsigned int write_bandwidth, unsigned int read_latency, unsigned int write_latency) { virNumaInterconnect ni; ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH, initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_bandwidth}; g_array_append_val(interconnects, ni); ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH, initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_bandwidth}; g_array_append_val(interconnects, ni); ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY, initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_latency}; g_array_append_val(interconnects, ni); ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY, initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_latency}; g_array_append_val(interconnects, ni); } static int virCapabilitiesHostNUMAInitInterconnectsNode(GArray *interconnects, unsigned int node) { g_autofree char *path = NULL; g_autofree char *initPath = NULL; g_autoptr(DIR) dir = NULL; int direrr = 0; struct dirent *entry; unsigned int read_bandwidth; unsigned int write_bandwidth; unsigned int read_latency; unsigned int write_latency; /* Unfortunately, kernel does not expose full HMAT table. I mean it does, * in its binary form under /sys/firmware/acpi/tables/HMAT but we don't * want to parse that. But some important info is still exposed, under * "access0" and "access1" directories. The former contains the best * interconnect to given node including CPUs and devices that might do I/O * (such as GPUs and NICs). The latter contains the best interconnect to * given node but only CPUs are considered. Stick with access1 until sysfs * exposes the full table in a sensible way. * NB on most system access0 and access1 contain the same values. */ path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/access1", node); if (!virFileExists(path)) return 0; if (virCapabilitiesGetNodeCacheReadFileUint(path, "initiators", "read_bandwidth", &read_bandwidth) < 0) return -1; if (virCapabilitiesGetNodeCacheReadFileUint(path, "initiators", "write_bandwidth", &write_bandwidth) < 0) return -1; /* Bandwidths are read in MiB but stored in KiB */ read_bandwidth <<= 10; write_bandwidth <<= 10; if (virCapabilitiesGetNodeCacheReadFileUint(path, "initiators", "read_latency", &read_latency) < 0) return -1; if (virCapabilitiesGetNodeCacheReadFileUint(path, "initiators", "write_latency", &write_latency) < 0) return -1; initPath = g_strdup_printf("%s/initiators", path); if (virDirOpen(&dir, initPath) < 0) return -1; while ((direrr = virDirRead(dir, &entry, path)) > 0) { const char *dname = STRSKIP(entry->d_name, "node"); unsigned int initNode; if (!dname) continue; if (virStrToLong_ui(dname, NULL, 10, &initNode) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("unable to parse %1$s"), entry->d_name); return -1; } virCapabilitiesHostInsertHMAT(interconnects, initNode, node, read_bandwidth, write_bandwidth, read_latency, write_latency); } return 0; } static int virCapsHostNUMAInterconnectComparator(const void *a, const void *b) { const virNumaInterconnect *aa = a; const virNumaInterconnect *bb = b; if (aa->type != bb->type) return aa->type - bb->type; if (aa->initiator != bb->initiator) return aa->initiator - bb->initiator; if (aa->target != bb->target) return aa->target - bb->target; if (aa->cache != bb->cache) return aa->cache - bb->cache; if (aa->accessType != bb->accessType) return aa->accessType - bb->accessType; return aa->value - bb->value; } static int virCapabilitiesHostNUMAInitInterconnects(virCapsHostNUMA *caps) { g_autoptr(DIR) dir = NULL; int direrr = 0; struct dirent *entry; const char *path = SYSFS_SYSTEM_PATH "/node/"; g_autoptr(GArray) interconnects = g_array_new(FALSE, FALSE, sizeof(virNumaInterconnect)); if (virDirOpenIfExists(&dir, path) < 0) return -1; while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) { const char *dname = STRSKIP(entry->d_name, "node"); unsigned int node; if (!dname) continue; if (virStrToLong_ui(dname, NULL, 10, &node) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("unable to parse %1$s"), entry->d_name); return -1; } if (virCapabilitiesHostNUMAInitInterconnectsNode(interconnects, node) < 0) return -1; } if (interconnects->len > 0) { g_array_sort(interconnects, virCapsHostNUMAInterconnectComparator); caps->interconnects = g_steal_pointer(&interconnects); } return 0; } static int virCapabilitiesHostNUMAInitReal(virCapsHostNUMA *caps) { int n; virCapsHostNUMACellCPU *cpus = NULL; int ret = -1; int ncpus = 0; int max_node; if ((max_node = virNumaGetMaxNode()) < 0) goto cleanup; for (n = 0; n <= max_node; n++) { g_autoptr(virBitmap) cpumap = NULL; g_autofree virNumaDistance *distances = NULL; int ndistances = 0; g_autofree virCapsHostNUMACellPageInfo *pageinfo = NULL; int npageinfo = 0; unsigned long long memory; g_autoptr(GArray) caches = NULL; int cpu; size_t i; if ((ncpus = virNumaGetNodeCPUs(n, &cpumap)) < 0) { if (ncpus == -2) continue; ncpus = 0; goto cleanup; } cpus = g_new0(virCapsHostNUMACellCPU, ncpus); cpu = 0; for (i = 0; i < virBitmapSize(cpumap); i++) { if (virBitmapIsBitSet(cpumap, i)) { if (virCapabilitiesFillCPUInfo(i, cpus + cpu++) < 0) goto cleanup; } } if (virCapabilitiesGetNUMADistances(n, &distances, &ndistances) < 0) goto cleanup; if (virCapabilitiesGetNUMAPagesInfo(n, &pageinfo, &npageinfo) < 0) goto cleanup; if (virCapabilitiesGetNodeCache(n, &caches) < 0) goto cleanup; /* Detect the amount of memory in the numa cell in KiB */ virNumaGetNodeMemory(n, &memory, NULL); memory >>= 10; virCapabilitiesHostNUMAAddCell(caps, n, memory, ncpus, &cpus, ndistances, &distances, npageinfo, &pageinfo, &caches); } if (virCapabilitiesHostNUMAInitInterconnects(caps) < 0) goto cleanup; ret = 0; cleanup: virCapabilitiesClearHostNUMACellCPUTopology(cpus, ncpus); VIR_FREE(cpus); return ret; } virCapsHostNUMA * virCapabilitiesHostNUMANew(void) { virCapsHostNUMA *caps = NULL; caps = g_new0(virCapsHostNUMA, 1); caps->refs = 1; caps->cells = g_ptr_array_new_with_free_func( (GDestroyNotify)virCapabilitiesFreeHostNUMACell); return caps; } virCapsHostNUMA * virCapabilitiesHostNUMANewHost(void) { virCapsHostNUMA *caps = virCapabilitiesHostNUMANew(); if (virNumaIsAvailable()) { if (virCapabilitiesHostNUMAInitReal(caps) == 0) return caps; virCapabilitiesHostNUMAUnref(caps); caps = virCapabilitiesHostNUMANew(); VIR_WARN("Failed to query host NUMA topology, faking single NUMA node"); } if (virCapabilitiesHostNUMAInitFake(caps) < 0) { virCapabilitiesHostNUMAUnref(caps); return NULL; } return caps; } int virCapabilitiesInitPages(virCaps *caps) { g_autofree unsigned int *pages_size = NULL; size_t npages; if (virNumaGetPages(-1 /* Magic constant for overall info */, &pages_size, NULL, NULL, &npages) < 0) return -1; caps->host.pagesSize = g_steal_pointer(&pages_size); caps->host.nPagesSize = npages; npages = 0; return 0; } bool virCapsHostCacheBankEquals(virCapsHostCacheBank *a, virCapsHostCacheBank *b) { return (a->id == b->id && a->level == b->level && a->type == b->type && a->size == b->size && virBitmapEqual(a->cpus, b->cpus)); } void virCapsHostCacheBankFree(virCapsHostCacheBank *ptr) { size_t i; if (!ptr) return; virBitmapFree(ptr->cpus); for (i = 0; i < ptr->ncontrols; i++) g_free(ptr->controls[i]); g_free(ptr->controls); g_free(ptr); } static int virCapsHostCacheBankSorter(const void *a, const void *b) { virCapsHostCacheBank *ca = *(virCapsHostCacheBank **)a; virCapsHostCacheBank *cb = *(virCapsHostCacheBank **)b; if (ca->level < cb->level) return -1; if (ca->level > cb->level) return 1; return ca->id - cb->id; } static int virCapabilitiesInitResctrl(virCaps *caps) { if (caps->host.resctrl) return 0; caps->host.resctrl = virResctrlInfoNew(); if (!caps->host.resctrl) return -1; return 0; } static int virCapabilitiesInitResctrlMemory(virCaps *caps) { virCapsHostMemBWNode *node = NULL; size_t i = 0; int ret = -1; const virResctrlMonitorType montype = VIR_RESCTRL_MONITOR_TYPE_MEMBW; const char *prefix = virResctrlMonitorPrefixTypeToString(montype); for (i = 0; i < caps->host.cache.nbanks; i++) { virCapsHostCacheBank *bank = caps->host.cache.banks[i]; node = g_new0(virCapsHostMemBWNode, 1); if (virResctrlInfoGetMemoryBandwidth(caps->host.resctrl, bank->level, &node->control) > 0) { node->id = bank->id; node->cpus = virBitmapNewCopy(bank->cpus); VIR_APPEND_ELEMENT(caps->host.memBW.nodes, caps->host.memBW.nnodes, node); } g_clear_pointer(&node, virCapsHostMemBWNodeFree); } if (virResctrlInfoGetMonitorPrefix(caps->host.resctrl, prefix, &caps->host.memBW.monitor) < 0) goto cleanup; ret = 0; cleanup: virCapsHostMemBWNodeFree(node); return ret; } int virCapabilitiesInitCaches(virCaps *caps) { size_t i = 0; g_autoptr(virBitmap) cpus = NULL; ssize_t pos = -1; struct dirent *ent = NULL; const virResctrlMonitorType montype = VIR_RESCTRL_MONITOR_TYPE_CACHE; const char *prefix = virResctrlMonitorPrefixTypeToString(montype); /* Minimum level to expose in capabilities. Can be lowered or removed (with * the appropriate code below), but should not be increased, because we'd * lose information. */ const int cache_min_level = 3; if (virCapabilitiesInitResctrl(caps) < 0) return -1; /* offline CPUs don't provide cache info */ if (virFileReadValueBitmap(&cpus, "%s/cpu/online", SYSFS_SYSTEM_PATH) < 0) return -1; while ((pos = virBitmapNextSetBit(cpus, pos)) >= 0) { int rv = -1; g_autoptr(DIR) dirp = NULL; g_autofree char *path = g_strdup_printf("%s/cpu/cpu%zd/cache/", SYSFS_SYSTEM_PATH, pos); rv = virDirOpenIfExists(&dirp, path); if (rv < 0) return -1; if (!dirp) continue; while ((rv = virDirRead(dirp, &ent, path)) > 0) { g_autofree char *type = NULL; g_autoptr(virCapsHostCacheBank) bank = NULL; int kernel_type; unsigned int level; int ret; if (!STRPREFIX(ent->d_name, "index")) continue; if (virFileReadValueUint(&level, "%s/cpu/cpu%zd/cache/%s/level", SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0) return -1; if (level < cache_min_level) continue; bank = g_new0(virCapsHostCacheBank, 1); bank->level = level; ret = virFileReadValueUint(&bank->id, "%s/cpu/cpu%zd/cache/%s/id", SYSFS_SYSTEM_PATH, pos, ent->d_name); if (ret == -2) { VIR_DEBUG("CPU %zd cache %s 'id' missing", pos, ent->d_name); continue; } if (ret < 0) return -1; ret = virFileReadValueUint(&bank->level, "%s/cpu/cpu%zd/cache/%s/level", SYSFS_SYSTEM_PATH, pos, ent->d_name); if (ret == -2) { VIR_DEBUG("CPU %zd cache %s 'level' missing", pos, ent->d_name); continue; } if (ret < 0) return -1; ret = virFileReadValueString(&type, "%s/cpu/cpu%zd/cache/%s/type", SYSFS_SYSTEM_PATH, pos, ent->d_name); if (ret == -2) { VIR_DEBUG("CPU %zd cache %s 'type' missing", pos, ent->d_name); continue; } if (ret < 0) return -1; ret = virFileReadValueScaledInt(&bank->size, "%s/cpu/cpu%zd/cache/%s/size", SYSFS_SYSTEM_PATH, pos, ent->d_name); if (ret == -2) { VIR_DEBUG("CPU %zd cache %s 'size' missing", pos, ent->d_name); continue; } if (ret < 0) return -1; ret = virFileReadValueBitmap(&bank->cpus, "%s/cpu/cpu%zd/cache/%s/shared_cpu_list", SYSFS_SYSTEM_PATH, pos, ent->d_name); if (ret == -2) { VIR_DEBUG("CPU %zd cache %s 'shared_cpu_list' missing", pos, ent->d_name); continue; } if (ret < 0) return -1; kernel_type = virCacheKernelTypeFromString(type); if (kernel_type < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Unknown cache type '%1$s'"), type); return -1; } bank->type = kernel_type; for (i = 0; i < caps->host.cache.nbanks; i++) { if (virCapsHostCacheBankEquals(bank, caps->host.cache.banks[i])) break; } if (i == caps->host.cache.nbanks) { /* If it is a new cache, then update its resctrl information. */ if (virResctrlInfoGetCache(caps->host.resctrl, bank->level, bank->size, &bank->ncontrols, &bank->controls) < 0) return -1; VIR_APPEND_ELEMENT(caps->host.cache.banks, caps->host.cache.nbanks, bank); } } if (rv < 0) return -1; } /* Sort the array in order for the tests to be predictable. This way we can * still traverse the directory instead of guessing names (in case there is * 'index1' and 'index3' but no 'index2'). */ if (caps->host.cache.banks) { qsort(caps->host.cache.banks, caps->host.cache.nbanks, sizeof(*caps->host.cache.banks), virCapsHostCacheBankSorter); } if (virCapabilitiesInitResctrlMemory(caps) < 0) return -1; if (virResctrlInfoGetMonitorPrefix(caps->host.resctrl, prefix, &caps->host.cache.monitor) < 0) return -1; return 0; } void virCapabilitiesHostInitIOMMU(virCaps *caps) { caps->host.iommu = virHostHasIOMMU(); }