|  | // SPDX-License-Identifier: GPL-2.0-or-later | 
|  | /* | 
|  | * Hypervisor supplied "gpci" ("get performance counter info") performance | 
|  | * counter support | 
|  | * | 
|  | * Author: Cody P Schafer <cody@linux.vnet.ibm.com> | 
|  | * Copyright 2014 IBM Corporation. | 
|  | */ | 
|  |  | 
|  | #define pr_fmt(fmt) "hv-gpci: " fmt | 
|  |  | 
|  | #include <linux/init.h> | 
|  | #include <linux/perf_event.h> | 
|  | #include <asm/firmware.h> | 
|  | #include <asm/hvcall.h> | 
|  | #include <asm/io.h> | 
|  |  | 
|  | #include "hv-gpci.h" | 
|  | #include "hv-common.h" | 
|  |  | 
|  | /* | 
|  | * Example usage: | 
|  | *  perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, | 
|  | *		  secondary_index=0,starting_index=0xffffffff,request=0x10/' ... | 
|  | */ | 
|  |  | 
/*
 * Fields of the perf_event_attr 'config' and 'config1' words used by this
 * PMU. Each line names a field, the word it lives in and its first and last
 * bit; the resulting format_attr_<name> objects are listed in format_attrs[]
 * below. NOTE(review): the macros come from hv-common.h; presumably they also
 * generate the event_get_<name>() accessors used further down - confirm.
 */
|  | /* u32 */ | 
|  | EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); | 
|  | /* u32 */ | 
|  | /* | 
|  | * Note that starting_index, phys_processor_idx, sibling_part_id, | 
|  | * hw_chip_id, partition_id all refer to the same bit range. They | 
|  | * are basically aliases for the starting_index. The specific alias | 
|  | * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h | 
|  | */ | 
|  | EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); | 
|  | EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); | 
|  | EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); | 
|  | EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); | 
|  | EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); | 
|  |  | 
|  | /* u16 */ | 
|  | EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); | 
|  | /* u8 */ | 
|  | EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); | 
|  | /* u8, bytes of data (1-8) */ | 
|  | EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); | 
|  | /* u32, byte offset */ | 
|  | EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); | 
|  |  | 
/*
 * CPU currently designated to collect hv_gpci events. It is reported by the
 * 'cpumask' sysfs attribute and maintained by the CPU hotplug callbacks
 * (ppc_hv_gpci_cpu_online/offline).
 */
|  | static cpumask_t hv_gpci_cpumask; | 
|  |  | 
/*
 * NULL-terminated list of the format attributes defined above; published as
 * the "format" sysfs group of the PMU.
 */
|  | static struct attribute *format_attrs[] = { | 
|  | &format_attr_request.attr, | 
|  | &format_attr_starting_index.attr, | 
|  | &format_attr_phys_processor_idx.attr, | 
|  | &format_attr_sibling_part_id.attr, | 
|  | &format_attr_hw_chip_id.attr, | 
|  | &format_attr_partition_id.attr, | 
|  | &format_attr_secondary_index.attr, | 
|  | &format_attr_counter_info_version.attr, | 
|  |  | 
|  | &format_attr_offset.attr, | 
|  | &format_attr_length.attr, | 
|  | NULL, | 
|  | }; | 
|  |  | 
/* "format" attribute group built from format_attrs[]. */
|  | static const struct attribute_group format_group = { | 
|  | .name = "format", | 
|  | .attrs = format_attrs, | 
|  | }; | 
|  |  | 
/*
 * "events" attribute group. Deliberately not const: its .attrs pointer is
 * filled in at init time rather than here.
 */
|  | static struct attribute_group event_group = { | 
|  | .name  = "events", | 
|  | /* .attrs is set in init */ | 
|  | }; | 
|  |  | 
/*
 * HV_CAPS_ATTR - define a read-only device attribute for one member of
 * struct hv_perf_caps.
 *
 * @_name:   member of struct hv_perf_caps; also used as the attribute name
 * @_format: printf-style format used to print that member
 *
 * Expands to a <_name>_show() handler plus a hv_caps_attr_<_name> device
 * attribute. The handler queries the capabilities on every read and returns
 * -EIO when hv_perf_caps_get() reports a non-zero status.
 */
#define HV_CAPS_ATTR(_name, _format)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *page)				\
{								\
	struct hv_perf_caps hv_caps;				\
								\
	if (hv_perf_caps_get(&hv_caps))				\
		return -EIO;					\
								\
	return sprintf(page, _format, hv_caps._name);		\
}								\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
|  |  | 
|  | static ssize_t kernel_version_show(struct device *dev, | 
|  | struct device_attribute *attr, | 
|  | char *page) | 
|  | { | 
|  | return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); | 
|  | } | 
|  |  | 
|  | static ssize_t cpumask_show(struct device *dev, | 
|  | struct device_attribute *attr, char *buf) | 
|  | { | 
|  | return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); | 
|  | } | 
|  |  | 
/*
 * The values below are slot numbers in interface_attrs[]: slots 0-5 hold the
 * fixed attributes, slots 6-10 are NULL placeholders for the optional system
 * information files, and slot 11 is the array terminator.
 */
|  | /* Interface attribute array index to store system information */ | 
|  | #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6 | 
|  | #define INTERFACE_PROCESSOR_CONFIG_ATTR		7 | 
|  | #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8 | 
|  | #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9 | 
|  | #define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10 | 
|  | #define INTERFACE_NULL_ATTR			11 | 
|  |  | 
/* The enumerators are used as indices into sysinfo_counter_request[]. */
|  | /* Counter request value to retrieve system information */ | 
|  | enum { | 
|  | PROCESSOR_BUS_TOPOLOGY, | 
|  | PROCESSOR_CONFIG, | 
|  | AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ | 
|  | AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ | 
|  | AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ | 
|  | }; | 
|  |  | 
/*
 * Counter request number passed to H_GET_PERF_COUNTER_INFO for each kind of
 * system information, indexed by the enum above.
 */
|  | static int sysinfo_counter_request[] = { | 
|  | [PROCESSOR_BUS_TOPOLOGY] = 0xD0, | 
|  | [PROCESSOR_CONFIG] = 0x90, | 
|  | [AFFINITY_DOMAIN_VIA_VP] = 0xA0, | 
|  | [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, | 
|  | [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, | 
|  | }; | 
|  |  | 
/*
 * Per-CPU buffer of HGPCI_REQ_BUFFER_SIZE bytes, aligned for 64-bit access,
 * that holds both the request parameters and the data returned by
 * H_GET_PERF_COUNTER_INFO. Users in this file obtain it with get_cpu_var()
 * and cast it to struct hv_gpci_request_buffer.
 */
|  | static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); | 
|  |  | 
|  | static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, | 
|  | u16 secondary_index, char *buf, | 
|  | size_t *n, struct hv_gpci_request_buffer *arg) | 
|  | { | 
|  | unsigned long ret; | 
|  | size_t i, j; | 
|  |  | 
|  | arg->params.counter_request = cpu_to_be32(req); | 
|  | arg->params.starting_index = cpu_to_be32(starting_index); | 
|  | arg->params.secondary_index = cpu_to_be16(secondary_index); | 
|  |  | 
|  | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, | 
|  | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', | 
|  | * which means that the current buffer size cannot accommodate | 
|  | * all the information and a partial buffer returned. | 
|  | * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. | 
|  | * | 
|  | * ret value as H_AUTHORITY implies that partition is not permitted to retrieve | 
|  | * performance information, and required to set | 
|  | * "Enable Performance Information Collection" option. | 
|  | */ | 
|  | if (ret == H_AUTHORITY) | 
|  | return -EPERM; | 
|  |  | 
|  | /* | 
|  | * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE | 
|  | * because of invalid buffer-length/address or due to some hardware | 
|  | * error. | 
|  | */ | 
|  | if (ret && (ret != H_PARAMETER)) | 
|  | return -EIO; | 
|  |  | 
|  | /* | 
|  | * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' | 
|  | * to show the total number of counter_value array elements | 
|  | * returned via hcall. | 
|  | * hcall also populates 'cv_element_size' corresponds to individual | 
|  | * counter_value array element size. Below loop go through all | 
|  | * counter_value array elements as per their size and add it to | 
|  | * the output buffer. | 
|  | */ | 
|  | for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { | 
|  | j = i * be16_to_cpu(arg->params.cv_element_size); | 
|  |  | 
|  | for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) | 
|  | *n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[j]); | 
|  | *n += sprintf(buf + *n,  "\n"); | 
|  | } | 
|  |  | 
|  | if (*n >= PAGE_SIZE) { | 
|  | pr_info("System information exceeds PAGE_SIZE\n"); | 
|  | return -EFBIG; | 
|  | } | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, | 
|  | char *buf) | 
|  | { | 
|  | struct hv_gpci_request_buffer *arg; | 
|  | unsigned long ret; | 
|  | size_t n = 0; | 
|  |  | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * Pass the counter request value 0xD0 corresponds to request | 
|  | * type 'Processor_bus_topology', to retrieve | 
|  | * the system topology information. | 
|  | * starting_index value implies the starting hardware | 
|  | * chip id. | 
|  | */ | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], | 
|  | 0, 0, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which | 
|  | * implies that buffer can't accommodate all information, and a partial buffer | 
|  | * returned. To handle that, we need to make subsequent requests | 
|  | * with next starting index to retrieve additional (missing) data. | 
|  | * Below loop do subsequent hcalls with next starting index and add it | 
|  | * to buffer util we get all the information. | 
|  | */ | 
|  | while (ret == H_PARAMETER) { | 
|  | int returned_values = be16_to_cpu(arg->params.returned_values); | 
|  | int elementsize = be16_to_cpu(arg->params.cv_element_size); | 
|  | int last_element = (returned_values - 1) * elementsize; | 
|  |  | 
|  | /* | 
|  | * Since the starting index value is part of counter_value | 
|  | * buffer elements, use the starting index value in the last | 
|  | * element and add 1 to make subsequent hcalls. | 
|  | */ | 
|  | u32 starting_index = arg->bytes[last_element + 3] + | 
|  | (arg->bytes[last_element + 2] << 8) + | 
|  | (arg->bytes[last_element + 1] << 16) + | 
|  | (arg->bytes[last_element] << 24) + 1; | 
|  |  | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], | 
|  | starting_index, 0, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | return n; | 
|  |  | 
|  | out: | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, | 
|  | char *buf) | 
|  | { | 
|  | struct hv_gpci_request_buffer *arg; | 
|  | unsigned long ret; | 
|  | size_t n = 0; | 
|  |  | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * Pass the counter request value 0x90 corresponds to request | 
|  | * type 'Processor_config', to retrieve | 
|  | * the system processor information. | 
|  | * starting_index value implies the starting hardware | 
|  | * processor index. | 
|  | */ | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], | 
|  | 0, 0, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which | 
|  | * implies that buffer can't accommodate all information, and a partial buffer | 
|  | * returned. To handle that, we need to take subsequent requests | 
|  | * with next starting index to retrieve additional (missing) data. | 
|  | * Below loop do subsequent hcalls with next starting index and add it | 
|  | * to buffer util we get all the information. | 
|  | */ | 
|  | while (ret == H_PARAMETER) { | 
|  | int returned_values = be16_to_cpu(arg->params.returned_values); | 
|  | int elementsize = be16_to_cpu(arg->params.cv_element_size); | 
|  | int last_element = (returned_values - 1) * elementsize; | 
|  |  | 
|  | /* | 
|  | * Since the starting index is part of counter_value | 
|  | * buffer elements, use the starting index value in the last | 
|  | * element and add 1 to subsequent hcalls. | 
|  | */ | 
|  | u32 starting_index = arg->bytes[last_element + 3] + | 
|  | (arg->bytes[last_element + 2] << 8) + | 
|  | (arg->bytes[last_element + 1] << 16) + | 
|  | (arg->bytes[last_element] << 24) + 1; | 
|  |  | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], | 
|  | starting_index, 0, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | return n; | 
|  |  | 
|  | out: | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, | 
|  | struct device_attribute *attr, char *buf) | 
|  | { | 
|  | struct hv_gpci_request_buffer *arg; | 
|  | unsigned long ret; | 
|  | size_t n = 0; | 
|  |  | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * Pass the counter request 0xA0 corresponds to request | 
|  | * type 'Affinity_domain_information_by_virutal_processor', | 
|  | * to retrieve the system affinity domain information. | 
|  | * starting_index value refers to the starting hardware | 
|  | * processor index. | 
|  | */ | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], | 
|  | 0, 0, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which | 
|  | * implies that buffer can't accommodate all information, and a partial buffer | 
|  | * returned. To handle that, we need to take subsequent requests | 
|  | * with next secondary index to retrieve additional (missing) data. | 
|  | * Below loop do subsequent hcalls with next secondary index and add it | 
|  | * to buffer util we get all the information. | 
|  | */ | 
|  | while (ret == H_PARAMETER) { | 
|  | int returned_values = be16_to_cpu(arg->params.returned_values); | 
|  | int elementsize = be16_to_cpu(arg->params.cv_element_size); | 
|  | int last_element = (returned_values - 1) * elementsize; | 
|  |  | 
|  | /* | 
|  | * Since the starting index and secondary index type is part of the | 
|  | * counter_value buffer elements, use the starting index value in the | 
|  | * last array element as subsequent starting index, and use secondary index | 
|  | * value in the last array element plus 1 as subsequent secondary index. | 
|  | * For counter request '0xA0', starting index points to partition id | 
|  | * and secondary index points to corresponding virtual processor index. | 
|  | */ | 
|  | u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); | 
|  | u16 secondary_index = arg->bytes[last_element + 3] + | 
|  | (arg->bytes[last_element + 2] << 8) + 1; | 
|  |  | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], | 
|  | starting_index, secondary_index, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | return n; | 
|  |  | 
|  | out: | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, | 
|  | char *buf) | 
|  | { | 
|  | struct hv_gpci_request_buffer *arg; | 
|  | unsigned long ret; | 
|  | size_t n = 0; | 
|  |  | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * Pass the counter request 0xB0 corresponds to request | 
|  | * type 'Affinity_domain_information_by_domain', | 
|  | * to retrieve the system affinity domain information. | 
|  | * starting_index value refers to the starting hardware | 
|  | * processor index. | 
|  | */ | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], | 
|  | 0, 0, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which | 
|  | * implies that buffer can't accommodate all information, and a partial buffer | 
|  | * returned. To handle that, we need to take subsequent requests | 
|  | * with next starting index to retrieve additional (missing) data. | 
|  | * Below loop do subsequent hcalls with next starting index and add it | 
|  | * to buffer util we get all the information. | 
|  | */ | 
|  | while (ret == H_PARAMETER) { | 
|  | int returned_values = be16_to_cpu(arg->params.returned_values); | 
|  | int elementsize = be16_to_cpu(arg->params.cv_element_size); | 
|  | int last_element = (returned_values - 1) * elementsize; | 
|  |  | 
|  | /* | 
|  | * Since the starting index value is part of counter_value | 
|  | * buffer elements, use the starting index value in the last | 
|  | * element and add 1 to make subsequent hcalls. | 
|  | */ | 
|  | u32 starting_index = arg->bytes[last_element + 1] + | 
|  | (arg->bytes[last_element] << 8) + 1; | 
|  |  | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], | 
|  | starting_index, 0, buf, &n, arg); | 
|  |  | 
|  | if (!ret) | 
|  | return n; | 
|  |  | 
|  | if (ret != H_PARAMETER) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | return n; | 
|  |  | 
|  | out: | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static void affinity_domain_via_partition_result_parse(int returned_values, | 
|  | int element_size, char *buf, size_t *last_element, | 
|  | size_t *n, struct hv_gpci_request_buffer *arg) | 
|  | { | 
|  | size_t i = 0, j = 0; | 
|  | size_t k, l, m; | 
|  | uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; | 
|  |  | 
|  | /* | 
|  | * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' | 
|  | * to show the total number of counter_value array elements | 
|  | * returned via hcall. | 
|  | * Unlike other request types, the data structure returned by this | 
|  | * request is variable-size. For this counter request type, | 
|  | * hcall populates 'cv_element_size' corresponds to minimum size of | 
|  | * the structure returned i.e; the size of the structure with no domain | 
|  | * information. Below loop go through all counter_value array | 
|  | * to determine the number and size of each domain array element and | 
|  | * add it to the output buffer. | 
|  | */ | 
|  | while (i < returned_values) { | 
|  | k = j; | 
|  | for (; k < j + element_size; k++) | 
|  | *n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]); | 
|  | *n += sprintf(buf + *n,  "\n"); | 
|  |  | 
|  | total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; | 
|  | size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; | 
|  |  | 
|  | for (l = 0; l < total_affinity_domain_ele; l++) { | 
|  | for (m = 0; m < size_of_each_affinity_domain_ele; m++) { | 
|  | *n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]); | 
|  | k++; | 
|  | } | 
|  | *n += sprintf(buf + *n,  "\n"); | 
|  | } | 
|  |  | 
|  | *n += sprintf(buf + *n,  "\n"); | 
|  | i++; | 
|  | j = k; | 
|  | } | 
|  |  | 
|  | *last_element = k; | 
|  | } | 
|  |  | 
|  | static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, | 
|  | char *buf) | 
|  | { | 
|  | struct hv_gpci_request_buffer *arg; | 
|  | unsigned long ret; | 
|  | size_t n = 0; | 
|  | size_t last_element = 0; | 
|  | u32 starting_index; | 
|  |  | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * Pass the counter request value 0xB1 corresponds to counter request | 
|  | * type 'Affinity_domain_information_by_partition', | 
|  | * to retrieve the system affinity domain by partition information. | 
|  | * starting_index value refers to the starting hardware | 
|  | * processor index. | 
|  | */ | 
|  | arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); | 
|  | arg->params.starting_index = cpu_to_be32(0); | 
|  |  | 
|  | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, | 
|  | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | if (!ret) | 
|  | goto parse_result; | 
|  |  | 
|  | if (ret && (ret != H_PARAMETER)) | 
|  | goto out; | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' implies that the current buffer size | 
|  | * can't accommodate all the information, and a partial buffer | 
|  | * returned. To handle that, we need to make subsequent requests | 
|  | * with next starting index to retrieve additional (missing) data. | 
|  | * Below loop do subsequent hcalls with next starting index and add it | 
|  | * to buffer util we get all the information. | 
|  | */ | 
|  | while (ret == H_PARAMETER) { | 
|  | affinity_domain_via_partition_result_parse( | 
|  | be16_to_cpu(arg->params.returned_values) - 1, | 
|  | be16_to_cpu(arg->params.cv_element_size), buf, | 
|  | &last_element, &n, arg); | 
|  |  | 
|  | if (n >= PAGE_SIZE) { | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  | pr_debug("System information exceeds PAGE_SIZE\n"); | 
|  | return -EFBIG; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Since the starting index value is part of counter_value | 
|  | * buffer elements, use the starting_index value in the last | 
|  | * element and add 1 to make subsequent hcalls. | 
|  | */ | 
|  | starting_index = (u8)arg->bytes[last_element] << 8 | | 
|  | (u8)arg->bytes[last_element + 1]; | 
|  |  | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  | arg->params.counter_request = cpu_to_be32( | 
|  | sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); | 
|  | arg->params.starting_index = cpu_to_be32(starting_index); | 
|  |  | 
|  | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, | 
|  | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | if (ret && (ret != H_PARAMETER)) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | parse_result: | 
|  | affinity_domain_via_partition_result_parse( | 
|  | be16_to_cpu(arg->params.returned_values), | 
|  | be16_to_cpu(arg->params.cv_element_size), | 
|  | buf, &last_element, &n, arg); | 
|  |  | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  | return n; | 
|  |  | 
|  | out: | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', | 
|  | * which means that the current buffer size cannot accommodate | 
|  | * all the information and a partial buffer returned. | 
|  | * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. | 
|  | * | 
|  | * ret value as H_AUTHORITY implies that partition is not permitted to retrieve | 
|  | * performance information, and required to set | 
|  | * "Enable Performance Information Collection" option. | 
|  | */ | 
|  | if (ret == H_AUTHORITY) | 
|  | return -EPERM; | 
|  |  | 
|  | /* | 
|  | * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE | 
|  | * because of invalid buffer-length/address or due to some hardware | 
|  | * error. | 
|  | */ | 
|  | return -EIO; | 
|  | } | 
|  |  | 
/*
 * Read-only device attributes backed by kernel_version_show() and
 * cpumask_show(); they are referenced below as dev_attr_kernel_version and
 * dev_attr_cpumask.
 */
|  | static DEVICE_ATTR_RO(kernel_version); | 
|  | static DEVICE_ATTR_RO(cpumask); | 
|  |  | 
/*
 * One read-only attribute per hv_perf_caps member, each printed with the
 * given format; they are referenced below as hv_caps_attr_<name>.
 */
|  | HV_CAPS_ATTR(version, "0x%x\n"); | 
|  | HV_CAPS_ATTR(ga, "%d\n"); | 
|  | HV_CAPS_ATTR(expanded, "%d\n"); | 
|  | HV_CAPS_ATTR(lab, "%d\n"); | 
|  | HV_CAPS_ATTR(collect_privileged, "%d\n"); | 
|  |  | 
/*
 * Attributes of the "interface" sysfs group. The six fixed entries occupy
 * slots 0-5; the NULL placeholders that follow sit at the slot numbers given
 * by the INTERFACE_*_ATTR defines (6-10), and the last NULL (slot 11,
 * INTERFACE_NULL_ATTR) terminates the array.
 */
|  | static struct attribute *interface_attrs[] = { | 
|  | &dev_attr_kernel_version.attr, | 
|  | &hv_caps_attr_version.attr, | 
|  | &hv_caps_attr_ga.attr, | 
|  | &hv_caps_attr_expanded.attr, | 
|  | &hv_caps_attr_lab.attr, | 
|  | &hv_caps_attr_collect_privileged.attr, | 
|  | /* | 
|  | * This NULL is a placeholder for the processor_bus_topology | 
|  | * attribute, set in init function if applicable. | 
|  | */ | 
|  | NULL, | 
|  | /* | 
|  | * This NULL is a placeholder for the processor_config | 
|  | * attribute, set in init function if applicable. | 
|  | */ | 
|  | NULL, | 
|  | /* | 
|  | * This NULL is a placeholder for the affinity_domain_via_virtual_processor | 
|  | * attribute, set in init function if applicable. | 
|  | */ | 
|  | NULL, | 
|  | /* | 
|  | * This NULL is a placeholder for the affinity_domain_via_domain | 
|  | * attribute, set in init function if applicable. | 
|  | */ | 
|  | NULL, | 
|  | /* | 
|  | * This NULL is a placeholder for the affinity_domain_via_partition | 
|  | * attribute, set in init function if applicable. | 
|  | */ | 
|  | NULL, | 
|  | NULL, | 
|  | }; | 
|  |  | 
/* NULL-terminated list holding only the 'cpumask' attribute. */
|  | static struct attribute *cpumask_attrs[] = { | 
|  | &dev_attr_cpumask.attr, | 
|  | NULL, | 
|  | }; | 
|  |  | 
/* Unnamed group, so 'cpumask' is not placed in a sub-directory. */
|  | static const struct attribute_group cpumask_attr_group = { | 
|  | .attrs = cpumask_attrs, | 
|  | }; | 
|  |  | 
/* "interface" attribute group built from interface_attrs[]. */
|  | static const struct attribute_group interface_group = { | 
|  | .name = "interface", | 
|  | .attrs = interface_attrs, | 
|  | }; | 
|  |  | 
/* All sysfs groups of the PMU; installed through h_gpci_pmu.attr_groups. */
|  | static const struct attribute_group *attr_groups[] = { | 
|  | &format_group, | 
|  | &event_group, | 
|  | &interface_group, | 
|  | &cpumask_attr_group, | 
|  | NULL, | 
|  | }; | 
|  |  | 
|  | static unsigned long single_gpci_request(u32 req, u32 starting_index, | 
|  | u16 secondary_index, u8 version_in, u32 offset, u8 length, | 
|  | u64 *value) | 
|  | { | 
|  | unsigned long ret; | 
|  | size_t i; | 
|  | u64 count; | 
|  | struct hv_gpci_request_buffer *arg; | 
|  |  | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | arg->params.counter_request = cpu_to_be32(req); | 
|  | arg->params.starting_index = cpu_to_be32(starting_index); | 
|  | arg->params.secondary_index = cpu_to_be16(secondary_index); | 
|  | arg->params.counter_info_version_in = version_in; | 
|  |  | 
|  | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, | 
|  | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', | 
|  | * specifies that the current buffer size cannot accommodate | 
|  | * all the information and a partial buffer returned. | 
|  | * Since in this function we are only accessing data for a given starting index, | 
|  | * we don't need to accommodate whole data and can get required count by | 
|  | * accessing first entry data. | 
|  | * Hence hcall fails only incase the ret value is other than H_SUCCESS or | 
|  | * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). | 
|  | */ | 
|  | if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) | 
|  | ret = 0; | 
|  |  | 
|  | if (ret) { | 
|  | pr_devel("hcall failed: 0x%lx\n", ret); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * we verify offset and length are within the zeroed buffer at event | 
|  | * init. | 
|  | */ | 
|  | count = 0; | 
|  | for (i = offset; i < offset + length; i++) | 
|  | count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); | 
|  |  | 
|  | *value = count; | 
|  | out: | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static u64 h_gpci_get_value(struct perf_event *event) | 
|  | { | 
|  | u64 count; | 
|  | unsigned long ret = single_gpci_request(event_get_request(event), | 
|  | event_get_starting_index(event), | 
|  | event_get_secondary_index(event), | 
|  | event_get_counter_info_version(event), | 
|  | event_get_offset(event), | 
|  | event_get_length(event), | 
|  | &count); | 
|  | if (ret) | 
|  | return 0; | 
|  | return count; | 
|  | } | 
|  |  | 
|  | static void h_gpci_event_update(struct perf_event *event) | 
|  | { | 
|  | s64 prev; | 
|  | u64 now = h_gpci_get_value(event); | 
|  | prev = local64_xchg(&event->hw.prev_count, now); | 
|  | local64_add(now - prev, &event->count); | 
|  | } | 
|  |  | 
|  | static void h_gpci_event_start(struct perf_event *event, int flags) | 
|  | { | 
|  | local64_set(&event->hw.prev_count, h_gpci_get_value(event)); | 
|  | } | 
|  |  | 
/*
 * h_gpci_event_stop - take a final reading for @event by updating its count.
 * Also installed as the PMU's .del callback. @flags is not used.
 */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	(void)flags;

	h_gpci_event_update(event);
}
|  |  | 
|  | static int h_gpci_event_add(struct perf_event *event, int flags) | 
|  | { | 
|  | if (flags & PERF_EF_START) | 
|  | h_gpci_event_start(event, flags); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int h_gpci_event_init(struct perf_event *event) | 
|  | { | 
|  | u64 count; | 
|  | u8 length; | 
|  | unsigned long ret; | 
|  |  | 
|  | /* Not our event */ | 
|  | if (event->attr.type != event->pmu->type) | 
|  | return -ENOENT; | 
|  |  | 
|  | /* config2 is unused */ | 
|  | if (event->attr.config2) { | 
|  | pr_devel("config2 set when reserved\n"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | /* no branch sampling */ | 
|  | if (has_branch_stack(event)) | 
|  | return -EOPNOTSUPP; | 
|  |  | 
|  | length = event_get_length(event); | 
|  | if (length < 1 || length > 8) { | 
|  | pr_devel("length invalid\n"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | /* last byte within the buffer? */ | 
|  | if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { | 
|  | pr_devel("request outside of buffer: %zu > %zu\n", | 
|  | (size_t)event_get_offset(event) + length, | 
|  | HGPCI_MAX_DATA_BYTES); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | /* check if the request works... */ | 
|  | ret = single_gpci_request(event_get_request(event), | 
|  | event_get_starting_index(event), | 
|  | event_get_secondary_index(event), | 
|  | event_get_counter_info_version(event), | 
|  | event_get_offset(event), | 
|  | length, | 
|  | &count); | 
|  |  | 
|  | /* | 
|  | * ret value as H_AUTHORITY implies that partition is not permitted to retrieve | 
|  | * performance information, and required to set | 
|  | * "Enable Performance Information Collection" option. | 
|  | */ | 
|  | if (ret == H_AUTHORITY) | 
|  | return -EPERM; | 
|  |  | 
|  | if (ret) { | 
|  | pr_devel("gpci hcall failed\n"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
/*
 * PMU description registered under the name "hv_gpci". Events are not bound
 * to a task context (perf_invalid_context). h_gpci_event_stop() serves as
 * both .stop and .del, and h_gpci_event_update() is the .read callback.
 */
|  | static struct pmu h_gpci_pmu = { | 
|  | .task_ctx_nr = perf_invalid_context, | 
|  |  | 
|  | .name = "hv_gpci", | 
|  | .attr_groups = attr_groups, | 
|  | .event_init  = h_gpci_event_init, | 
|  | .add         = h_gpci_event_add, | 
|  | .del         = h_gpci_event_stop, | 
|  | .start       = h_gpci_event_start, | 
|  | .stop        = h_gpci_event_stop, | 
|  | .read        = h_gpci_event_update, | 
|  | .capabilities = PERF_PMU_CAP_NO_EXCLUDE, | 
|  | }; | 
|  |  | 
|  | static int ppc_hv_gpci_cpu_online(unsigned int cpu) | 
|  | { | 
|  | if (cpumask_empty(&hv_gpci_cpumask)) | 
|  | cpumask_set_cpu(cpu, &hv_gpci_cpumask); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int ppc_hv_gpci_cpu_offline(unsigned int cpu) | 
|  | { | 
|  | int target; | 
|  |  | 
|  | /* Check if exiting cpu is used for collecting gpci events */ | 
|  | if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) | 
|  | return 0; | 
|  |  | 
|  | /* Find a new cpu to collect gpci events */ | 
|  | target = cpumask_last(cpu_active_mask); | 
|  |  | 
|  | if (target < 0 || target >= nr_cpu_ids) { | 
|  | pr_err("hv_gpci: CPU hotplug init failed\n"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /* Migrate gpci events to the new target */ | 
|  | cpumask_set_cpu(target, &hv_gpci_cpumask); | 
|  | perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int hv_gpci_cpu_hotplug_init(void) | 
|  | { | 
|  | return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, | 
|  | "perf/powerpc/hv_gcpi:online", | 
|  | ppc_hv_gpci_cpu_online, | 
|  | ppc_hv_gpci_cpu_offline); | 
|  | } | 
|  |  | 
|  | static struct device_attribute *sysinfo_device_attr_create(int | 
|  | sysinfo_interface_group_index, u32 req) | 
|  | { | 
|  | struct device_attribute *attr = NULL; | 
|  | unsigned long ret; | 
|  | struct hv_gpci_request_buffer *arg; | 
|  |  | 
|  | if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || | 
|  | sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { | 
|  | pr_info("Wrong interface group index for system information\n"); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | /* Check for given counter request value support */ | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | arg->params.counter_request = cpu_to_be32(req); | 
|  |  | 
|  | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, | 
|  | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  |  | 
|  | /* | 
|  | * Add given counter request value attribute in the interface_attrs | 
|  | * attribute array, only for valid return types. | 
|  | */ | 
|  | if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { | 
|  | attr = kzalloc(sizeof(*attr), GFP_KERNEL); | 
|  | if (!attr) | 
|  | return NULL; | 
|  |  | 
|  | sysfs_attr_init(&attr->attr); | 
|  | attr->attr.mode = 0444; | 
|  |  | 
|  | switch (sysinfo_interface_group_index) { | 
|  | case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: | 
|  | attr->attr.name = "processor_bus_topology"; | 
|  | attr->show = processor_bus_topology_show; | 
|  | break; | 
|  | case INTERFACE_PROCESSOR_CONFIG_ATTR: | 
|  | attr->attr.name = "processor_config"; | 
|  | attr->show = processor_config_show; | 
|  | break; | 
|  | case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: | 
|  | attr->attr.name = "affinity_domain_via_virtual_processor"; | 
|  | attr->show = affinity_domain_via_virtual_processor_show; | 
|  | break; | 
|  | case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: | 
|  | attr->attr.name = "affinity_domain_via_domain"; | 
|  | attr->show = affinity_domain_via_domain_show; | 
|  | break; | 
|  | case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: | 
|  | attr->attr.name = "affinity_domain_via_partition"; | 
|  | attr->show = affinity_domain_via_partition_show; | 
|  | break; | 
|  | } | 
|  | } else | 
|  | pr_devel("hcall failed, with error: 0x%lx\n", ret); | 
|  |  | 
|  | return attr; | 
|  | } | 
|  |  | 
|  | static void add_sysinfo_interface_files(void) | 
|  | { | 
|  | int sysfs_count; | 
|  | struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; | 
|  | int i; | 
|  |  | 
|  | sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; | 
|  |  | 
|  | /* Get device attribute for a given counter request value */ | 
|  | for (i = 0; i < sysfs_count; i++) { | 
|  | attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, | 
|  | sysinfo_counter_request[i]); | 
|  |  | 
|  | if (!attr[i]) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* Add sysinfo interface attributes in the interface_attrs attribute array */ | 
|  | for (i = 0; i < sysfs_count; i++) | 
|  | interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; | 
|  |  | 
|  | return; | 
|  |  | 
|  | out: | 
|  | /* | 
|  | * The sysinfo interface attributes will be added, only if hcall passed for | 
|  | * all the counter request values. Free the device attribute array incase | 
|  | * of any hcall failure. | 
|  | */ | 
|  | if (i > 0) { | 
|  | while (i >= 0) { | 
|  | kfree(attr[i]); | 
|  | i--; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static int hv_gpci_init(void) | 
|  | { | 
|  | int r; | 
|  | unsigned long hret; | 
|  | struct hv_perf_caps caps; | 
|  | struct hv_gpci_request_buffer *arg; | 
|  |  | 
|  | hv_gpci_assert_offsets_correct(); | 
|  |  | 
|  | if (!firmware_has_feature(FW_FEATURE_LPAR)) { | 
|  | pr_debug("not a virtualized system, not enabling\n"); | 
|  | return -ENODEV; | 
|  | } | 
|  |  | 
|  | hret = hv_perf_caps_get(&caps); | 
|  | if (hret) { | 
|  | pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", | 
|  | hret); | 
|  | return -ENODEV; | 
|  | } | 
|  |  | 
|  | /* init cpuhotplug */ | 
|  | r = hv_gpci_cpu_hotplug_init(); | 
|  | if (r) | 
|  | return r; | 
|  |  | 
|  | /* sampling not supported */ | 
|  | h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; | 
|  |  | 
|  | arg = (void *)get_cpu_var(hv_gpci_reqb); | 
|  | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); | 
|  |  | 
|  | /* | 
|  | * hcall H_GET_PERF_COUNTER_INFO populates the output | 
|  | * counter_info_version value based on the system hypervisor. | 
|  | * Pass the counter request 0x10 corresponds to request type | 
|  | * 'Dispatch_timebase_by_processor', to get the supported | 
|  | * counter_info_version. | 
|  | */ | 
|  | arg->params.counter_request = cpu_to_be32(0x10); | 
|  |  | 
|  | r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, | 
|  | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); | 
|  | if (r) { | 
|  | pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); | 
|  | arg->params.counter_info_version_out = 0x8; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Use counter_info_version_out value to assign | 
|  | * required hv-gpci event list. | 
|  | */ | 
|  | if (arg->params.counter_info_version_out >= 0x8) | 
|  | event_group.attrs = hv_gpci_event_attrs; | 
|  | else | 
|  | event_group.attrs = hv_gpci_event_attrs_v6; | 
|  |  | 
|  | put_cpu_var(hv_gpci_reqb); | 
|  |  | 
|  | r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); | 
|  | if (r) | 
|  | return r; | 
|  |  | 
|  | /* sysinfo interface files are only available for power10 and above platforms */ | 
|  | if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) | 
|  | add_sysinfo_interface_files(); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | device_initcall(hv_gpci_init); |