From 4ba9cb1a07f642d40eb4a310615832c3c47f29ab Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Fri, 18 Oct 2024 13:17:29 -0700
Subject: [PATCH 01/10] Add SBI v3.0 PMU enhancements

The SBI v3.0 specification[1] added two improvements to the PMU chapter.
The SBI v3.0 specification is frozen and in the public review phase as
per the RISC-V International guidelines.

1. Added an additional get_event_info function to query event
   availability in bulk instead of making an individual SBI call for
   each event. This helps improve boot time.
2. The raw event width allowed by the platform is widened to 56 bits
   with raw event v2, as per the new clarification in the privileged
   ISA[2].

Apart from implementing these new features, this series improves the
gpa range check in KVM and updates the KVM SBI implementation to SBI
v3.0. The OpenSBI patches have been merged. This series can be found
at [3].

[1] https://github.com/riscv-non-isa/riscv-sbi-doc/releases/download/v3.0-rc7/riscv-sbi.pdf
[2] https://github.com/riscv/riscv-isa-manual/issues/1578
[3] https://github.com/atishp04/linux/tree/b4/pmu_event_info_v3

To: Anup Patel
To: Will Deacon
To: Mark Rutland
To: Paul Walmsley
To: Palmer Dabbelt
To: Mayuresh Chitale
Cc: linux-riscv@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: Palmer Dabbelt
Cc: kvm@vger.kernel.org
Cc: kvm-riscv@lists.infradead.org
Signed-off-by: Atish Patra

---
Changes in v3:
- Rebased on top of v6.15-rc7
- Link to v2: https://lore.kernel.org/r/20250115-pmu_event_info-v2-0-84815b70383b@rivosinc.com

Changes in v2:
- Dropped PATCH 2 to be taken during rcX.
- Improved the gpa range check validation by introducing a helper
  function and checking the entire range.
- Link to v1: https://lore.kernel.org/r/20241119-pmu_event_info-v1-0-a4f9691421f8@rivosinc.com

--- b4-submit-tracking ---
# This section is used internally by b4 prep for tracking purposes.
{
  "series": {
    "revision": 3,
    "change-id": "20241018-pmu_event_info-986e21ce6bd3",
    "prefixes": [],
    "history": {
      "v1": [
        "20241119-pmu_event_info-v1-0-a4f9691421f8@rivosinc.com"
      ],
      "v2": [
        "20250115-pmu_event_info-v2-0-84815b70383b@rivosinc.com"
      ]
    },
    "prerequisites": [
      "message-id: <20241212-pmu_event_fixes_v2-v2-0-813e8a4f5962@rivosinc.com>",
      "base-commit: v6.13-rc2"
    ]
  }
}

From ce60fee9226651c21bb1cd0449efa8d6c9702069 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Fri, 18 Oct 2024 16:14:55 -0700
Subject: [PATCH 02/10] drivers/perf: riscv: Add SBI v3.0 flag

SBI v3.0 introduces new PMU-related features:

1. Raw event v2, which allows the mhpmeventX value to be 56 bits wide.
2. A get event info function to query event availability in bulk with
   a single call.
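For illustration only (this snippet is not part of this patch's diff),
later patches in the series gate the raw event handling on this flag
roughly as follows; the surrounding variables are placeholders here:

	/* Sketch: pick the raw event variant based on the SBI version */
	if (sbi_v3_available) {
		/* SBI v3.0+: raw event v2 carries a 56-bit mhpmeventX value */
		ret = RISCV_PMU_RAW_EVENT_V2_IDX;
		*econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK;
	} else {
		/* Older SBI: legacy raw event with a 48-bit value */
		ret = RISCV_PMU_RAW_EVENT_IDX;
		*econfig = config & RISCV_PMU_RAW_EVENT_MASK;
	}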
Signed-off-by: Atish Patra --- drivers/perf/riscv_pmu_sbi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 698de8ddf895ba..cfd6946fca42e6 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -63,6 +63,7 @@ PMU_FORMAT_ATTR(event, "config:0-47"); PMU_FORMAT_ATTR(firmware, "config:62-63"); static bool sbi_v2_available; +static bool sbi_v3_available; static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available); #define sbi_pmu_snapshot_available() \ static_branch_unlikely(&sbi_pmu_snapshot_available) @@ -1452,6 +1453,9 @@ static int __init pmu_sbi_devinit(void) if (sbi_spec_version >= sbi_mk_version(2, 0)) sbi_v2_available = true; + if (sbi_spec_version >= sbi_mk_version(3, 0)) + sbi_v3_available = true; + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING, "perf/riscv/pmu:starting", pmu_sbi_starting_cpu, pmu_sbi_dying_cpu); From 86fcff815864e1267ef94c990dadfe428a12bc2c Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Tue, 12 Nov 2024 14:16:01 -0800 Subject: [PATCH 03/10] drivers/perf: riscv: Add raw event v2 support SBI v3.0 introduced a new raw event type that allows wider mhpmeventX width to be programmed via CFG_MATCH. Use the raw event v2 if SBI v3.0 is available. Signed-off-by: Atish Patra --- arch/riscv/include/asm/sbi.h | 4 ++++ drivers/perf/riscv_pmu_sbi.c | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 3d250824178bd5..6ce385a3a7bb7d 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -160,7 +160,10 @@ struct riscv_pmu_snapshot_data { #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) #define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0) +/* SBI v3.0 allows extended hpmeventX width value */ +#define RISCV_PMU_RAW_EVENT_V2_MASK GENMASK_ULL(55, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 +#define RISCV_PMU_RAW_EVENT_V2_IDX 0x30000 #define RISCV_PLAT_FW_EVENT 0xFFFF /** General pmu event codes specified in SBI PMU extension */ @@ -218,6 +221,7 @@ enum sbi_pmu_event_type { SBI_PMU_EVENT_TYPE_HW = 0x0, SBI_PMU_EVENT_TYPE_CACHE = 0x1, SBI_PMU_EVENT_TYPE_RAW = 0x2, + SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3, SBI_PMU_EVENT_TYPE_FW = 0xf, }; diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index cfd6946fca42e6..273ed70098a388 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -59,7 +59,7 @@ asm volatile(ALTERNATIVE( \ #define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS) #define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY) -PMU_FORMAT_ATTR(event, "config:0-47"); +PMU_FORMAT_ATTR(event, "config:0-55"); PMU_FORMAT_ATTR(firmware, "config:62-63"); static bool sbi_v2_available; @@ -527,8 +527,10 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) break; case PERF_TYPE_RAW: /* - * As per SBI specification, the upper 16 bits must be unused - * for a hardware raw event. + * As per SBI v0.3 specification, + * -- the upper 16 bits must be unused for a hardware raw event. + * As per SBI v3.0 specification, + * -- the upper 8 bits must be unused for a hardware raw event. 
 		 * Bits 63:62 are used to distinguish between raw events
 		 * 00 - Hardware raw event
 		 * 10 - SBI firmware events
 		 * 11 - Risc-V platform specific firmware event
 		 */
@@ -537,8 +539,14 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
 
 		switch (config >> 62) {
 		case 0:
+			if (sbi_v3_available) {
+				/* Return error any bits [56-63] is set as it is not allowed by the spec */
+				if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) {
+					*econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK;
+					ret = RISCV_PMU_RAW_EVENT_V2_IDX;
+				}
 			/* Return error any bits [48-63] is set as it is not allowed by the spec */
-			if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) {
+			} else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) {
 				*econfig = config & RISCV_PMU_RAW_EVENT_MASK;
 				ret = RISCV_PMU_RAW_EVENT_IDX;
 			}

From b4dc051533e195e1b484f0dd58016f6d8968393c Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 12 Nov 2024 14:17:48 -0800
Subject: [PATCH 04/10] RISC-V: KVM: Add support for Raw event v2

SBI v3.0 introduced a new raw event type v2 for wider mhpmeventX
programming. Add support for it in KVM.

Signed-off-by: Atish Patra
---
 arch/riscv/kvm/vcpu_pmu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 78ac3216a54ddb..15d71a7b75ba63 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -60,6 +60,7 @@ static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
 		type = PERF_TYPE_HW_CACHE;
 		break;
 	case SBI_PMU_EVENT_TYPE_RAW:
+	case SBI_PMU_EVENT_TYPE_RAW_V2:
 	case SBI_PMU_EVENT_TYPE_FW:
 		type = PERF_TYPE_RAW;
 		break;
@@ -128,6 +129,9 @@ static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
 	case SBI_PMU_EVENT_TYPE_RAW:
 		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
 		break;
+	case SBI_PMU_EVENT_TYPE_RAW_V2:
+		config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
+		break;
 	case SBI_PMU_EVENT_TYPE_FW:
 		if (ecode < SBI_PMU_FW_MAX)
 			config = (1ULL << 63) | ecode;

From 55e11b9a132c3d98f3b4005444249660df396e45 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Mon, 11 Nov 2024 11:38:54 -0800
Subject: [PATCH 05/10] drivers/perf: riscv: Implement PMU event info function

With the new SBI PMU event info function, we can query the availability
of all the standard SBI PMU events at boot time with a single ecall.
This improves boot time by avoiding an SBI call for each standard PMU
event. Since this function is defined only in SBI v3.0, invoke it only
if the underlying SBI implementation is v3.0 or higher.
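A condensed sketch of the query flow implemented below (error handling
and the cache-event walk are omitted, and the names are abbreviated;
see the patch for the full version):

	/* Fill one riscv_pmu_event_info entry per standard event, make a
	 * single GET_INFO ecall, then read back the 'output' bit of each entry.
	 */
	for (i = 0; i < num_events; i++)
		event_info_shmem[i].event_idx = pmu_hw_event_map[i].event_idx;

	base_addr = __pa(event_info_shmem);
	if (IS_ENABLED(CONFIG_32BIT))
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO,
				lower_32_bits(base_addr), upper_32_bits(base_addr),
				num_events, 0, 0, 0);
	else
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO,
				base_addr, 0, num_events, 0, 0, 0);

	/* event i is unsupported if bit 0 of event_info_shmem[i].output is clear */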
Signed-off-by: Atish Patra --- arch/riscv/include/asm/sbi.h | 9 +++++ drivers/perf/riscv_pmu_sbi.c | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6ce385a3a7bb7d..77b6997eb6c5f8 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -135,6 +135,7 @@ enum sbi_ext_pmu_fid { SBI_EXT_PMU_COUNTER_FW_READ, SBI_EXT_PMU_COUNTER_FW_READ_HI, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + SBI_EXT_PMU_EVENT_GET_INFO, }; union sbi_pmu_ctr_info { @@ -158,6 +159,14 @@ struct riscv_pmu_snapshot_data { u64 reserved[447]; }; +struct riscv_pmu_event_info { + u32 event_idx; + u32 output; + u64 event_data; +}; + +#define RISCV_PMU_EVENT_INFO_OUTPUT_MASK 0x01 + #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) #define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0) /* SBI v3.0 allows extended hpmeventX width value */ diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 273ed70098a388..33d8348bf68a2d 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -299,6 +299,66 @@ static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] }, }; +static int pmu_sbi_check_event_info(void) +{ + int num_events = ARRAY_SIZE(pmu_hw_event_map) + PERF_COUNT_HW_CACHE_MAX * + PERF_COUNT_HW_CACHE_OP_MAX * PERF_COUNT_HW_CACHE_RESULT_MAX; + struct riscv_pmu_event_info *event_info_shmem; + phys_addr_t base_addr; + int i, j, k, result = 0, count = 0; + struct sbiret ret; + + event_info_shmem = kcalloc(num_events, sizeof(*event_info_shmem), GFP_KERNEL); + if (!event_info_shmem) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + event_info_shmem[count++].event_idx = pmu_hw_event_map[i].event_idx; + + for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) { + for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) { + for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + event_info_shmem[count++].event_idx = + pmu_cache_event_map[i][j][k].event_idx; + } + } + + base_addr = __pa(event_info_shmem); + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, lower_32_bits(base_addr), + upper_32_bits(base_addr), count, 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, base_addr, 0, + count, 0, 0, 0); + if (ret.error) { + result = -EOPNOTSUPP; + goto free_mem; + } + + for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) { + if (!(event_info_shmem[i].output & RISCV_PMU_EVENT_INFO_OUTPUT_MASK)) + pmu_hw_event_map[i].event_idx = -ENOENT; + } + + count = ARRAY_SIZE(pmu_hw_event_map); + + for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) { + for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) { + for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) { + if (!(event_info_shmem[count].output & + RISCV_PMU_EVENT_INFO_OUTPUT_MASK)) + pmu_cache_event_map[i][j][k].event_idx = -ENOENT; + count++; + } + } + } + +free_mem: + kfree(event_info_shmem); + + return result; +} + static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) { struct sbiret ret; @@ -316,6 +376,14 @@ static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) static void pmu_sbi_check_std_events(struct work_struct *work) { + int ret; + + if (sbi_v3_available) { + ret = pmu_sbi_check_event_info(); + if (!ret) + return; + } + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) pmu_sbi_check_event(&pmu_hw_event_map[i]); From 39bbf1fd7aba3d0b3e969b6bb1b577d41d052dd4 Mon Sep 17 00:00:00 
2001 From: Atish Patra Date: Mon, 11 Nov 2024 11:42:49 -0800 Subject: [PATCH 06/10] drivers/perf: riscv: Export PMU event info function The event mapping function can be used in event info function to find out the corresponding SBI PMU event encoding during the get_event_info function as well. Refactor and export it so that it can be invoked from kvm and internal driver. Signed-off-by: Atish Patra --- drivers/perf/riscv_pmu_sbi.c | 124 ++++++++++++++++++--------------- include/linux/perf/riscv_pmu.h | 2 + 2 files changed, 69 insertions(+), 57 deletions(-) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 33d8348bf68a2d..f5d3db6dba18be 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -100,6 +100,7 @@ static unsigned int riscv_pmu_irq; /* Cache the available counters in a bitmask */ static unsigned long cmask; +static int pmu_event_find_cache(u64 config); struct sbi_pmu_event_data { union { union { @@ -411,6 +412,71 @@ static bool pmu_sbi_ctr_is_fw(int cidx) return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false; } +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig) +{ + int ret = -ENOENT; + + switch (type) { + case PERF_TYPE_HARDWARE: + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + ret = pmu_hw_event_map[config].event_idx; + break; + case PERF_TYPE_HW_CACHE: + ret = pmu_event_find_cache(config); + break; + case PERF_TYPE_RAW: + /* + * As per SBI v0.3 specification, + * -- the upper 16 bits must be unused for a hardware raw event. + * As per SBI v3.0 specification, + * -- the upper 8 bits must be unused for a hardware raw event. + * Bits 63:62 are used to distinguish between raw events + * 00 - Hardware raw event + * 10 - SBI firmware events + * 11 - Risc-V platform specific firmware event + */ + switch (config >> 62) { + case 0: + if (sbi_v3_available) { + /* Return error any bits [56-63] is set as it is not allowed by the spec */ + if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK; + ret = RISCV_PMU_RAW_EVENT_V2_IDX; + } + /* Return error any bits [48-63] is set as it is not allowed by the spec */ + } else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_MASK; + ret = RISCV_PMU_RAW_EVENT_IDX; + } + break; + case 2: + ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16); + break; + case 3: + /* + * For Risc-V platform specific firmware events + * Event code - 0xFFFF + * Event data - raw event encoding + */ + ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; + if (econfig) + *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(riscv_pmu_get_event_info); + /* * Returns the counter width of a programmable counter and number of hardware * counters. 
As we don't support heterogeneous CPUs yet, it is okay to just
@@ -576,7 +642,6 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
 {
 	u32 type = event->attr.type;
 	u64 config = event->attr.config;
-	int ret = -ENOENT;
 
 	/*
 	 * Ensure we are finished checking standard hardware events for
@@ -584,62 +649,7 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
 	 */
 	flush_work(&check_std_events_work);
 
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		if (config >= PERF_COUNT_HW_MAX)
-			return -EINVAL;
-		ret = pmu_hw_event_map[event->attr.config].event_idx;
-		break;
-	case PERF_TYPE_HW_CACHE:
-		ret = pmu_event_find_cache(config);
-		break;
-	case PERF_TYPE_RAW:
-		/*
-		 * As per SBI v0.3 specification,
-		 * -- the upper 16 bits must be unused for a hardware raw event.
-		 * As per SBI v3.0 specification,
-		 * -- the upper 8 bits must be unused for a hardware raw event.
-		 * Bits 63:62 are used to distinguish between raw events
-		 * 00 - Hardware raw event
-		 * 10 - SBI firmware events
-		 * 11 - Risc-V platform specific firmware event
-		 */
-
-		switch (config >> 62) {
-		case 0:
-			if (sbi_v3_available) {
-				/* Return error any bits [56-63] is set as it is not allowed by the spec */
-				if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) {
-					*econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK;
-					ret = RISCV_PMU_RAW_EVENT_V2_IDX;
-				}
-			/* Return error any bits [48-63] is set as it is not allowed by the spec */
-			} else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) {
-				*econfig = config & RISCV_PMU_RAW_EVENT_MASK;
-				ret = RISCV_PMU_RAW_EVENT_IDX;
-			}
-			break;
-		case 2:
-			ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16);
-			break;
-		case 3:
-			/*
-			 * For Risc-V platform specific firmware events
-			 * Event code - 0xFFFF
-			 * Event data - raw event encoding
-			 */
-			ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT;
-			*econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK;
-			break;
-		default:
-			break;
-		}
-		break;
-	default:
-		break;
-	}
-
-	return ret;
+	return riscv_pmu_get_event_info(type, config, econfig);
 }
 
 static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 701974639ff2ca..4a5e3209c473bf 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -91,6 +91,8 @@ struct riscv_pmu *riscv_pmu_alloc(void);
 int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);
 #endif
 
+int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig);
+
 #endif /* CONFIG_RISCV_PMU */
 
 #endif /* _RISCV_PMU_H */

From f902c4e23a9552743b6347d95cd7d226591ca5c9 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Wed, 11 Dec 2024 14:14:54 -0800
Subject: [PATCH 07/10] KVM: Add a helper function to validate vcpu gpa range

Arch-specific code may need to validate a gpa range if it is shared
memory between the host and the guest. Currently, there are a few
places in the RISC-V implementation where this is done. Given the
nature of the function, it may be used by other architectures too.
Hence, add a common helper function.
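For illustration, a caller that shares a guest page with the host can
validate it as below; this mirrors how PATCH 08 ends up using the
helper:

	/* Fail the SBI call if the guest's shared page is not mapped writable */
	if (kvm_vcpu_validate_gpa_range(vcpu, saddr, PAGE_SIZE, true)) {
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}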
Signed-off-by: Atish Patra --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 291d49b9bf0549..adda61cc4072c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1383,6 +1383,8 @@ static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); +int kvm_vcpu_validate_gpa_range(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long len, + bool write_access); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, int len); int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e85b33a92624d9..3f52f5571fa68b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3301,6 +3301,27 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, } EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); +int kvm_vcpu_validate_gpa_range(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long len, + bool write_access) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + int seg; + int offset = offset_in_page(gpa); + bool writable = false; + unsigned long hva; + + while ((seg = next_segment(len, offset)) != 0) { + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, &writable); + if (kvm_is_error_hva(hva) || (writable ^ write_access)) + return -EPERM; + offset = 0; + len -= seg; + ++gfn; + } + return 0; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_validate_gpa_range); + static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len) From f902c4e23a9552743b6347d95cd7d226591ca5c9 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 11 Dec 2024 14:23:19 -0800 Subject: [PATCH 08/10] RISC-V: KVM: Use the new gpa range validate helper function Remove the duplicate code and use the new helper function to validate the shared memory gpa address. 
Signed-off-by: Atish Patra
---
 arch/riscv/kvm/vcpu_pmu.c     | 5 +----
 arch/riscv/kvm/vcpu_sbi_sta.c | 6 ++----
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 15d71a7b75ba63..163bd4403fd098 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -409,8 +409,6 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
 	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
 	int sbiret = 0;
 	gpa_t saddr;
-	unsigned long hva;
-	bool writable;
 
 	if (!kvpmu || flags) {
 		sbiret = SBI_ERR_INVALID_PARAM;
 		goto out;
 	}
@@ -432,8 +430,7 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
 		goto out;
 	}
 
-	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
-	if (kvm_is_error_hva(hva) || !writable) {
+	if (kvm_vcpu_validate_gpa_range(vcpu, saddr, PAGE_SIZE, true)) {
 		sbiret = SBI_ERR_INVALID_ADDRESS;
 		goto out;
 	}
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index 5f35427114c1da..67dfb613df6a8d 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -85,8 +85,6 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
 	unsigned long shmem_phys_hi = cp->a1;
 	u32 flags = cp->a2;
 	struct sbi_sta_struct zero_sta = {0};
-	unsigned long hva;
-	bool writable;
 	gpa_t shmem;
 	int ret;
 
@@ -111,8 +109,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
 		return SBI_ERR_INVALID_ADDRESS;
 	}
 
-	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable);
-	if (kvm_is_error_hva(hva) || !writable)
+	/* The spec requires the shmem to be 64-byte aligned. */
+	if (kvm_vcpu_validate_gpa_range(vcpu, shmem, 64, true))
 		return SBI_ERR_INVALID_ADDRESS;
 
 	ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta));

From 465dd5ea49a2e79455f517c98780dff296d4fbac Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Mon, 11 Nov 2024 11:45:10 -0800
Subject: [PATCH 09/10] RISC-V: KVM: Implement get event info function

The new get_event_info function allows the guest to query the presence
of multiple events with a single SBI call. Currently, the perf driver
in the Linux guest invokes it for all the standard SBI PMU events.
Support the SBI function implementation in KVM as well.
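The core of the implementation below, in sketch form (bounds checking,
shared-memory validation and the guest read/write steps are omitted
here):

	/* For each entry the guest filled in, report whether the host perf
	 * driver can actually map the event encoding.
	 */
	for (i = 0; i < num_events; i++) {
		eidx = einfo[i].event_idx;
		etype = kvm_pmu_get_perf_event_type(eidx);
		econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
		einfo[i].output = (riscv_pmu_get_event_info(etype, econfig, NULL) > 0) ? 1 : 0;
	}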
Signed-off-by: Atish Patra --- arch/riscv/include/asm/kvm_vcpu_pmu.h | 3 ++ arch/riscv/kvm/vcpu_pmu.c | 66 +++++++++++++++++++++++++++ arch/riscv/kvm/vcpu_sbi_pmu.c | 3 ++ 3 files changed, 72 insertions(+) diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 1d85b661750884..9a930afc8f5713 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -98,6 +98,9 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, unsigned long saddr_high, unsigned long flags, struct kvm_vcpu_sbi_return *retdata); +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long num_events, + unsigned long flags, struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 163bd4403fd098..70a6bdfc42f5e8 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -453,6 +453,72 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s return 0; } +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long num_events, + unsigned long flags, struct kvm_vcpu_sbi_return *retdata) +{ + struct riscv_pmu_event_info *einfo; + int shmem_size = num_events * sizeof(*einfo); + gpa_t shmem; + u32 eidx, etype; + u64 econfig; + int ret; + + if (flags != 0 || (saddr_low & (SZ_16 - 1))) { + ret = SBI_ERR_INVALID_PARAM; + goto out; + } + + shmem = saddr_low; + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) { + shmem |= ((gpa_t)saddr_high << 32); + } else { + ret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + } + + if (kvm_vcpu_validate_gpa_range(vcpu, shmem, shmem_size, true)) { + ret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + einfo = kzalloc(shmem_size, GFP_KERNEL); + if (!einfo) + return -ENOMEM; + + ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_FAILURE; + goto free_mem; + } + + for (int i = 0; i < num_events; i++) { + eidx = einfo[i].event_idx; + etype = kvm_pmu_get_perf_event_type(eidx); + econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data); + ret = riscv_pmu_get_event_info(etype, econfig, NULL); + if (ret > 0) + einfo[i].output = 1; + else + einfo[i].output = 0; + } + + kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_FAILURE; + goto free_mem; + } + +free_mem: + kfree(einfo); +out: + retdata->err_val = ret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index e4be34e03e8384..a020d979d1795a 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -73,6 +73,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; + case SBI_EXT_PMU_EVENT_GET_INFO: + ret = kvm_riscv_vcpu_pmu_event_info(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, retdata); + break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } From 166ee609e8944972971fe6c5d7243ccaed097bb4 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Nov 2024 11:46:03 -0800 
Subject: [PATCH 10/10] RISC-V: KVM: Upgrade the supported SBI version to 3.0

Upgrade the supported SBI version to 3.0 so that the corresponding
features can be enabled in the guest.

Signed-off-by: Atish Patra
---
 arch/riscv/include/asm/kvm_vcpu_sbi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index 4ed6203cdd3079..194299e0ab0ef0 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -11,7 +11,7 @@
 
 #define KVM_SBI_IMPID 3
 
-#define KVM_SBI_VERSION_MAJOR 2
+#define KVM_SBI_VERSION_MAJOR 3
 #define KVM_SBI_VERSION_MINOR 0
 
 enum kvm_riscv_sbi_ext_status {