/*
    Copyright (C) 2010 Intel Corporation.  All Rights Reserved.

    This file is part of SEP Development Kit.

    SEP Development Kit is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    SEP Development Kit is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SEP Development Kit; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

#include "cpu.h"
#include "debug.h"
#include "kmem.h"
#include "kpti.h"
#include "modcfg.h"
#include "pcb.h"
#include "pebs.h"
#include "regs.h"

/* Number of records the per-CPU DS-based PEBS buffer is sized for. */
#define VTSS_PEBS_NR_RECORDS 2

/*
 * Supported PEBS record sizes.  The first three are prefixes of
 * struct vtss_pebs that end right before the named field; SKL is the whole
 * structure.  vtss_pebs_init() picks one from the PMU version and CPU model.
 */
#define VTSS_PEBS_RECORD_SIZE_MRM offsetof(struct vtss_pebs, applicable_counter)
#define VTSS_PEBS_RECORD_SIZE_NHM offsetof(struct vtss_pebs, eventing_ip)
#define VTSS_PEBS_RECORD_SIZE_HSW offsetof(struct vtss_pebs, tsc)
#define VTSS_PEBS_RECORD_SIZE_SKL sizeof(struct vtss_pebs)
#define VTSS_ARCHPEBS_RECORD_SIZE sizeof(struct vtss_archpebs)

/* True when the fallback ("core") record size is the one in use. */
#define vtss_pebs_core() (vtss_pebs_record_size == VTSS_PEBS_RECORD_SIZE_MRM)
/* True when the record size in use is at least the HSW one. */
#define vtss_pebs_has_eventing_ip() (vtss_pebs_record_size >= VTSS_PEBS_RECORD_SIZE_HSW)

/* Byte sizes requested for the per-CPU DS area and PEBS buffer. */
#define vtss_dsa_buffer_size() (sizeof(struct vtss_dsa))
#define vtss_pebs_buffer_size()	(VTSS_PEBS_NR_RECORDS*vtss_pebs_record_size)

/*
 * PEBS state: -1 = feature disabled (only when built with VTSS_DISABLE_PEBS),
 * 0 = inactive, 1 = active (set by vtss_pebs_init()).
 */
#ifdef VTSS_DISABLE_PEBS
atomic_t vtss_pebs_active = ATOMIC_INIT(-1);
#else
atomic_t vtss_pebs_active = ATOMIC_INIT(0);
#endif

#define vtss_pebs_disabled() (atomic_read(&vtss_pebs_active) == -1)
#define vtss_pebs_set_active() atomic_set(&vtss_pebs_active, 1)
/* Atomically switches 1 -> 0; evaluates to true only if the state was 1. */
#define vtss_pebs_set_inactive() (atomic_cmpxchg(&vtss_pebs_active, 1, 0) == 1)

/* Size in bytes of each per-CPU architectural PEBS buffer. */
static unsigned int vtss_arch_pebs_size = (PAGE_SIZE << 4);
/* Passed to vtss_pmu_counters_mask(); set for arch PEBS and PMU version >= 5. */
static bool vtss_pebs_extended = false;
/* Record size in use; 0 until vtss_pebs_init() selects one. */
static size_t vtss_pebs_record_size = 0;

/*
 * Build the value written to the PEBS enable MSR for the given PMU.
 *
 * @pmu: PMU identifier forwarded to vtss_pmu_counters_mask().
 *
 * Returns the counters mask reported by vtss_pmu_counters_mask(); when the
 * core record size is in use only the VTSS_PEBS_ENABLE_PMC0 bits are kept.
 */
static unsigned long long vtss_pebs_enable_mask(int pmu)
{
	unsigned long long counters =
		vtss_pmu_counters_mask(pmu, vtss_pebs_extended, true);

	if (!vtss_pebs_core())
		return counters;

	return counters & VTSS_PEBS_ENABLE_PMC0;
}

/*
 * Set up the DS area for one CPU and publish it in that CPU's control block.
 *
 * @size: requested size in bytes; replaced by PAGE_SIZE << get_order(size).
 * @cpu:  CPU whose control block is updated.
 *
 * With VTSS_KPTI and KPTI enabled the cpu_entry_area debug store is zeroed
 * and used directly (no pages are allocated).  Otherwise pages are allocated,
 * recorded in dsa_virt and, with VTSS_KAISER and KPTI enabled, registered via
 * vtss_kaiser_register() before being recorded in dsa.
 *
 * Returns 0 on success, -ENOMEM on size/allocation failure, or the error
 * returned by vtss_kaiser_register().
 */
static int vtss_dsa_register(size_t size, int cpu)
{
	void *pages;

	size = PAGE_SIZE << get_order(size);

#ifdef VTSS_KPTI
	if (vtss_kpti_enabled) {
		void *cea;

		if (size > PAGE_SIZE) {
			vtss_pr_error("Failed to register more than CEA DSA allows: %ld", PAGE_SIZE);
			return -ENOMEM;
		}

		/* CEA DSA should be already mapped to user and kernel space */
		cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
		memset(cea, 0, size);
		vtss_pcb(cpu).dsa = cea;
		return 0;
	}
#endif

	pages = vtss_alloc_pages(size, GFP_KERNEL | __GFP_ZERO, cpu);
	if (!pages) {
		vtss_pr_error("Not enough memory for DSA buffer on cpu%d", cpu);
		return -ENOMEM;
	}
	/* remember the allocation first so it can be freed even if we fail below */
	vtss_pcb(cpu).dsa_virt = pages;

#ifdef VTSS_KAISER
	if (vtss_kpti_enabled) {
		int rc = vtss_kaiser_register(pages, size);

		if (rc != 0) {
			vtss_pr_error("Failed to register DSA buffer on cpu%d", cpu);
			return rc;
		}
	}
#endif

	vtss_pcb(cpu).dsa = pages;
	return 0;
}

/*
 * Undo vtss_dsa_register() for one CPU.
 *
 * @size: requested size in bytes; replaced by PAGE_SIZE << get_order(size).
 * @cpu:  CPU whose control block is updated.
 *
 * Clears the dsa pointer (after vtss_kaiser_unregister() when built with
 * VTSS_KAISER and KPTI is enabled), then hands dsa_virt to vtss_free_pages()
 * and clears it.
 */
static void vtss_dsa_unregister(size_t size, int cpu)
{
	size = PAGE_SIZE << get_order(size);

#ifdef VTSS_KAISER
	if (vtss_kpti_enabled) {
		vtss_kaiser_unregister(vtss_pcb(cpu).dsa, size);
	}
#endif
	/* drop the published pointer before releasing the backing pages */
	vtss_pcb(cpu).dsa = NULL;

	vtss_free_pages(vtss_pcb(cpu).dsa_virt, size);
	vtss_pcb(cpu).dsa_virt = NULL;
}

/*
 * Allocate the PEBS buffer for one CPU and publish it in that CPU's control
 * block.
 *
 * @size: requested size in bytes; replaced by PAGE_SIZE << get_order(size).
 * @cpu:  CPU whose control block is updated.
 *
 * The allocated pages are always recorded in pebs_virt.  With VTSS_KPTI and
 * KPTI enabled they are registered against the cpu_entry_area PEBS buffer via
 * vtss_cea_register() and that CEA address is what gets stored in pebs; with
 * VTSS_KAISER they are registered via vtss_kaiser_register() and stored as is.
 *
 * Returns 0 on success, -ENOMEM on size/allocation failure, or the error
 * returned by the registration helper.
 */
static int vtss_pebs_register(size_t size, int cpu)
{
	void *pages;

	size = PAGE_SIZE << get_order(size);

#ifdef VTSS_KPTI
	if (vtss_kpti_enabled && size > PEBS_BUFFER_SIZE) {
		vtss_pr_error("Failed to register more than CEA PEBS allows: %ld", PEBS_BUFFER_SIZE);
		return -ENOMEM;
	}
#endif

	pages = vtss_alloc_pages(size, GFP_KERNEL | __GFP_ZERO, cpu);
	if (!pages) {
		vtss_pr_error("Not enough memory for PEBS buffer on cpu%d", cpu);
		return -ENOMEM;
	}
	/* remember the allocation first so it can be freed even if we fail below */
	vtss_pcb(cpu).pebs_virt = pages;

#ifdef VTSS_KPTI
	if (vtss_kpti_enabled) {
		void *cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
		int rc = vtss_cea_register(cea, pages, size);

		if (rc != 0) {
			vtss_pr_error("Failed to register PEBS buffer on cpu%d", cpu);
			return rc;
		}
		/* the CEA address, not the allocation, is what gets published */
		vtss_pcb(cpu).pebs = cea;
		return 0;
	}
#elif defined(VTSS_KAISER)
	if (vtss_kpti_enabled) {
		int rc = vtss_kaiser_register(pages, size);

		if (rc != 0) {
			vtss_pr_error("Failed to register PEBS buffer on cpu%d", cpu);
			return rc;
		}
	}
#endif

	vtss_pcb(cpu).pebs = pages;
	return 0;
}

/*
 * Undo vtss_pebs_register() for one CPU.
 *
 * @size: requested size in bytes; replaced by PAGE_SIZE << get_order(size).
 * @cpu:  CPU whose control block is updated.
 *
 * When KPTI is enabled the published pebs pointer is first passed to
 * vtss_cea_unregister() (VTSS_KPTI builds) or vtss_kaiser_unregister()
 * (VTSS_KAISER builds).  The pointer is then cleared, and pebs_virt is handed
 * to vtss_free_pages() and cleared as well.
 */
static void vtss_pebs_unregister(size_t size, int cpu)
{
	size = PAGE_SIZE << get_order(size);

#ifdef VTSS_KPTI
	if (vtss_kpti_enabled) {
		vtss_cea_unregister(vtss_pcb(cpu).pebs, size);
	}
#elif defined(VTSS_KAISER)
	if (vtss_kpti_enabled) {
		vtss_kaiser_unregister(vtss_pcb(cpu).pebs, size);
	}
#endif
	/* drop the published pointer before releasing the backing pages */
	vtss_pcb(cpu).pebs = NULL;

	vtss_free_pages(vtss_pcb(cpu).pebs_virt, size);
	vtss_pcb(cpu).pebs_virt = NULL;
}

/*
 * Callback run through on_each_cpu() by vtss_pebs_init(): stash the current
 * DS area MSR value in vtss_pcb_cpu.msr_dsa and write 0 to the PEBS enable
 * MSR.  @ctx is unused.
 */
static void vtss_dsa_save_cb(void *ctx)
{
	/* saved value is written back by vtss_dsa_restore_cb() */
	rdmsrl(VTSS_IA32_DS_AREA, vtss_pcb_cpu.msr_dsa);
	wrmsrl(VTSS_IA32_PEBS_ENABLE, 0);
}

/*
 * Callback run through on_each_cpu() by vtss_pebs_cleanup(): write 0 to the
 * PEBS enable MSR, then put back the DS area MSR value that
 * vtss_dsa_save_cb() stored in vtss_pcb_cpu.msr_dsa.  @ctx is unused.
 */
static void vtss_dsa_restore_cb(void *ctx)
{
	/* PEBS is switched off before the DS area pointer is replaced */
	wrmsrl(VTSS_IA32_PEBS_ENABLE, 0);
	wrmsrl(VTSS_IA32_DS_AREA, vtss_pcb_cpu.msr_dsa);
}

/*
 * Callback run through on_each_cpu() by vtss_pebs_init(): program the
 * architectural PEBS base and index MSRs from vtss_pcb_cpu.  @ctx is unused.
 */
static void vtss_arch_pebs_init(void *ctx)
{
	/*
	 * Base = buffer physical address with 0x4 OR-ed into the low bits.
	 * NOTE(review): 0x4 presumably encodes the buffer size (it matches the
	 * PAGE_SIZE << 4 allocation) -- confirm against the Intel SDM.
	 */
	wrmsrl(VTSS_IA32_ARCH_PEBS_BASE, vtss_pcb_cpu.archpebs_phys | 0x4);
	/*
	 * NOTE(review): bit 32 and the value 4 placed at bit 36 look like the
	 * enable bit and the threshold field -- confirm against the Intel SDM.
	 */
	wrmsrl(VTSS_IA32_ARCH_PEBS_INDEX, (1ULL << 32) | (0x4ULL << 36));
}

/*
 * Callback run through on_each_cpu() by vtss_pebs_cleanup(): write 0 to the
 * architectural PEBS base MSR and then to the index MSR.  @ctx is unused.
 */
static void vtss_arch_pebs_cleanup(void *ctx)
{
	wrmsrl(VTSS_IA32_ARCH_PEBS_BASE, 0);
	wrmsrl(VTSS_IA32_ARCH_PEBS_INDEX, 0);
}

/*
 * Select the PEBS record layout and bring PEBS into the active state.
 *
 * The record size is chosen from vtss_arch_pebs_avail, vtss_pmu_version and,
 * for PMU version 3, the CPU model; anything unrecognised falls back to the
 * core record size.
 *
 * Architectural PEBS: the per-CPU buffers must already be present in
 * archpebs_virt.  Only when every CPU has one are the arch PEBS MSRs
 * programmed (once per CPU) and PEBS marked active; otherwise nothing is
 * programmed and PEBS stays inactive.
 *
 * DS-based PEBS: the DS area MSR is saved on every CPU, then a DS area and a
 * PEBS buffer are registered for every CPU before PEBS is marked active.
 *
 * Returns 0 on success (also when PEBS is disabled or the arch PEBS buffers
 * are missing), or the error from the register helpers after calling
 * vtss_pebs_cleanup().
 */
int vtss_pebs_init(void)
{
	int rc = 0;
	int cpu, pmu;

	if (vtss_pebs_disabled()) {
		vtss_pr_warning("PEBS feature is disabled");
		return 0;
	}

	if (vtss_arch_pebs_avail) {
		vtss_pebs_extended = true;
		vtss_pebs_record_size = VTSS_ARCHPEBS_RECORD_SIZE;
	} else if (vtss_pmu_version >= 5) {
		vtss_pebs_extended = true;
		vtss_pebs_record_size = VTSS_PEBS_RECORD_SIZE_SKL;
	} else if (vtss_pmu_version == 4) {
		vtss_pebs_record_size = VTSS_PEBS_RECORD_SIZE_SKL;
	} else if (vtss_pmu_version == 3) {
		switch (vtss_hardcfg.model) {
		case VTSS_CPU_HSW:
		case VTSS_CPU_HSW_X:
		case VTSS_CPU_HSW_M:
		case VTSS_CPU_HSW_G:
		case VTSS_CPU_BDW:
		case VTSS_CPU_BDW_G:
		case VTSS_CPU_BDW_X:
		case VTSS_CPU_BDW_XD:
			vtss_pebs_record_size = VTSS_PEBS_RECORD_SIZE_HSW;
			break;
		case VTSS_CPU_NHM:
		case VTSS_CPU_NHM_G:
		case VTSS_CPU_NHM_EP:
		case VTSS_CPU_NHM_EX:
		case VTSS_CPU_WMR:
		case VTSS_CPU_WMR_EP:
		case VTSS_CPU_WMR_EX:
		case VTSS_CPU_SNB:
		case VTSS_CPU_SNB_X:
		case VTSS_CPU_IVB:
		case VTSS_CPU_IVB_X:
			vtss_pebs_record_size = VTSS_PEBS_RECORD_SIZE_NHM;
			break;
		default:
			/* unknown model: handled by the fallback below */
			break;
		}
	}
	if (vtss_pebs_record_size == 0) {
		vtss_pr_warning("Fallback to core PEBS");
		vtss_pebs_record_size = VTSS_PEBS_RECORD_SIZE_MRM;
	}

	if (vtss_arch_pebs_avail) {
		bool pebs_buffer_valid = true;

		for (cpu = 0; cpu < vtss_nr_cpus(); cpu++) {
			if (vtss_pcb(cpu).archpebs_virt == NULL) {
				pebs_buffer_valid = false;
				break;
			}
		}

		if (pebs_buffer_valid) {
			/*
			 * on_each_cpu() already runs the callback on every CPU,
			 * so a single call is enough.  The MSRs are programmed
			 * only when every buffer exists: pointing the hardware
			 * at a missing buffer would never be undone, because
			 * vtss_pebs_cleanup() resets the MSRs only when PEBS
			 * was marked active.
			 */
			on_each_cpu(vtss_arch_pebs_init, NULL, 1);
			vtss_pebs_set_active();
		} else {
			vtss_pr_warning("Arch PEBS buffers are not allocated, PEBS stays inactive");
		}
	} else {
		on_each_cpu(vtss_dsa_save_cb, NULL, 1);
		for (cpu = 0; cpu < vtss_nr_cpus(); cpu++) {
			rc = vtss_dsa_register(vtss_dsa_buffer_size(), cpu);
			if (rc)
				goto out_fail;
			rc = vtss_pebs_register(vtss_pebs_buffer_size(), cpu);
			if (rc)
				goto out_fail;
		}
		vtss_pebs_set_active();
	}

	for (pmu = 0; pmu < VTSS_NR_PMUS; pmu++) {
		if (!vtss_pebs_enable_mask(pmu))
			continue;
		/* vtss_pebs_record_size is a size_t, hence %zx */
		vtss_pr_notice("PEBS%d: record size: 0x%02zx, mask: 0x%02llx", pmu,
			       vtss_pebs_record_size, vtss_pebs_enable_mask(pmu));
	}
	return 0;

out_fail:
	/* releases whatever was registered so far, for every CPU */
	vtss_pebs_cleanup();
	return rc;
}

/*
 * Tear down what vtss_pebs_init() set up.
 *
 * If PEBS was active it is switched to inactive and the per-CPU MSR state is
 * reset: the arch PEBS MSRs are cleared, or the saved DS area is restored.
 * For DS-based PEBS the PEBS buffer and DS area of every CPU are then
 * unregistered, whether or not PEBS was active.  Arch PEBS buffers are not
 * touched here.
 */
void vtss_pebs_cleanup(void)
{
	const bool was_active = vtss_pebs_set_inactive();
	int cpu;

	if (vtss_arch_pebs_avail) {
		if (was_active)
			on_each_cpu(vtss_arch_pebs_cleanup, NULL, 1);
		return;
	}

	if (was_active)
		on_each_cpu(vtss_dsa_restore_cb, NULL, 1);

	for (cpu = 0; cpu < vtss_nr_cpus(); cpu++) {
		vtss_pebs_unregister(vtss_pebs_buffer_size(), cpu);
		vtss_dsa_unregister(vtss_dsa_buffer_size(), cpu);
	}
}

/*
 * Detect architectural PEBS and allocate one buffer per CPU for it.
 *
 * Architectural PEBS is considered available when the basic CPUID leaf
 * reports a maximum leaf of at least 0x23 and bit 5 of EAX in the
 * VTSS_CPUID_PMU_EXTENDED leaf is set; vtss_arch_pebs_avail is set then.
 * Each CPU gets a zeroed buffer of vtss_arch_pebs_size bytes on its own NUMA
 * node, whose virtual and physical addresses are stored in the CPU's control
 * block.
 *
 * Returns 0 when PEBS is disabled, when arch PEBS is not supported, or when
 * all buffers were allocated.  Returns -ENOMEM if any allocation fails; in
 * that case the buffers allocated so far are freed and their pointers
 * cleared, so nothing is leaked and a later vtss_pebs_deallocate() is
 * harmless.
 */
int vtss_pebs_allocate(void)
{
	unsigned int eax, ebx, ecx, edx;
	void *pebs_buffer_ptr = NULL;
	int cpu, node;
	struct page *page;

	if (vtss_pebs_disabled()) {
		vtss_pr_warning("PEBS feature is disabled");
		return 0;
	}

	eax = ebx = ecx = edx = 0;
	cpuid(VTSS_CPUID_BASIC, &eax, &ebx, &ecx, &edx);
	if (eax >= 0x23) {
		eax = ecx = 0;
		cpuid(VTSS_CPUID_PMU_EXTENDED, &eax, &ebx, &ecx, &edx);
		if ((eax >> 5) & 0x1) {
			vtss_arch_pebs_avail = true;
			vtss_pr_notice("Architectural PEBS is supported");
		}
	}

	if (!vtss_arch_pebs_avail) {
		vtss_pr_warning("Arch PEBS is not supported");
		return 0;
	}

	for (cpu = 0; cpu < vtss_nr_cpus(); cpu++) {
		node = cpu_to_node(cpu);
		page = __alloc_pages_node(node,
			GFP_KERNEL | __GFP_ZERO, get_order(vtss_arch_pebs_size));
		pebs_buffer_ptr = page ? page_address(page) : NULL;

		if (pebs_buffer_ptr == NULL) {
			vtss_pr_error("Failed to allocate Arch PEBS buffer on cpu%d", cpu);
			goto out_free;
		}
		/* report success only after the allocation has been verified */
		vtss_pr_notice("Architectural PEBS buffer is allocated on cpu%d - %p.", cpu, pebs_buffer_ptr);

		vtss_pcb(cpu).archpebs_virt = pebs_buffer_ptr;
		vtss_pcb(cpu).archpebs_phys = (unsigned long long)virt_to_phys(pebs_buffer_ptr);
	}

	return 0;

out_free:
	/* roll back the CPUs handled before the failing one */
	while (--cpu >= 0) {
		free_pages((unsigned long)vtss_pcb(cpu).archpebs_virt,
			   get_order(vtss_arch_pebs_size));
		vtss_pcb(cpu).archpebs_virt = NULL;
		vtss_pcb(cpu).archpebs_phys = 0ULL;
	}
	return -ENOMEM;
}

/*
 * Release the per-CPU architectural PEBS buffers.
 *
 * Does nothing when PEBS is disabled or arch PEBS is not available.
 * Otherwise every CPU's archpebs_virt is passed to free_pages() and both the
 * virtual and physical addresses in the control block are cleared.
 *
 * Always returns 0.
 */
int vtss_pebs_deallocate(void)
{
	int cpu;

	if (vtss_pebs_disabled())
		return 0;
	if (!vtss_arch_pebs_avail)
		return 0;

	for (cpu = 0; cpu < vtss_nr_cpus(); cpu++) {
		unsigned long addr = (unsigned long)vtss_pcb(cpu).archpebs_virt;

		free_pages(addr, get_order(vtss_arch_pebs_size));
		vtss_pcb(cpu).archpebs_virt = NULL;
		vtss_pcb(cpu).archpebs_phys = 0ULL;
	}

	vtss_pr_notice("Architectural PEBS buffer is deallocated");

	return 0;
}

/*
 * Arm PEBS using the buffers found in vtss_pcb_cpu.
 *
 * Does nothing when PEBS is disabled.
 *
 * Architectural PEBS: rewrites the index MSR with bits 4-26 and bit 31
 * cleared, leaving every other bit as read.
 *
 * DS-based PEBS: returns early if the DS area or the PEBS buffer is missing.
 * Otherwise it resets the PEBS fields of the DS area to the start of the
 * buffer, zeroes the counter reset values, invalidates the first record,
 * then writes the DS area address and the PEBS enable mask to their MSRs.
 */
void vtss_pebs_enable(void)
{
	struct vtss_dsa *dsa;
	struct vtss_pebs *pebs;
	int i;

	if (vtss_pebs_disabled())
		return;

	if (vtss_arch_pebs_avail) {
		unsigned long long pebs_index_val;

		/* Clear the wr_offset bits */
		rdmsrl(VTSS_IA32_ARCH_PEBS_INDEX, pebs_index_val);
		wrmsrl(VTSS_IA32_ARCH_PEBS_INDEX,
		       pebs_index_val & 0xFFFFFFFF7800000FULL);
		return;
	}

	dsa = vtss_pcb_cpu.dsa;
	pebs = vtss_pcb_cpu.pebs;
	if (dsa == NULL || pebs == NULL)
		return;

	/* setup PEBS in DSA */
	dsa->pebs_base   = pebs;
	dsa->pebs_index  = pebs;
	dsa->pebs_absmax = (char *)pebs + vtss_pebs_buffer_size();
	if (vtss_pebs_core())
		dsa->pebs_threshold = pebs;
	else
		dsa->pebs_threshold = (char *)pebs + vtss_pebs_record_size;

	/* reset PEBS counters */
	for (i = 0; i < VTSS_PMU_MAX_COUNTERS; i++)
		dsa->pebs_reset[i] = 0;

	/* invalidate the first PEBS record */
	pebs->ip = 0;

	/*
	 * enable DSA
	 *
	 * Convert the pointer through unsigned long (pointer-sized) so the
	 * value is zero-extended to 64 bits; casting a 32-bit pointer straight
	 * to unsigned long long sign-extends it under GCC and would set the
	 * upper half of the MSR value for kernel addresses.
	 */
	wrmsrl(VTSS_IA32_DS_AREA, (unsigned long)dsa);

	/* enable PEBS */
	wrmsrl(VTSS_IA32_PEBS_ENABLE, vtss_pebs_enable_mask(vtss_cpu_pmu_id()));
}

/*
 * Counterpart of vtss_pebs_enable().  Deliberately performs no hardware
 * access for either architectural or DS-based PEBS.
 */
void vtss_pebs_disable(void)
{
	if (vtss_pebs_disabled())
		return;

	/*
	 * Nothing to do here.
	 *
	 * Disabled as there are CPUs which reboot if a PEBS PMI is
	 * encountered when PEBS is disabled.
	 * PEBS is effectively disabled when disabling PMU counters.
	 */
}

/*
 * Fetch the instruction pointer stored in the first PEBS record of a CPU.
 *
 * @cpu: CPU whose control block is inspected.
 *
 * Architectural PEBS: bit 54 of the global status MSR is read; if it is set
 * and the CPU has a buffer, the eventing_ip of the record at the start of
 * that buffer is returned.
 *
 * DS-based PEBS: if the DS area exists and its pebs_index differs from
 * pebs_base, the record at pebs_base supplies eventing_ip (record sizes that
 * have it) or ip.
 *
 * Returns 0 whenever no record is available.
 */
unsigned long vtss_pebs_get_ip(int cpu)
{
	struct vtss_dsa *dsa;
	struct vtss_pebs *record;

	if (vtss_arch_pebs_avail) {
		struct vtss_archpebs *arch_record;
		unsigned long long status;

		/* Check the arch_pebs status in global ctrl status */
		rdmsrl(VTSS_IA32_PERF_GLOBAL_STATUS, status);
		if (!((status >> 54) & 0x1))
			return 0;

		arch_record = vtss_pcb(cpu).archpebs_virt;
		if (arch_record == NULL)
			return 0;
		return arch_record->eventing_ip;
	}

	dsa = vtss_pcb(cpu).dsa;
	if (dsa == NULL)
		return 0;

	/* index still at base: nothing has been written to the buffer */
	if (dsa->pebs_index == dsa->pebs_base)
		return 0;

	record = dsa->pebs_base;
	if (record == NULL)
		return 0;

	if (vtss_pebs_has_eventing_ip())
		return record->eventing_ip;
	return record->ip;
}
