#define DSISR_ISSTORE 0x02000000 /* access was a store */
#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */
+ #define DSISR_KEYFAULT 0x00200000 /* Key fault */
#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */
#define LPCR_ISL (1ul << (63-2))
#define LPCR_VC_SH (63-2)
#define LPCR_DPFD_SH (63-11)
+ #define LPCR_VRMASD (0x1ful << (63-16))
#define LPCR_VRMA_L (1ul << (63-12))
#define LPCR_VRMA_LP0 (1ul << (63-15))
#define LPCR_VRMA_LP1 (1ul << (63-16))
#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
+ #define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
+ #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
+ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
#define SRR1_WAKESYSERR 0x00300000 /* System error */
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
#define proc_trap() asm volatile("trap")
-#ifdef CONFIG_PPC64
-
-extern void ppc64_runlatch_on(void);
-extern void __ppc64_runlatch_off(void);
-
-#define ppc64_runlatch_off() \
- do { \
- if (cpu_has_feature(CPU_FTR_CTRL) && \
- test_thread_flag(TIF_RUNLATCH)) \
- __ppc64_runlatch_off(); \
- } while (0)
+#define __get_SP() ({unsigned long sp; \
+ asm volatile("mr %0,1": "=r" (sp)); sp;})
extern unsigned long scom970_read(unsigned int address);
extern void scom970_write(unsigned int address, unsigned long value);
-#else
-#define ppc64_runlatch_on()
-#define ppc64_runlatch_off()
-
-#endif /* CONFIG_PPC64 */
-
-#define __get_SP() ({unsigned long sp; \
- asm volatile("mr %0,1": "=r" (sp)); sp;})
-
struct pt_regs;
extern void ppc_save_regs(struct pt_regs *regs);
#include <asm/hvcall.h>
#include <asm/xics.h>
#endif
-#ifdef CONFIG_PPC_ISERIES
-#include <asm/iseries/alpaca.h>
-#endif
#ifdef CONFIG_PPC_POWERNV
#include <asm/opal.h>
#endif
DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
- DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+ DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
-#ifdef CONFIG_PPC_ISERIES
- /* the assembler miscalculates the VSID values */
- DEFINE(PAGE_OFFSET_ESID, GET_ESID(PAGE_OFFSET));
- DEFINE(PAGE_OFFSET_VSID, KERNEL_VSID(PAGE_OFFSET));
- DEFINE(VMALLOC_START_ESID, GET_ESID(VMALLOC_START));
- DEFINE(VMALLOC_START_VSID, KERNEL_VSID(VMALLOC_START));
-
- /* alpaca */
- DEFINE(ALPACA_SIZE, sizeof(struct alpaca));
-#endif
-
DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
DEFINE(PTE_SIZE, sizeof(pte_t));
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif
- DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
- DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
- DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
- DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
+ DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
+ DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
+ DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
+ DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7));
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1));
DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+ DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
+ DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
+ DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2));
+ DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3));
+ DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
+ DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_64_HV
DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
+ DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
#endif
*
*/
+#include <asm/hw_irq.h>
#include <asm/exception-64s.h>
#include <asm/ptrace.h>
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
* 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
+ * 0x3000 - 0x5fff : interrupt support common interrupt prologs
* 0x6000 - 0x6fff : Initial (CPU0) segment table
* 0x7000 - 0x7fff : FWNMI data area
* 0x8000 - : Early init and support code
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
#endif
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
- KVMTEST_PR, 0x300)
+ KVMTEST, 0x300)
. = 0x380
.globl data_access_slb_pSeries
data_access_slb_pSeries:
HMT_MEDIUM
SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_DAR
#ifdef __DISABLED__
EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
#endif /* CONFIG_POWER4_ONLY */
- KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300)
- KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
+ KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
/*
- * An interrupt came in while soft-disabled; clear EE in SRR1,
- * clear paca->hard_enabled and return.
+ * An interrupt came in while soft-disabled. We set paca->irq_happened,
+ * then, if it was a decrementer interrupt, we bump the dec to max and
+ * and return, else we hard disable and return. This is called with
+ * r10 containing the value to OR to the paca field.
*/
-masked_interrupt:
- stb r10,PACAHARDIRQEN(r13)
- mtcrf 0x80,r9
- ld r9,PACA_EXGEN+EX_R9(r13)
- mfspr r10,SPRN_SRR1
- rldicl r10,r10,48,1 /* clear MSR_EE */
- rotldi r10,r10,16
- mtspr SPRN_SRR1,r10
- ld r10,PACA_EXGEN+EX_R10(r13)
- GET_SCRATCH0(r13)
- rfid
+#define MASKED_INTERRUPT(_H) \
+masked_##_H##interrupt: \
+ std r11,PACA_EXGEN+EX_R11(r13); \
+ lbz r11,PACAIRQHAPPENED(r13); \
+ or r11,r11,r10; \
+ stb r11,PACAIRQHAPPENED(r13); \
+ andi. r10,r10,PACA_IRQ_DEC; \
+ beq 1f; \
+ lis r10,0x7fff; \
+ ori r10,r10,0xffff; \
+ mtspr SPRN_DEC,r10; \
+ b 2f; \
+1: mfspr r10,SPRN_##_H##SRR1; \
+ rldicl r10,r10,48,1; /* clear MSR_EE */ \
+ rotldi r10,r10,16; \
+ mtspr SPRN_##_H##SRR1,r10; \
+2: mtcrf 0x80,r9; \
+ ld r9,PACA_EXGEN+EX_R9(r13); \
+ ld r10,PACA_EXGEN+EX_R10(r13); \
+ ld r11,PACA_EXGEN+EX_R11(r13); \
+ GET_SCRATCH0(r13); \
+ ##_H##rfid; \
b .
+
+ MASKED_INTERRUPT()
+ MASKED_INTERRUPT(H)
-masked_Hinterrupt:
- stb r10,PACAHARDIRQEN(r13)
- mtcrf 0x80,r9
- ld r9,PACA_EXGEN+EX_R9(r13)
- mfspr r10,SPRN_HSRR1
- rldicl r10,r10,48,1 /* clear MSR_EE */
- rotldi r10,r10,16
- mtspr SPRN_HSRR1,r10
- ld r10,PACA_EXGEN+EX_R10(r13)
- GET_SCRATCH0(r13)
- hrfid
- b .
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains 0x500 or 0x900 to indicate which
+ * kind of interrupt. MSR:EE is already off. We generate a
+ * stackframe like if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about, so we don't bother storing them.
+ */
+ mfmsr r12
+ mflr r11
+ mfcr r9
+ ori r12,r12,MSR_EE
+ andi. r3,r3,0x0800
+ bne decrementer_common
+ b hardware_interrupt_common
#ifdef CONFIG_PPC_PSERIES
/*
bl .machine_check_exception
b .ret_from_except
- STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
+ STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
+ STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
- STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
+ STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
#ifdef CONFIG_ALTIVEC
STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
system_call_entry:
b system_call_common
+ppc64_runlatch_on_trampoline:
+ b .__ppc64_runlatch_on
+
/*
* Here we have detected that the kernel stack pointer is bad.
* R9 contains the saved CR, r13 points to the paca,
mfspr r10,SPRN_DSISR
stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
+ DISABLE_INTS
+ ld r12,_MSR(r1)
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
li r5,0x300
stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
bl .save_nvgprs
+ DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
bl .unknown_exception
b .ret_from_except
.globl instruction_access_common
instruction_access_common:
EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+ DISABLE_INTS
+ ld r12,_MSR(r1)
ld r3,_NIP(r1)
andis. r4,r12,0x5820
li r5,0x400
ld r10,PACA_EXSLB+EX_LR(r13)
ld r3,PACA_EXSLB+EX_R3(r13)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
-BEGIN_FW_FTR_SECTION
- ld r11,PACALPPACAPTR(r13)
- ld r11,LPPACASRR0(r11) /* get SRR0 value */
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-#endif /* CONFIG_PPC_ISERIES */
mtlr r10
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
.machine pop
-#ifdef CONFIG_PPC_ISERIES
-BEGIN_FW_FTR_SECTION
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-#endif /* CONFIG_PPC_ISERIES */
ld r9,PACA_EXSLB+EX_R9(r13)
ld r10,PACA_EXSLB+EX_R10(r13)
ld r11,PACA_EXSLB+EX_R11(r13)
rfid
b . /* prevent speculative execution */
-2:
-#ifdef CONFIG_PPC_ISERIES
-BEGIN_FW_FTR_SECTION
- b unrecov_slb
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-#endif /* CONFIG_PPC_ISERIES */
- mfspr r11,SPRN_SRR0
+2: mfspr r11,SPRN_SRR0
ld r10,PACAKBASE(r13)
LOAD_HANDLER(r10,unrecov_slb)
mtspr SPRN_SRR0,r10
bl .unrecoverable_exception
b 1b
- .align 7
- .globl hardware_interrupt_common
- .globl hardware_interrupt_entry
-hardware_interrupt_common:
- EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
- FINISH_NAP
-hardware_interrupt_entry:
- DISABLE_INTS
-BEGIN_FTR_SECTION
- bl .ppc64_runlatch_on
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_IRQ
- b .ret_from_except_lite
#ifdef CONFIG_PPC_970_NAP
power4_fixup_nap:
EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
bne 1f /* if from user, just load it up */
bl .save_nvgprs
+ DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
bl .kernel_fp_unavailable_exception
BUG_OPCODE
1: bl .load_up_fpu
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
bl .save_nvgprs
+ DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
bl .altivec_unavailable_exception
b .ret_from_except
EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
- bne .load_up_vsx
+ beq 1f
+ b .load_up_vsx
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
bl .save_nvgprs
+ DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
bl .vsx_unavailable_exception
b .ret_from_except
.globl __end_handlers
__end_handlers:
-/*
- * Return from an exception with minimal checks.
- * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
- * If interrupts have been enabled, or anything has been
- * done that might have changed the scheduling status of
- * any task or sent any task a signal, you should use
- * ret_from_except or ret_from_except_lite instead of this.
- */
-fast_exc_return_irq: /* restores irq state too */
- ld r3,SOFTE(r1)
- TRACE_AND_RESTORE_IRQ(r3);
- ld r12,_MSR(r1)
- rldicl r4,r12,49,63 /* get MSR_EE to LSB */
- stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */
- b 1f
-
- .globl fast_exception_return
-fast_exception_return:
- ld r12,_MSR(r1)
-1: ld r11,_NIP(r1)
- andi. r3,r12,MSR_RI /* check if RI is set */
- beq- unrecov_fer
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- andi. r3,r12,MSR_PR
- beq 2f
- ACCOUNT_CPU_USER_EXIT(r3, r4)
-2:
-#endif
-
- ld r3,_CCR(r1)
- ld r4,_LINK(r1)
- ld r5,_CTR(r1)
- ld r6,_XER(r1)
- mtcr r3
- mtlr r4
- mtctr r5
- mtxer r6
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
-
- mfmsr r10
- rldicl r10,r10,48,1 /* clear EE */
- rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */
- mtmsrd r10,1
-
- mtspr SPRN_SRR1,r12
- mtspr SPRN_SRR0,r11
- REST_4GPRS(10, r1)
- ld r1,GPR1(r1)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_fer:
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
-
/*
* Hash table stuff
*/
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
bne 77f /* then don't call hash_page now */
-
- /*
- * On iSeries, we soft-disable interrupts here, then
- * hard-enable interrupts so that the hash_page code can spin on
- * the hash_table_lock without problems on a shared processor.
- */
- DISABLE_INTS
-
- /*
- * Currently, trace_hardirqs_off() will be called by DISABLE_INTS
- * and will clobber volatile registers when irq tracing is enabled
- * so we need to reload them. It may be possible to be smarter here
- * and move the irq tracing elsewhere but let's keep it simple for
- * now
- */
-#ifdef CONFIG_TRACE_IRQFLAGS
- ld r3,_DAR(r1)
- ld r4,_DSISR(r1)
- ld r5,_TRAP(r1)
- ld r12,_MSR(r1)
- clrrdi r5,r5,4
-#endif /* CONFIG_TRACE_IRQFLAGS */
/*
* We need to set the _PAGE_USER bit if MSR_PR is set or if we are
* accessing a userspace segment (even from the kernel). We assume
* r4 contains the required access permissions
* r5 contains the trap number
*
- * at return r3 = 0 for success
+ * at return r3 = 0 for success, 1 for page fault, negative for error
*/
bl .hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if hash_page succeeded */
-BEGIN_FW_FTR_SECTION
- /*
- * If we had interrupts soft-enabled at the point where the
- * DSI/ISI occurred, and an interrupt came in during hash_page,
- * handle it now.
- * We jump to ret_from_except_lite rather than fast_exception_return
- * because ret_from_except_lite will check for and handle pending
- * interrupts if necessary.
- */
- beq 13f
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-
-BEGIN_FW_FTR_SECTION
- /*
- * Here we have interrupts hard-disabled, so it is sufficient
- * to restore paca->{soft,hard}_enable and get out.
- */
+ /* Success */
beq fast_exc_return_irq /* Return from exception on success */
-END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
-
- /* For a hash failure, we don't bother re-enabling interrupts */
- ble- 12f
-
- /*
- * hash_page couldn't handle it, set soft interrupt enable back
- * to what it was before the trap. Note that .arch_local_irq_restore
- * handles any interrupts pending at this point.
- */
- ld r3,SOFTE(r1)
- TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f)
- bl .arch_local_irq_restore
- b 11f
-/* We have a data breakpoint exception - handle it */
-handle_dabr_fault:
- bl .save_nvgprs
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_dabr
- b .ret_from_except_lite
+ /* Error */
+ blt- 13f
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
- ENABLE_INTS
11: ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl .do_page_fault
cmpdi r3,0
- beq+ 13f
+ beq+ 12f
bl .save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
bl .bad_page_fault
b .ret_from_except
-13: b .ret_from_except_lite
+/* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+ bl .save_nvgprs
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_dabr
+12: b .ret_from_except_lite
+
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
-12: bl .save_nvgprs
+13: bl .save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
.= 0x7000
.globl fwnmi_data_area
fwnmi_data_area:
-#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
- /* iSeries does not use the FWNMI stuff, so it is safe to put
- * this here, even if we later allow kernels that will boot on
- * both pSeries and iSeries */
-#ifdef CONFIG_PPC_ISERIES
- . = LPARMAP_PHYS
- .globl xLparMap
-xLparMap:
- .quad HvEsidsToMap /* xNumberEsids */
- .quad HvRangesToMap /* xNumberRanges */
- .quad STAB0_PAGE /* xSegmentTableOffs */
- .zero 40 /* xRsvd */
- /* xEsids (HvEsidsToMap entries of 2 quads) */
- .quad PAGE_OFFSET_ESID /* xKernelEsid */
- .quad PAGE_OFFSET_VSID /* xKernelVsid */
- .quad VMALLOC_START_ESID /* xKernelEsid */
- .quad VMALLOC_START_VSID /* xKernelVsid */
- /* xRanges (HvRangesToMap entries of 3 quads) */
- .quad HvPagesToMap /* xPages */
- .quad 0 /* xOffset */
- .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */
-
-#endif /* CONFIG_PPC_ISERIES */
-
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/* pseries and powernv need to keep the whole page from
* 0x7000 to 0x8000 free for use by the firmware
*/
. = 0x8000
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
-/*
- * Space for CPU0's segment table.
- *
- * On iSeries, the hypervisor must fill in at least one entry before
- * we get control (with relocate on). The address is given to the hv
- * as a page number (see xLparMap above), so this must be at a
- * fixed address (the linker can't compute (u64)&initial_stab >>
- * PAGE_SHIFT).
- */
- . = STAB0_OFFSET /* 0x8000 */
+/* Space for CPU0's segment table */
+ .balign 4096
.globl initial_stab
initial_stab:
.space 4096
+
#ifdef CONFIG_PPC_POWERNV
_GLOBAL(opal_mc_secondary_handler)
HMT_MEDIUM
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
+ #define __hard_irq_disable local_irq_disable
+ #define __hard_irq_enable local_irq_enable
#endif
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
sizeof(get_paca()->shadow_vcpu));
- to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
+ svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
+ svcpu_put(svcpu);
#endif
#ifdef CONFIG_PPC_BOOK3S_32
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
sizeof(get_paca()->shadow_vcpu));
- to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
+ to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
+ svcpu_put(svcpu);
#endif
kvmppc_giveup_ext(vcpu, MSR_FP);
#ifdef CONFIG_PPC_BOOK3S_64
if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
kvmppc_mmu_book3s_64_init(vcpu);
- to_book3s(vcpu)->hior = 0xfff00000;
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0xfff00000;
to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
} else
#endif
{
kvmppc_mmu_book3s_32_init(vcpu);
- to_book3s(vcpu)->hior = 0;
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0;
to_book3s(vcpu)->msr_mask = 0xffffffffULL;
vcpu->arch.cpu_type = KVM_CPU_3S_32;
}
hpage_offset /= 4;
get_page(hpage);
- page = kmap_atomic(hpage, KM_USER0);
+ page = kmap_atomic(hpage);
/* patch dcbz into reserved instruction, so we trap */
for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
if ((page[i] & 0xff0007ff) == INS_DCBZ)
page[i] &= 0xfffffff7;
- kunmap_atomic(page, KM_USER0);
+ kunmap_atomic(page);
put_page(hpage);
}
if (page_found == -ENOENT) {
/* Page not found in guest PTE entries */
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
+ svcpu_put(svcpu);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EPERM) {
/* Storage protection */
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr =
- to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
+ vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ svcpu->shadow_srr1 & 0x00000000f8000000ULL;
+ svcpu_put(svcpu);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EINVAL) {
/* Page not found in guest SLB */
run->ready_for_interrupt_injection = 1;
trace_kvm_book3s_exit(exit_nr, vcpu);
+ preempt_enable();
kvm_resched(vcpu);
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong shadow_srr1 = svcpu->shadow_srr1;
vcpu->stat.pf_instruc++;
#ifdef CONFIG_PPC_BOOK3S_32
/* We set segments as unused segments when invalidating them. So
* treat the respective fault as segment fault. */
- if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
- == SR_INVALID) {
+ if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
r = RESUME_GUEST;
+ svcpu_put(svcpu);
break;
}
#endif
+ svcpu_put(svcpu);
/* only care about PTEG not found errors, but leave NX alone */
- if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
+ if (shadow_srr1 & 0x40000000) {
r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
vcpu->stat.sp_instruc++;
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
r = RESUME_GUEST;
} else {
- vcpu->arch.shared->msr |=
- to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
+ vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
}
break;
+ }
case BOOK3S_INTERRUPT_DATA_STORAGE:
{
ulong dar = kvmppc_get_fault_dar(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ u32 fault_dsisr = svcpu->fault_dsisr;
vcpu->stat.pf_storage++;
#ifdef CONFIG_PPC_BOOK3S_32
/* We set segments as unused segments when invalidating them. So
* treat the respective fault as segment fault. */
- if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
+ if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
kvmppc_mmu_map_segment(vcpu, dar);
r = RESUME_GUEST;
+ svcpu_put(svcpu);
break;
}
#endif
+ svcpu_put(svcpu);
/* The only case we need to handle is missing shadow PTEs */
- if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
+ if (fault_dsisr & DSISR_NOHPTE) {
r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
} else {
vcpu->arch.shared->dar = dar;
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ vcpu->arch.shared->dsisr = fault_dsisr;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
}
case BOOK3S_INTERRUPT_PROGRAM:
{
enum emulation_result er;
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
ulong flags;
program_interrupt:
- flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
+ svcpu = svcpu_get(vcpu);
+ flags = svcpu->shadow_srr1 & 0x1f0000ull;
+ svcpu_put(svcpu);
if (vcpu->arch.shared->msr & MSR_PR) {
#ifdef EXIT_DEBUG
r = RESUME_GUEST;
break;
default:
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong shadow_srr1 = svcpu->shadow_srr1;
+ svcpu_put(svcpu);
/* Ugh - bork here! What did we get? */
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
- exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
+ exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
r = RESUME_HOST;
BUG();
break;
}
-
+ }
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
* reason. */
+
+ /*
+ * Interrupts could be timers for the guest which we have to
+ * inject again, so let's postpone them until we're in the guest
+ * and if we really did time things so badly, then we just exit
+ * again due to a host external interrupt.
+ */
+ __hard_irq_disable();
if (signal_pending(current)) {
+ __hard_irq_enable();
#ifdef EXIT_DEBUG
printk(KERN_EMERG "KVM: Going back to host\n");
#endif
run->exit_reason = KVM_EXIT_INTR;
r = -EINTR;
} else {
+ preempt_disable();
+
/* In case an interrupt came in that was triggered
* from userspace (like DEC), we need to check what
* to inject now! */
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
}
}
return 0;
}
+ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+ {
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_HIOR:
+ r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+ }
+
+ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+ {
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_HIOR:
+ r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
+ if (!r)
+ to_book3s(vcpu)->hior_explicit = true;
+ break;
+ default:
+ break;
+ }
+
+ return r;
+ }
+
int kvmppc_core_check_processor_compat(void)
{
return 0;
#endif
ulong ext_msr;
+ preempt_disable();
+
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
+ kvmppc_core_prepare_to_enter(vcpu);
+
+ /*
+ * Interrupts could be timers for the guest which we have to inject
+ * again, so let's postpone them until we're in the guest and if we
+ * really did time things so badly, then we just exit again due to
+ * a host external interrupt.
+ */
+ __hard_irq_disable();
+
/* No need to go into the guest when all we do is going out */
if (signal_pending(current)) {
+ __hard_irq_enable();
kvm_run->exit_reason = KVM_EXIT_INTR;
- return -EINTR;
+ ret = -EINTR;
+ goto out;
}
/* Save FPU state in stack */
kvm_guest_exit();
- local_irq_disable();
-
current->thread.regs->msr = ext_msr;
/* Make sure we save the guest FPU/Altivec/VSX state */
current->thread.used_vsr = used_vsr;
#endif
+ out:
+ preempt_enable();
return ret;
}
+ /*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+ {
+ struct kvm_memory_slot *memslot;
+ struct kvm_vcpu *vcpu;
+ ulong ga, ga_end;
+ int is_dirty = 0;
+ int r;
+ unsigned long n;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ if (r)
+ goto out;
+
+ /* If nothing is dirty, don't bother messing with page tables. */
+ if (is_dirty) {
+ memslot = id_to_memslot(kvm->memslots, log->slot);
+
+ ga = memslot->base_gfn << PAGE_SHIFT;
+ ga_end = ga + (memslot->npages << PAGE_SHIFT);
+
+ kvm_for_each_vcpu(n, vcpu, kvm)
+ kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
+
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+
+ r = 0;
+ out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+ }
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
+ #include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
*shift = hugepd_shift(*hpdp);
return hugepte_offset(hpdp, ea, pdshift);
}
+ EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
int i;
strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
- parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);
+ parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
+ &do_gpage_early_setup);
/*
* Walk gpage list in reverse, allocating larger page sizes first.
if (!PageHighMem(page)) {
__flush_dcache_icache(page_address(page+i));
} else {
- start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
+ start = kmap_atomic(page+i);
__flush_dcache_icache(start);
- kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+ kunmap_atomic(start);
}
}
}
if (rc == -EFAULT)
exception = 1;
- rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->emerg.code);
+ rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code);
if (rc == -EFAULT)
exception = 1;
if (rc == -EFAULT)
exception = 1;
- rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code);
+ rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code);
if (rc == -EFAULT)
exception = 1;
if (rc == -EFAULT)
exception = 1;
- rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00);
+ rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00);
if (rc == -EFAULT)
exception = 1;
VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
inti->prefix.address);
vcpu->stat.deliver_prefix_signal++;
- vcpu->arch.sie_block->prefix = inti->prefix.address;
- vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_s390_set_prefix(vcpu, inti->prefix.address);
break;
case KVM_S390_RESTART:
#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
+ #define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19)
#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21)
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
-#define PERF_EVENT_INDEX_OFFSET 0
-
/*
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
* This flag is otherwise unused and ABI specified to be 0, so nobody should
static inline void perf_events_lapic_init(void) { }
#endif
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+ extern void amd_pmu_enable_virt(void);
+ extern void amd_pmu_disable_virt(void);
+#else
+ static inline void amd_pmu_enable_virt(void) { }
+ static inline void amd_pmu_disable_virt(void) { }
+#endif
+
#endif /* _ASM_X86_PERF_EVENT_H */
* most necessary things.
*/
cpu_init();
+ x86_cpuinit.early_percpu_clock_init();
preempt_disable();
smp_callin();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
x86_platform.nmi_init();
- /*
- * Wait until the cpu which brought this one up marked it
- * online before enabling interrupts. If we don't do that then
- * we can end up waking up the softirq thread before this cpu
- * reached the active state, which makes the scheduler unhappy
- * and schedule the softirq thread on the wrong cpu. This is
- * only observable with forced threaded interrupts, but in
- * theory it could also happen w/o them. It's just way harder
- * to achieve.
- */
- while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
- cpu_relax();
-
/* enable local interrupts */
local_irq_enable();
* the targeted processor.
*/
- printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip);
-
atomic_set(&init_deasserted, 0);
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
schedule();
}
- if (cpumask_test_cpu(cpu, cpu_callin_mask))
+ if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
+ print_cpu_msr(&cpu_data(cpu));
pr_debug("CPU%d: has booted.\n", cpu);
- else {
+ } else {
boot_error = 1;
if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
== 0xA5A5A5A5)
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
!physid_isset(apicid, phys_cpu_present_map) ||
- (!x2apic_mode && apicid >= 255)) {
+ !apic->apic_id_valid(apicid)) {
printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
return -EINVAL;
}
if (cpu_khz) {
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
- *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+ *offset = ns_now - mult_frac(tsc_now, *scale,
+ (1UL << CYC2NS_SCALE_FACTOR));
}
sched_clock_idle_wakeup_event(0);
static unsigned long long cyc2ns_suspend;
- void save_sched_clock_state(void)
+ void tsc_save_sched_clock_state(void)
{
if (!sched_clock_stable)
return;
* that sched_clock() continues from the point where it was left off during
* suspend.
*/
- void restore_sched_clock_state(void)
+ void tsc_restore_sched_clock_state(void)
{
unsigned long long offset;
unsigned long flags;
break;
case APIC_DM_INIT:
- if (level) {
+ if (!trig_mode || level) {
result = 1;
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
kvm_make_request(KVM_REQ_EVENT, vcpu);
u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
u64 ns = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
- unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu);
+ unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
unsigned long flags;
if (unlikely(!tscdeadline || !this_tsc_khz))
if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
return;
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+ vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
- kunmap_atomic(vapic, KM_USER0);
+ kunmap_atomic(vapic);
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
max_isr = 0;
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+ vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
- kunmap_atomic(vapic, KM_USER0);
+ kunmap_atomic(vapic);
}
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
slot = gfn_to_memslot(kvm, sp->gfn);
rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
- spte = rmap_next(kvm, rmapp, NULL);
+ spte = rmap_next(rmapp, NULL);
while (spte) {
if (is_writable_pte(*spte))
audit_printk(kvm, "shadow page has writable "
"mappings: gfn %llx role %x\n",
sp->gfn, sp->role.word);
- spte = rmap_next(kvm, rmapp, spte);
+ spte = rmap_next(rmapp, spte);
}
}
}
static bool mmu_audit;
-static struct jump_label_key mmu_audit_key;
+static struct static_key mmu_audit_key;
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
- if (static_branch((&mmu_audit_key)))
+ if (static_key_false((&mmu_audit_key)))
__kvm_mmu_audit(vcpu, point);
}
if (mmu_audit)
return;
- jump_label_inc(&mmu_audit_key);
+ static_key_slow_inc(&mmu_audit_key);
mmu_audit = true;
}
if (!mmu_audit)
return;
- jump_label_dec(&mmu_audit_key);
+ static_key_slow_dec(&mmu_audit_key);
mmu_audit = false;
}
#include <linux/ftrace_event.h>
#include <linux/slab.h>
+#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/kvm_para.h>
#define MSRPM_OFFSETS 16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+ /*
+ * Set osvw_len to higher value when updated Revision Guides
+ * are published and we know what the new status bits are
+ */
+ static uint64_t osvw_len = 4, osvw_status;
+
struct vcpu_svm {
struct kvm_vcpu vcpu;
struct vmcb *vmcb;
#else
static bool npt_enabled;
#endif
- static int npt = 1;
+ /* allow nested paging (virtualized MMU) for all guests */
+ static int npt = true;
module_param(npt, int, S_IRUGO);
- static int nested = 1;
+ /* allow nested virtualization in KVM/SVM */
+ static int nested = true;
module_param(nested, int, S_IRUGO);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
erratum_383_found = true;
}
+ static void svm_init_osvw(struct kvm_vcpu *vcpu)
+ {
+ /*
+ * Guests should see errata 400 and 415 as fixed (assuming that
+ * HLT and IO instructions are intercepted).
+ */
+ vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
+ vcpu->arch.osvw.status = osvw_status & ~(6ULL);
+
+ /*
+ * By increasing VCPU's osvw.length to 3 we are telling the guest that
+ * all osvw.status bits inside that length, including bit 0 (which is
+ * reserved for erratum 298), are valid. However, if host processor's
+ * osvw_len is 0 then osvw_status[0] carries no information. We need to
+ * be conservative here and therefore we tell the guest that erratum 298
+ * is present (because we really don't know).
+ */
+ if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
+ vcpu->arch.osvw.status |= 1;
+ }
+
static int has_svm(void)
{
const char *msg;
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
cpu_svm_disable();
+
+ amd_pmu_disable_virt();
}
static int svm_hardware_enable(void *garbage)
__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
}
+
+ /*
+ * Get OSVW bits.
+ *
+ * Note that it is possible to have a system with mixed processor
+ * revisions and therefore different OSVW bits. If bits are not the same
+ * on different processors then choose the worst case (i.e. if erratum
+ * is present on one processor and not on another then assume that the
+ * erratum is present everywhere).
+ */
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
+ uint64_t len, status = 0;
+ int err;
+
+ len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
+ if (!err)
+ status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
+ &err);
+
+ if (err)
+ osvw_status = osvw_len = 0;
+ else {
+ if (len < osvw_len)
+ osvw_len = len;
+ osvw_status |= status;
+ osvw_status &= (1ULL << osvw_len) - 1;
+ }
+ } else
+ osvw_status = osvw_len = 0;
+
svm_init_erratum_383();
+ amd_pmu_enable_virt();
+
return 0;
}
return _tsc;
}
- static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 ratio;
u64 khz;
- /* TSC scaling supported? */
- if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
+ /* Guest TSC same frequency as host TSC? */
+ if (!scale) {
+ svm->tsc_ratio = TSC_RATIO_DEFAULT;
return;
+ }
- /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
- if (user_tsc_khz == 0) {
- vcpu->arch.virtual_tsc_khz = 0;
- svm->tsc_ratio = TSC_RATIO_DEFAULT;
+ /* TSC scaling supported? */
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ if (user_tsc_khz > tsc_khz) {
+ vcpu->arch.tsc_catchup = 1;
+ vcpu->arch.tsc_always_catchup = 1;
+ } else
+ WARN(1, "user requested TSC rate below hardware speed\n");
return;
}
user_tsc_khz);
return;
}
- vcpu->arch.virtual_tsc_khz = user_tsc_khz;
svm->tsc_ratio = ratio;
}
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
- static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ WARN_ON(adjustment < 0);
+ if (host)
+ adjustment = svm_scale_tsc(vcpu, adjustment);
+
svm->vmcb->control.tsc_offset += adjustment;
if (is_guest_mode(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment;
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ svm_init_osvw(&svm->vcpu);
+
return &svm->vcpu;
free_page4:
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}
+ static void svm_update_cpl(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int cpl;
+
+ if (!is_protmode(vcpu))
+ cpl = 0;
+ else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
+ cpl = 3;
+ else
+ cpl = svm->vmcb->save.cs.selector & 0x3;
+
+ svm->vmcb->save.cpl = cpl;
+ }
+
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->vmcb->save.rflags;
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
+
to_svm(vcpu)->vmcb->save.rflags = rflags;
+ if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
+ svm_update_cpl(vcpu);
}
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
}
if (seg == VCPU_SREG_CS)
- svm->vmcb->save.cpl
- = (svm->vmcb->save.cs.attrib
- >> SVM_SELECTOR_DPL_SHIFT) & 3;
+ svm_update_cpl(vcpu);
mark_dirty(svm->vmcb, VMCB_SEG);
}
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
skip_emulated_instruction(&svm->vcpu);
- if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
+ if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
+ int_vec = -1;
+
+ if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
has_error_code, error_code) == EMULATE_FAIL) {
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
static bool __read_mostly vmm_exclusive = 1;
module_param(vmm_exclusive, bool, S_IRUGO);
- static bool __read_mostly yield_on_hlt = 1;
- module_param(yield_on_hlt, bool, S_IRUGO);
-
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (__thread_has_fpu(current))
+ if (user_has_fpu())
clts();
load_gdt(&__get_cpu_var(host_gdt));
}
vmx_set_interrupt_shadow(vcpu, 0);
}
- static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
- {
- /* Ensure that we clear the HLT state in the VMCS. We don't need to
- * explicitly skip the instruction because if the HLT state is set, then
- * the instruction is already executing and RIP has already been
- * advanced. */
- if (!yield_on_hlt &&
- vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
- vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
- }
-
/*
* KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
- if (!(vmcs12->exception_bitmap & PF_VECTOR))
+ if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
return 0;
nested_vmx_vmexit(vcpu);
intr_info |= INTR_TYPE_HARD_EXCEPTION;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
- vmx_clear_hlt(vcpu);
}
static bool vmx_rdtscp_supported(void)
}
/*
- * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
- * ioctl. In this case the call-back should update internal vmx state to make
- * the changes effective.
+ * Engage any workarounds for mis-matched TSC rates. Currently limited to
+ * software catchup for faster rates on slower CPUs.
*/
- static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
- /* Nothing to do here */
+ if (!scale)
+ return;
+
+ if (user_tsc_khz > tsc_khz) {
+ vcpu->arch.tsc_catchup = 1;
+ vcpu->arch.tsc_always_catchup = 1;
+ } else
+ WARN(1, "user requested TSC rate below hardware speed\n");
}
/*
}
}
- static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
{
u64 offset = vmcs_read64(TSC_OFFSET);
vmcs_write64(TSC_OFFSET, offset + adjustment);
msr = find_msr_entry(vmx, msr_index);
if (msr) {
msr->data = data;
+ if (msr - vmx->guest_msrs < vmx->save_nmsrs)
+ kvm_set_shared_msr(msr->index, msr->data,
+ msr->mask);
break;
}
ret = kvm_set_msr_common(vcpu, msr_index, data);
&_pin_based_exec_control) < 0)
return -EIO;
- min =
+ min = CPU_BASED_HLT_EXITING |
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;
- if (yield_on_hlt)
- min |= CPU_BASED_HLT_EXITING;
-
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
} else
intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
- vmx_clear_hlt(vcpu);
}
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
- vmx_clear_hlt(vcpu);
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
bool has_error_code = false;
u32 error_code = 0;
u16 tss_selector;
- int reason, type, idt_v;
+ int reason, type, idt_v, idt_index;
idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
+ idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
type != INTR_TYPE_NMI_INTR))
skip_emulated_instruction(vcpu);
- if (kvm_task_switch(vcpu, tss_selector, reason,
- has_error_code, error_code) == EMULATE_FAIL) {
+ if (kvm_task_switch(vcpu, tss_selector,
+ type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
+ has_error_code, error_code) == EMULATE_FAIL) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h> /* Ugh! */
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
+ /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
+ static u32 tsc_tolerance_ppm = 250;
+ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;
- static inline int kvm_tsc_changes_freq(void)
+ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
- int cpu = get_cpu();
- int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
- cpufreq_quick_get(cpu) != 0;
- put_cpu();
- return ret;
+ return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
}
- u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+ static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
- if (vcpu->arch.virtual_tsc_khz)
- return vcpu->arch.virtual_tsc_khz;
- else
- return __this_cpu_read(cpu_tsc_khz);
+ u64 v = (u64)khz * (1000000 + ppm);
+ do_div(v, 1000000);
+ return v;
}
- static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
- u64 ret;
+ u32 thresh_lo, thresh_hi;
+ int use_scaling = 0;
- WARN_ON(preemptible());
- if (kvm_tsc_changes_freq())
- printk_once(KERN_WARNING
- "kvm: unreliable cycle conversion on adjustable rate TSC\n");
- ret = nsec * vcpu_tsc_khz(vcpu);
- do_div(ret, USEC_PER_SEC);
- return ret;
- }
-
- static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
- {
/* Compute a scale to convert nanoseconds in TSC cycles */
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
- &vcpu->arch.tsc_catchup_shift,
- &vcpu->arch.tsc_catchup_mult);
+ &vcpu->arch.virtual_tsc_shift,
+ &vcpu->arch.virtual_tsc_mult);
+ vcpu->arch.virtual_tsc_khz = this_tsc_khz;
+
+ /*
+ * Compute the variation in TSC rate which is acceptable
+ * within the range of tolerance and decide if the
+ * rate being applied is within that bounds of the hardware
+ * rate. If so, no scaling or compensation need be done.
+ */
+ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
+ thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
+ if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
+ pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
+ use_scaling = 1;
+ }
+ kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
- u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
- vcpu->arch.tsc_catchup_mult,
- vcpu->arch.tsc_catchup_shift);
- tsc += vcpu->arch.last_tsc_write;
+ u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
+ vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
+ tsc += vcpu->arch.this_tsc_write;
return tsc;
}
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- s64 sdiff;
+ s64 usdiff;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
- sdiff = data - kvm->arch.last_tsc_write;
- if (sdiff < 0)
- sdiff = -sdiff;
+
+ /* n.b - signed multiplication and division required */
+ usdiff = data - kvm->arch.last_tsc_write;
+ #ifdef CONFIG_X86_64
+ usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
+ #else
+ /* do_div() only does unsigned */
+ asm("idivl %2; xor %%edx, %%edx"
+ : "=A"(usdiff)
+ : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+ #endif
+ do_div(elapsed, 1000);
+ usdiff -= elapsed;
+ if (usdiff < 0)
+ usdiff = -usdiff;
/*
- * Special case: close write to TSC within 5 seconds of
- * another CPU is interpreted as an attempt to synchronize
- * The 5 seconds is to accommodate host load / swapping as
- * well as any reset of TSC during the boot process.
- *
- * In that case, for a reliable TSC, we can match TSC offsets,
- * or make a best guest using elapsed value.
- */
- if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
- elapsed < 5ULL * NSEC_PER_SEC) {
+ * Special case: TSC write with a small delta (1 second) of virtual
+ * cycle time against real time is interpreted as an attempt to
+ * synchronize the CPU.
+ *
+ * For a reliable TSC, we can match TSC offsets, and for an unstable
+ * TSC, we add elapsed time in this computation. We could let the
+ * compensation code attempt to catch up if we fall behind, but
+ * it's better to try to match offsets from the beginning.
+ */
+ if (usdiff < USEC_PER_SEC &&
+ vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!check_tsc_unstable()) {
- offset = kvm->arch.last_tsc_offset;
+ offset = kvm->arch.cur_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
u64 delta = nsec_to_cycles(vcpu, elapsed);
- offset += delta;
+ data += delta;
+ offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
- ns = kvm->arch.last_tsc_nsec;
+ } else {
+ /*
+ * We split periods of matched TSC writes into generations.
+ * For each generation, we track the original measured
+ * nanosecond time, offset, and write, so if TSCs are in
+ * sync, we can match exact offset, and if not, we can match
+ * exact software computaion in compute_guest_tsc()
+ *
+ * These values are tracked in kvm->arch.cur_xxx variables.
+ */
+ kvm->arch.cur_tsc_generation++;
+ kvm->arch.cur_tsc_nsec = ns;
+ kvm->arch.cur_tsc_write = data;
+ kvm->arch.cur_tsc_offset = offset;
+ pr_debug("kvm: new tsc generation %u, clock %llu\n",
+ kvm->arch.cur_tsc_generation, data);
}
+
+ /*
+ * We also track th most recent recorded KHZ, write and time to
+ * allow the matching interval to be extended at each write.
+ */
kvm->arch.last_tsc_nsec = ns;
kvm->arch.last_tsc_write = data;
- kvm->arch.last_tsc_offset = offset;
- kvm_x86_ops->write_tsc_offset(vcpu, offset);
- raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+ kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
/* Reset of TSC must disable overshoot protection below */
vcpu->arch.hv_clock.tsc_timestamp = 0;
- vcpu->arch.last_tsc_write = data;
- vcpu->arch.last_tsc_nsec = ns;
+ vcpu->arch.last_guest_tsc = data;
+
+ /* Keep track of which generation this VCPU has synchronized to */
+ vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
+ vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
+ vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
+
+ kvm_x86_ops->write_tsc_offset(vcpu, offset);
+ raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
}
+
EXPORT_SYMBOL_GPL(kvm_write_tsc);
static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_save(flags);
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
kernel_ns = get_kernel_ns();
- this_tsc_khz = vcpu_tsc_khz(v);
+ this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
if (unlikely(this_tsc_khz == 0)) {
local_irq_restore(flags);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
if (vcpu->tsc_catchup) {
u64 tsc = compute_guest_tsc(v, kernel_ns);
if (tsc > tsc_timestamp) {
- kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
+ adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
tsc_timestamp = tsc;
}
}
* observed by the guest and ensure the new system time is greater.
*/
max_kernel_ns = 0;
- if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
+ if (vcpu->hv_clock.tsc_timestamp) {
max_kernel_ns = vcpu->last_guest_tsc -
vcpu->hv_clock.tsc_timestamp;
max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
*/
vcpu->hv_clock.version += 2;
- shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
+ shared_kaddr = kmap_atomic(vcpu->time_page);
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
sizeof(vcpu->hv_clock));
- kunmap_atomic(shared_kaddr, KM_USER0);
+ kunmap_atomic(shared_kaddr);
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
return 0;
case MSR_K7_HWCR:
data &= ~(u64)0x40; /* ignore flush filter disable */
data &= ~(u64)0x100; /* ignore ignne emulation enable */
+ data &= ~(u64)0x8; /* ignore TLB cache disable */
if (data != 0) {
pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
*/
pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.length = data;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.status = data;
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
*/
data = 0xbe702111;
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.length;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.status;
+ break;
default:
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_get_msr(vcpu, msr, pdata);
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
case KVM_CAP_GET_TSC_KHZ:
+ case KVM_CAP_PCI_2_3:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
}
kvm_x86_ops->vcpu_load(vcpu, cpu);
- if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
- /* Make sure TSC doesn't go backwards */
- s64 tsc_delta;
- u64 tsc;
- tsc = kvm_x86_ops->read_l1_tsc(vcpu);
- tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
- tsc - vcpu->arch.last_guest_tsc;
+ /* Apply any externally detected TSC adjustments (due to suspend) */
+ if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
+ adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+ vcpu->arch.tsc_offset_adjustment = 0;
+ set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ }
+ if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+ s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
+ native_read_tsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
if (check_tsc_unstable()) {
- kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
+ u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
+ vcpu->arch.last_guest_tsc);
+ kvm_x86_ops->write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
}
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
{
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
- vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+ vcpu->arch.last_host_tsc = native_read_tsc();
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
u32 user_tsc_khz;
r = -EINVAL;
- if (!kvm_has_tsc_control)
- break;
-
user_tsc_khz = (u32)arg;
if (user_tsc_khz >= kvm_max_guest_tsc_khz)
goto out;
- kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
+ if (user_tsc_khz == 0)
+ user_tsc_khz = tsc_khz;
+
+ kvm_set_tsc_khz(vcpu, user_tsc_khz);
r = 0;
goto out;
}
case KVM_GET_TSC_KHZ: {
- r = -EIO;
- if (check_tsc_unstable())
- goto out;
-
- r = vcpu_tsc_khz(vcpu);
-
+ r = vcpu->arch.virtual_tsc_khz;
goto out;
}
default:
return r;
}
+ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+ {
+ return VM_FAULT_SIGBUS;
+ }
+
static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
{
int ret;
unsigned long *dirty_bitmap,
unsigned long nr_dirty_pages)
{
+ spin_lock(&kvm->mmu_lock);
+
/* Not many dirty pages compared to # of shadow pages. */
if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
unsigned long gfn_offset;
for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
unsigned long gfn = memslot->base_gfn + gfn_offset;
- spin_lock(&kvm->mmu_lock);
kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
- spin_unlock(&kvm->mmu_lock);
}
kvm_flush_remote_tlbs(kvm);
- } else {
- spin_lock(&kvm->mmu_lock);
+ } else
kvm_mmu_slot_remove_write_access(kvm, memslot->id);
- spin_unlock(&kvm->mmu_lock);
- }
+
+ spin_unlock(&kvm->mmu_lock);
}
/*
r = -EEXIST;
if (kvm->arch.vpic)
goto create_irqchip_unlock;
+ r = -EINVAL;
+ if (atomic_read(&kvm->online_vcpus))
+ goto create_irqchip_unlock;
r = -ENOMEM;
vpic = kvm_create_pic(kvm);
if (vpic) {
goto emul_write;
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
kaddr += offset_in_page(gpa);
switch (bytes) {
case 1:
default:
BUG();
}
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
kvm_release_page_dirty(page);
if (!exchanged)
return res;
}
+ static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
+ {
+ kvm_set_rflags(emul_to_vcpu(ctxt), val);
+ }
+
static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
{
return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
.set_idt = emulator_set_idt,
.get_cr = emulator_get_cr,
.set_cr = emulator_set_cr,
+ .set_rflags = emulator_set_rflags,
.cpl = emulator_get_cpl,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
profile_hit(KVM_PROFILING, (void *)rip);
}
+ if (unlikely(vcpu->arch.tsc_always_catchup))
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
kvm_lapic_sync_from_vapic(vcpu);
return 0;
}
- int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
- bool has_error_code, u32 error_code)
+ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
+ int reason, bool has_error_code, u32 error_code)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
int ret;
init_emulate_ctxt(vcpu);
- ret = emulator_task_switch(ctxt, tss_selector, reason,
+ ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
has_error_code, error_code);
if (ret)
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i;
+ int ret;
+ u64 local_tsc;
+ u64 max_tsc = 0;
+ bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- if (vcpu->cpu == smp_processor_id())
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- return kvm_x86_ops->hardware_enable(garbage);
+ ret = kvm_x86_ops->hardware_enable(garbage);
+ if (ret != 0)
+ return ret;
+
+ local_tsc = native_read_tsc();
+ stable = !check_tsc_unstable();
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!stable && vcpu->cpu == smp_processor_id())
+ set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ if (stable && vcpu->arch.last_host_tsc > local_tsc) {
+ backwards_tsc = true;
+ if (vcpu->arch.last_host_tsc > max_tsc)
+ max_tsc = vcpu->arch.last_host_tsc;
+ }
+ }
+ }
+
+ /*
+ * Sometimes, even reliable TSCs go backwards. This happens on
+ * platforms that reset TSC during suspend or hibernate actions, but
+ * maintain synchronization. We must compensate. Fortunately, we can
+ * detect that condition here, which happens early in CPU bringup,
+ * before any KVM threads can be running. Unfortunately, we can't
+ * bring the TSCs fully up to date with real time, as we aren't yet far
+ * enough into CPU bringup that we know how much real time has actually
+ * elapsed; our helper function, get_kernel_ns() will be using boot
+ * variables that haven't been updated yet.
+ *
+ * So we simply find the maximum observed TSC above, then record the
+ * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
+ * the adjustment will be applied. Note that we accumulate
+ * adjustments, in case multiple suspend cycles happen before some VCPU
+ * gets a chance to run again. In the event that no KVM threads get a
+ * chance to run, we will miss the entire elapsed period, as we'll have
+ * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
+ * loose cycle time. This isn't too big a deal, since the loss will be
+ * uniform across all VCPUs (not to mention the scenario is extremely
+ * unlikely). It is possible that a second hibernate recovery happens
+ * much faster than a first, causing the observed TSC here to be
+ * smaller; this would require additional padding adjustment, which is
+ * why we set last_host_tsc to the local tsc observed here.
+ *
+ * N.B. - this code below runs only on platforms with reliable TSC,
+ * as that is the only way backwards_tsc is set above. Also note
+ * that this runs for ALL vcpus, which is not a bug; all VCPUs should
+ * have the same delta_cyc adjustment applied if backwards_tsc
+ * is detected. Note further, this adjustment is only done once,
+ * as we reset last_host_tsc on all VCPUs to stop this from being
+ * called multiple times (one for each physical CPU bringup).
+ *
+ * Platforms with unnreliable TSCs don't have to deal with this, they
+ * will be compensated by the logic in vcpu_load, which sets the TSC to
+ * catchup mode. This will catchup all VCPUs to real time, but cannot
+ * guarantee that they stay in perfect synchronization.
+ */
+ if (backwards_tsc) {
+ u64 delta_cyc = max_tsc - local_tsc;
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.tsc_offset_adjustment += delta_cyc;
+ vcpu->arch.last_host_tsc = local_tsc;
+ }
+
+ /*
+ * We have to disable TSC offset matching.. if you were
+ * booting a VM while issuing an S4 host suspend....
+ * you may have some problem. Solving this issue is
+ * left as an exercise to the reader.
+ */
+ kvm->arch.last_tsc_nsec = 0;
+ kvm->arch.last_tsc_write = 0;
+ }
+
+ }
+ return 0;
}
void kvm_arch_hardware_disable(void *garbage)
kvm_x86_ops->check_processor_compatibility(rtn);
}
+ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
+ {
+ return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+ }
+
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct page *page;
}
vcpu->arch.pio_data = page_address(page);
- kvm_init_tsc_catchup(vcpu, max_tsc_khz);
+ kvm_set_tsc_khz(vcpu, max_tsc_khz);
r = kvm_mmu_create(vcpu);
if (r < 0)
free_page((unsigned long)vcpu->arch.pio_data);
}
- int kvm_arch_init_vm(struct kvm *kvm)
+ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ if (type)
+ return -EINVAL;
+
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
put_page(kvm->arch.ept_identity_pagetable);
}
+ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+ {
+ int i;
+
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
+ vfree(free->arch.lpage_info[i]);
+ free->arch.lpage_info[i] = NULL;
+ }
+ }
+ }
+
+ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+ {
+ int i;
+
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ unsigned long ugfn;
+ int lpages;
+ int level = i + 2;
+
+ lpages = gfn_to_index(slot->base_gfn + npages - 1,
+ slot->base_gfn, level) + 1;
+
+ slot->arch.lpage_info[i] =
+ vzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
+ if (!slot->arch.lpage_info[i])
+ goto out_free;
+
+ if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
+ slot->arch.lpage_info[i][0].write_count = 1;
+ if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
+ slot->arch.lpage_info[i][lpages - 1].write_count = 1;
+ ugfn = slot->userspace_addr >> PAGE_SHIFT;
+ /*
+ * If the gfn and userspace address are not aligned wrt each
+ * other, or if explicitly asked to, disable large page
+ * support for this slot
+ */
+ if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
+ !kvm_largepages_enabled()) {
+ unsigned long j;
+
+ for (j = 0; j < lpages; ++j)
+ slot->arch.lpage_info[i][j].write_count = 1;
+ }
+ }
+
+ return 0;
+
+ out_free:
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ vfree(slot->arch.lpage_info[i]);
+ slot->arch.lpage_info[i] = NULL;
+ }
+ return -ENOMEM;
+ }
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
#include <asm/xcr.h>
#include <asm/suspend.h>
#include <asm/debugreg.h>
+#include <asm/fpu-internal.h> /* pcntxt_mask */
#ifdef CONFIG_X86_32
static struct saved_context saved_context;
void save_processor_state(void)
{
__save_processor_state(&saved_context);
- save_sched_clock_state();
+ x86_platform.save_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(save_processor_state);
/* Needed by apm.c */
void restore_processor_state(void)
{
+ x86_platform.restore_sched_clock_state();
__restore_processor_state(&saved_context);
- restore_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(restore_processor_state);
#include <linux/spinlock.h>
#include <linux/signal.h>
#include <linux/sched.h>
+#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/preempt.h>
*/
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
- struct kvm_lpage_info {
- unsigned long rmap_pde;
- int write_count;
- };
-
struct kvm_memory_slot {
gfn_t base_gfn;
unsigned long npages;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_head;
unsigned long nr_dirty_pages;
- struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
+ struct kvm_arch_memory_slot arch;
unsigned long userspace_addr;
int user_alloc;
int id;
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc);
+ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont);
+ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc);
+ bool kvm_largepages_enabled(void);
void kvm_disable_largepages(void);
void kvm_arch_flush_shadow(struct kvm *kvm);
unsigned int ioctl, unsigned long arg);
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
+ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
int kvm_dev_ioctl_check_extension(long ext);
}
#endif
- int kvm_arch_init_vm(struct kvm *kvm);
+ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
void kvm_arch_sync_events(struct kvm *kvm);
unsigned int entries_nr;
int host_irq;
bool host_irq_disabled;
+ bool pci_2_3;
struct msix_entry *host_msix_entries;
int guest_irq;
struct msix_entry *guest_msix_entries;
struct pci_dev *dev;
struct kvm *kvm;
spinlock_t intx_lock;
+ spinlock_t intx_mask_lock;
char irq_name[32];
struct pci_saved_state *pci_saved_state;
};
current->flags &= ~PF_VCPU;
}
+ /*
+ * search_memslots() and __gfn_to_memslot() are here because they are
+ * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
+ * gfn_to_memslot() itself isn't here as an inline because that would
+ * bloat other code too much.
+ */
+ static inline struct kvm_memory_slot *
+ search_memslots(struct kvm_memslots *slots, gfn_t gfn)
+ {
+ struct kvm_memory_slot *memslot;
+
+ kvm_for_each_memslot(memslot, slots)
+ if (gfn >= memslot->base_gfn &&
+ gfn < memslot->base_gfn + memslot->npages)
+ return memslot;
+
+ return NULL;
+ }
+
+ static inline struct kvm_memory_slot *
+ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
+ {
+ return search_memslots(slots, gfn);
+ }
+
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_memslot(kvm, gfn)->id;
}
+ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+ {
+ /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+ (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+ }
+
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
if (unlikely(vcpu->kvm->mmu_notifier_count))
return 1;
/*
- * Both reads happen under the mmu_lock and both values are
- * modified under mmu_lock, so there's no need of smb_rmb()
- * here in between, otherwise mmu_notifier_count should be
- * read before mmu_notifier_seq, see
- * mmu_notifier_invalidate_range_end write side.
+ * Ensure the read of mmu_notifier_count happens before the read
+ * of mmu_notifier_seq. This interacts with the smp_wmb() in
+ * mmu_notifier_invalidate_range_end to make sure that the caller
+ * either sees the old (non-zero) value of mmu_notifier_count or
+ * the new (incremented) value of mmu_notifier_seq.
+ * PowerPC Book3s HV KVM calls this under a per-page lock
+ * rather than under kvm->mmu_lock, for scalability, so
+ * can't rely on kvm->mmu_lock to keep things ordered.
*/
+ smp_rmb();
if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
return 1;
return 0;
{
return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
}
+
+ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
+
+ #else
+
+ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
+
#endif
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT