VirtualBox source code analysis 17: APIC virtualization, part 2 - APIC device emulation

17.1 APIC device simulation

In VirtualBox, the APIC is emulated as a PDM device that also has a ring-0 (R0) part. Its device registration record is:

/**
 * PDM device registration record for the APIC device.
 *
 * The callback members differ per compilation context (IN_RING3, IN_RING0 or
 * IN_RC); building in none of them is a compile-time error. The "...." and
 * "..." lines stand for members that were left out of this excerpt.
 */
const PDMDEVREG g_DeviceAPIC =
{
	 /* .u32Version = */             PDM_DEVREG_VERSION,
    /* .uReserved0 = */             0,
    /* .szName = */                 "apic",
    /* .fFlags = */                 PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE
                                    | PDM_DEVREG_FLAGS_REQUIRE_R0 | PDM_DEVREG_FLAGS_REQUIRE_RC,
    /* .fClass = */                 PDM_DEVREG_CLASS_PIC,
    /* .cMaxInstances = */          1,
    /* .uSharedVersion = */         42,
    /* .cbInstanceShared = */       sizeof(APICDEV),
    /* .cbInstanceCC = */           0,
    /* .cbInstanceRC = */           0,
    /* .cMaxPciDevices = */         0,
    /* .cMaxMsixVectors = */        0,
    /* .pszDescription = */         "Advanced Programmable Interrupt Controller",
#if defined(IN_RING3)
    /* Ring-3 build: module names plus the R3 construct/destruct/relocate/reset/init-complete callbacks. */
    /* .szRCMod = */                "VMMRC.rc",
    /* .szR0Mod = */                "VMMR0.r0",
    /* .pfnConstruct = */           apicR3Construct,
    /* .pfnDestruct = */            apicR3Destruct,
    /* .pfnRelocate = */            apicR3Relocate,
    /* .pfnMemSetup = */            NULL,
    /* .pfnPowerOn = */             NULL,
    /* .pfnReset = */               apicR3Reset,
    /* .pfnSuspend = */             NULL,
    /* .pfnResume = */              NULL,
    /* .pfnAttach = */              NULL,
    /* .pfnDetach = */              NULL,
    /* .pfnQueryInterface = */      NULL,
    /* .pfnInitComplete = */        apicR3InitComplete,
    /* .pfnPowerOff = */            NULL,
    ....
#elif defined(IN_RING0)
    /* Ring-0 build: only the construct callback is provided. */
    /* .pfnEarlyConstruct = */      NULL,
    /* .pfnConstruct = */           apicRZConstruct,
    /* .pfnDestruct = */            NULL,
    /* .pfnFinalDestruct = */       NULL,
    /* .pfnRequest = */             NULL,
    ...
#elif defined(IN_RC)
    /* Raw-mode context build: same construct callback as ring-0. */
    /* .pfnConstruct = */           apicRZConstruct,
    ...
#else
# error "Not in IN_RING3, IN_RING0 or IN_RC!"
#endif
    /* .u32VersionEnd = */          PDM_DEVREG_VERSION
};

When PDMR0Init runs, the VMMR0 module registration record (which contains the APIC device) is appended to g_PDMDevModList. The PDMR0DeviceCreateReqHandler function later traverses g_PDMDevModList and creates the APIC device. (The creation of virtual machine devices is described in the PDM chapter.)

/**
 * Table of ring-0 device registration records referenced by
 * g_VBoxDDR0ModDevReg; its only entry is the APIC device.
 */
static const PDMDEVREGR0 *g_apVMM0DevRegs[] =
{
    &g_DeviceAPIC,
};

/**
 * Module device registration record for VMMR0.
 *
 * Points at g_apVMM0DevRegs and carries the list node that PDMR0Init links
 * into g_PDMDevModList. hMod starts out NULL and is filled in by PDMR0Init.
 */
static PDMDEVMODREGR0 g_VBoxDDR0ModDevReg =
{
    /* .u32Version = */ PDM_DEVMODREGR0_VERSION,
    /* .cDevRegs = */   RT_ELEMENTS(g_apVMM0DevRegs),
    /* .papDevRegs = */ &g_apVMM0DevRegs[0],
    /* .hMod = */       NULL,
    /* .ListEntry = */  { NULL, NULL },
};

/**
 * Initialises the ring-0 device module list and registers the VMMR0 module
 * record (and with it the APIC device) on that list.
 *
 * @param   hMod    Module handle stored in the VMMR0 registration record.
 */
VMMR0_INT_DECL(void) PDMR0Init(void *hMod)
{
    /* Complete the registration record before it becomes reachable via the list. */
    g_VBoxDDR0ModDevReg.hMod = hMod;

    /* Start from an empty list, then link the record in as its first entry. */
    RTListInit(&g_PDMDevModList);
    RTListAppend(&g_PDMDevModList, &g_VBoxDDR0ModDevReg.ListEntry);
}

The implementation code is in VMM\VMMAll\APICAll.cpp and VMM\VMMR3\APIC.cpp.

apicR3Construct

Initialization function for R3

/**
 * Ring-3 construction callback of the APIC device (abridged excerpt).
 *
 * Reads the device configuration, registers the APIC with PDM, installs the
 * x2APIC MSR range, initialises the APIC state, maps the xAPIC MMIO page,
 * creates one timer per VCPU and registers the saved-state handlers.
 *
 * NOTE(review): this excerpt omits the declarations of pHlp, pVM, pApic and
 * pApicDev, most checks of rc, and the final return statement.
 *
 * @returns VBox status code.
 * @param   pDevIns     The device instance.
 * @param   iInstance   The instance number (unused in this excerpt).
 * @param   pCfg        The device configuration node.
 */
DECLCALLBACK(int) apicR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg)
{
	/* Read the "IOAPIC" config value into fIoApicPresent; it defaults to true. */
	int rc = pHlp->pfnCFGMQueryBoolDef(pCfg, "IOAPIC", &pApic->fIoApicPresent, true);
  /* Read the maximum APIC mode ("Mode"); it defaults to PDMAPICMODE_APIC. */
  uint8_t uMaxMode;
  rc = pHlp->pfnCFGMQueryU8Def(pCfg, "Mode", &uMaxMode, PDMAPICMODE_APIC);
  switch ((PDMAPICMODE)uMaxMode)
  {
    case PDMAPICMODE_NONE:
    case PDMAPICMODE_APIC:
    case PDMAPICMODE_X2APIC:
      break;
    default:
      /* Any other value is rejected as a configuration error. */
      return VMR3SetError(pVM->pUVM, VERR_INVALID_PARAMETER, RT_SRC_POS, "APIC mode %d unknown.", uMaxMode);
  }
  pApic->enmMaxMode = (PDMAPICMODE)uMaxMode;
  
  /* Register the APIC device with PDM. */
  rc = PDMDevHlpApicRegister(pDevIns);
  /* When x2APIC is the maximum mode, insert the x2APIC MSR range into the CPUM MSR ranges. */
  if (pApic->enmMaxMode == PDMAPICMODE_X2APIC)
  {
    rc = CPUMR3MsrRangesInsert(pVM, &g_MsrRange_x2Apic);
    AssertLogRelRCReturn(rc, rc);
  }
  else
  {
   	/* Otherwise insert the "invalid" x2APIC range instead
   	   (per the original note, accesses to it raise #GP). */
    rc = CPUMR3MsrRangesInsert(pVM, &g_MsrRange_x2Apic_Invalid);
    AssertLogRelRCReturn(rc, rc);
  }
  apicR3SetCpuIdFeatureLevel(pVM, pApic->enmMaxMode);
  /* Allocate and initialise the APIC state (PIBs and virtual-APIC pages). */
  rc = apicR3InitState(pVM);

  /* Create and map an MMIO region of sizeof(XAPICPAGE) bytes at the APIC base
     address taken from VCPU 0's APIC base MSR. Guest accesses to this region
     are handled by apicWriteMmio / apicReadMmio. */
  PAPICCPU pApicCpu0 = VMCPU_TO_APICCPU(pVM->apCpusR3[0]);
  RTGCPHYS GCPhysApicBase = MSR_IA32_APICBASE_GET_ADDR(pApicCpu0->uApicBaseMsr);
  rc = PDMDevHlpMmioCreateAndMap(pDevIns, GCPhysApicBase, sizeof(XAPICPAGE), apicWriteMmio, apicReadMmio,
                                 IOMMMIO_FLAGS_READ_DWORD | IOMMMIO_FLAGS_WRITE_DWORD_ZEROED, "APIC", &pApicDev->hMmio);
  /* Create one APIC timer per VCPU on the virtual-sync clock; the VCPU is the callback's user argument. */
  for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
  {
    PVMCPU   pVCpu    = pVM->apCpusR3[idCpu];
    PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu);
    rc = PDMDevHlpTimerCreate(pDevIns, TMCLOCK_VIRTUAL_SYNC, apicR3TimerCallback, pVCpu, TMTIMER_FLAGS_NO_CRIT_SECT,
                              pApicCpu->szTimerDesc, &pApicCpu->hTimer);
  }
  /* Register the saved-state (SSM) save and load callbacks. */
  rc = PDMDevHlpSSMRegister(pDevIns, APIC_SAVED_STATE_VERSION, sizeof(*pApicDev), apicR3SaveExec, apicR3LoadExec);
}

apicR3InitState

/**
 * APIC pending-interrupt bitmap (PIB): records interrupts that are pending
 * for a VCPU. Each VCPU has its own APICPIB (see apicR3InitState).
 *
 * Per the original note, pending interrupts come in two kinds: edge-triggered
 * and level-triggered.
 */
typedef struct APICPIB
{
  /* 4 x 64 = 256 bits (presumably one bit per interrupt vector - confirm). */
  uint64_t volatile au64VectorBitmap[4];
  /* NOTE(review): looks like a flag saying a notification is outstanding - confirm against its users. */
  uint32_t volatile fOutstandingNotification;
  /* Padding that sizes the structure to APIC_CACHE_LINE_SIZE bytes. */
  uint8_t           au8Reserved[APIC_CACHE_LINE_SIZE - sizeof(uint32_t) - (sizeof(uint64_t) * 4)];
} APICPIB;
/**
 * Allocates and initialises the APIC state: the pending-interrupt bitmaps
 * (one APICPIB per VCPU, in one shared allocation) and one virtual-APIC page
 * per VCPU. All allocations are mapped into both ring-3 and ring-0.
 *
 * @returns VINF_SUCCESS on success; the failing allocator's status code, or
 *          VERR_NO_MEMORY when the contiguous PIB allocation fails.
 * @param   pVM     The cross context VM structure.
 */
static int apicR3InitState(PVM pVM)
{
  PAPIC pApic = VM_TO_APIC(pVM);

  /* Size of all PIBs together, rounded up to whole pages. */
  pApic->cbApicPib    = RT_ALIGN_Z(pVM->cCpus * sizeof(APICPIB), PAGE_SIZE);
  size_t const cPages = pApic->cbApicPib >> PAGE_SHIFT;

  int rc = VINF_SUCCESS;
  if (cPages == 1)
  {
    /* A single page: use the ordinary page allocator. */
    SUPPAGE SupApicPib;
    RT_ZERO(SupApicPib);
    SupApicPib.Phys = NIL_RTHCPHYS;
    rc = SUPR3PageAllocEx(1 /* cPages */, 0 /* fFlags */, &pApic->pvApicPibR3, &pApic->pvApicPibR0, &SupApicPib);
    if (RT_SUCCESS(rc))
    {
      pApic->HCPhysApicPib = SupApicPib.Phys;
    }
  }
  else
  {
    /* Several pages: they must be physically contiguous. */
    pApic->pvApicPibR3 = SUPR3ContAlloc(cPages, &pApic->pvApicPibR0, &pApic->HCPhysApicPib);
  }

  /* Report a failed PIB allocation instead of silently carrying on. */
  if (!pApic->pvApicPibR3)
    return RT_FAILURE(rc) ? rc : VERR_NO_MEMORY;

  RT_BZERO(pApic->pvApicPibR3, pApic->cbApicPib);
  for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
  {
    PVMCPU   pVCpu    = pVM->apCpusR3[idCpu];
    PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu);

    /* Allocate the virtual-APIC page of this VCPU. */
    SUPPAGE SupApicPage;
    RT_ZERO(SupApicPage);
    SupApicPage.Phys = NIL_RTHCPHYS;
    pApicCpu->cbApicPage = sizeof(XAPICPAGE);
    rc = SUPR3PageAllocEx(1 /* cPages */, 0 /* fFlags */, &pApicCpu->pvApicPageR3, &pApicCpu->pvApicPageR0, &SupApicPage);
    if (RT_FAILURE(rc))
    {
      /* NOTE(review): memory allocated so far is not released here; presumably
         apicR3TermState does that on the destruct path - confirm. */
      return rc;
    }
    pApicCpu->HCPhysApicPage = SupApicPage.Phys;

    /* Locate this VCPU's PIB inside the shared allocation, indexed by VCPU id. */
    uint32_t const offApicPib  = idCpu * sizeof(APICPIB);
    pApicCpu->pvApicPibR0      = (RTR0PTR)((RTR0UINTPTR)pApic->pvApicPibR0 + offApicPib);
    pApicCpu->pvApicPibR3      = (RTR3PTR)((RTR3UINTPTR)pApic->pvApicPibR3 + offApicPib);

    /* Clear the page and bring the APIC into its reset state. */
    RT_BZERO(pApicCpu->pvApicPageR3, pApicCpu->cbApicPage);
    apicResetCpu(pVCpu, true /* fResetApicBaseMsr */);
  }
  return VINF_SUCCESS;
}

apicRZConstruct:

Device initialization function for R0

/**
 * Ring-0 / raw-mode construction callback of the APIC device.
 *
 * Records the device instance in the VM's ring-0 APIC data, selects the NOP
 * critical section for the device, and sets up the APIC and MMIO contexts.
 *
 * @returns VINF_SUCCESS, or the status code of the first step that failed.
 * @param   pDevIns     The device instance.
 */
static DECLCALLBACK(int) apicRZConstruct(PPDMDEVINS pDevIns)
{
    PAPICDEV pThis = PDMDEVINS_2_DATA(pDevIns, PAPICDEV);
    PVMCC    pVM   = PDMDevHlpGetVM(pDevIns);
    pVM->apicr0.s.pDevInsR0 = pDevIns;

    /* Use the NOP critical section as the device critical section. */
    int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns));
    if (RT_FAILURE(rc))
        return rc;

    /* Set up the APIC device for this context. */
    rc = PDMDevHlpApicSetUpContext(pDevIns);
    if (RT_FAILURE(rc))
        return rc;

    /* Install the MMIO read/write callbacks for this context. */
    rc = PDMDevHlpMmioSetUpContext(pDevIns, pThis->hMmio, apicWriteMmio, apicReadMmio, NULL /*pvUser*/);
    if (RT_FAILURE(rc))
        return rc;

    return VINF_SUCCESS;
}

/**
 * Ring-0 device helper behind the APIC set-up-context call: stores the APIC
 * device instance in the PDM ring-0 data of the VM.
 *
 * NOTE(review): the declaration of pGVM is omitted in this excerpt; the
 * sibling helper pdmR0DevHlp_MmioSetUpContextEx obtains it from
 * pDevIns->Internal.s.pGVM.
 *
 * @returns VINF_SUCCESS.
 * @param   pDevIns     The APIC device instance.
 */
static DECLCALLBACK(int) pdmR0DevHlp_ApicSetUpContext(PPDMDEVINS pDevIns)
{
  pGVM->pdm.s.Apic.pDevInsR0 = pDevIns;
  return VINF_SUCCESS;
}

/**
 * Ring-0 device helper that installs the MMIO callbacks of a region by
 * forwarding the request to IOM.
 *
 * @returns Status code of IOMR0MmioSetUpContext.
 * @param   pDevIns     The device instance.
 * @param   hRegion     The MMIO region handle.
 * @param   pfnWrite    Write callback.
 * @param   pfnRead     Read callback.
 * @param   pfnFill     Fill callback.
 * @param   pvUser      User argument for the callbacks.
 */
static DECLCALLBACK(int) pdmR0DevHlp_MmioSetUpContextEx(PPDMDEVINS pDevIns, IOMMMIOHANDLE hRegion, PFNIOMMMIONEWWRITE pfnWrite,
                                                        PFNIOMMMIONEWREAD pfnRead, PFNIOMMMIONEWFILL pfnFill, void *pvUser)
{
  /* The VM structure comes from the device instance's internal data. */
  return IOMR0MmioSetUpContext(pDevIns->Internal.s.pGVM, pDevIns, hRegion, pfnWrite, pfnRead, pfnFill, pvUser);
}

apicR3Reset

Reset function for R3

/**
 * Ring-3 reset callback: resets the APIC of every VCPU.
 *
 * NOTE(review): the declaration of pVM is omitted in this excerpt.
 *
 * @param   pDevIns     The APIC device instance.
 */
DECLCALLBACK(void) apicR3Reset(PPDMDEVINS pDevIns)
{
  for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
  {
    PVMCPU   pVCpu     = pVM->apCpusR3[iCpu];
    PAPICCPU pCpuState = VMCPU_TO_APICCPU(pVCpu);

    /* A running APIC timer is stopped before the APIC is reset. */
    if (PDMDevHlpTimerIsActive(pDevIns, pCpuState->hTimer))
    {
      PDMDevHlpTimerStop(pDevIns, pCpuState->hTimer);
    }

    /* Reset the APIC including its base MSR, then clear the hardware-interrupt force flag. */
    apicResetCpu(pVCpu, true /* fResetApicBaseMsr */);
    apicClearInterruptFF(pVCpu, PDMAPICIRQ_HARDWARE);
  }
}

/**
 * Re-initialises the APIC of one VCPU.
 *
 * @param   pVCpu               The cross context virtual CPU structure.
 * @param   fResetApicBaseMsr   Whether the APIC base MSR is reset as well.
 */
void apicResetCpu(PVMCPUCC pVCpu, bool fResetApicBaseMsr)
{
  /* Start with the IPI-related initialisation. */
  apicInitIpi(pVCpu);

  /* Advertise the Pentium 4 flavour in the version register. */
  PXAPICPAGE pPage = VMCPU_TO_XAPICPAGE(pVCpu);
  pPage->version.u.u8MaxLvtEntry = XAPIC_MAX_LVT_ENTRIES_P4 - 1;
  pPage->version.u.u8Version     = XAPIC_HARDWARE_VERSION_P4;

  if (fResetApicBaseMsr)
  {
    apicResetBaseMsr(pVCpu);
  }

  /* The APIC ID is the VCPU id. */
  pPage->id.u8ApicId = pVCpu->idCpu;
}
/**
 * Resets the APIC base MSR value kept in the per-VCPU APIC data.
 *
 * @param   pVCpu   The cross context virtual CPU structure.
 */
static void apicResetBaseMsr(PVMCPUCC pVCpu)
{
  PAPICCPU pCpuState = VMCPU_TO_APICCPU(pVCpu);
  PAPIC    pApicVm   = VM_TO_APIC(pVCpu->CTX_SUFF(pVM));

  /* Start from the default base address (0xfee00000 according to the original note). */
  uint64_t uNewBase = MSR_IA32_APICBASE_ADDR;

  /* VCPU 0 is the bootstrap processor (BSP). */
  if (pVCpu->idCpu == 0)
  {
    uNewBase |= MSR_IA32_APICBASE_BSP;
  }

  /* Unless the APIC is configured away entirely, it starts out enabled and
     CPUM is told to make the per-CPU APIC CPUID feature visible. */
  if (pApicVm->enmMaxMode != PDMAPICMODE_NONE)
  {
    uNewBase |= MSR_IA32_APICBASE_EN;
    CPUMSetGuestCpuIdPerCpuApicFeature(pVCpu, true /*fVisible*/);
  }

  /* Publish the new value atomically. */
  ASMAtomicWriteU64(&pCpuState->uApicBaseMsr, uNewBase);
}

apicR3Destruct

/**
 * Releases the memory allocated by apicR3InitState: the pending-interrupt
 * bitmaps and the per-VCPU virtual-APIC pages.
 *
 * NOTE(review): the declaration of pApic is omitted in this excerpt.
 *
 * @param   pVM     The cross context VM structure.
 */
static void apicR3TermState(PVM pVM)
{
  /* Free the PIB allocation with the allocator family that matches its size. */
  if (pApic->pvApicPibR3 != NIL_RTR3PTR)
  {
    size_t const cPibPages = pApic->cbApicPib >> PAGE_SHIFT;
    if (cPibPages != 1)
    {
      SUPR3ContFree(pApic->pvApicPibR3, cPibPages);
    }
    else
    {
      SUPR3PageFreeEx(pApic->pvApicPibR3, cPibPages);
    }
    pApic->pvApicPibR3 = NIL_RTR3PTR;
    pApic->pvApicPibR0 = NIL_RTR0PTR;
  }

  /* Drop each VCPU's PIB pointers and free its virtual-APIC page. */
  for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
  {
    PAPICCPU pCpuState = VMCPU_TO_APICCPU(pVM->apCpusR3[iCpu]);

    pCpuState->pvApicPibR3 = NIL_RTR3PTR;
    pCpuState->pvApicPibR0 = NIL_RTR0PTR;

    if (pCpuState->pvApicPageR3 == NIL_RTR3PTR)
      continue;

    SUPR3PageFreeEx(pCpuState->pvApicPageR3, 1 /* cPages */);
    pCpuState->pvApicPageR3 = NIL_RTR3PTR;
    pCpuState->pvApicPageR0 = NIL_RTR0PTR;
  }
}

17.2 Setting the APIC base MSR

/**
 * Handles a guest write to the APIC base MSR and performs the requested APIC
 * mode switch (disabled / xAPIC / x2APIC).
 *
 * Only these transitions are accepted: any mode -> disabled,
 * disabled -> xAPIC, and xAPIC -> x2APIC (the latter only when the VM is
 * configured with x2APIC as its maximum mode).
 *
 * @returns VINF_SUCCESS, or the result of apicMsrAccessError for an invalid
 *          mode transition.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   u64BaseMsr  The value written to the APIC base MSR.
 */
VMM_INT_DECL(int) APICSetBaseMsr(PVMCPUCC pVCpu, uint64_t u64BaseMsr)
{
  PAPICCPU pApicCpu   = VMCPU_TO_APICCPU(pVCpu);
  PAPIC    pApic      = VM_TO_APIC(pVCpu->CTX_SUFF(pVM));
  APICMODE enmOldMode = apicGetMode(pApicCpu->uApicBaseMsr);
  APICMODE enmNewMode = apicGetMode(u64BaseMsr);
  uint64_t uBaseMsr   = pApicCpu->uApicBaseMsr;

  /* Only a change of APIC mode needs any work. */
  if (enmNewMode != enmOldMode)
  {
    switch (enmNewMode)
    {
      /* Disabling the APIC. */
      case APICMODE_DISABLED:
      {
        /* Reset the APIC state but keep the base MSR, then clear the enable bits. */
        apicResetCpu(pVCpu, false /* fResetApicBaseMsr */);
        uBaseMsr &= ~(MSR_IA32_APICBASE_EN | MSR_IA32_APICBASE_EXTD);
        /* Tell CPUM to hide the per-CPU APIC CPUID feature. */
        CPUMSetGuestCpuIdPerCpuApicFeature(pVCpu, false /*fVisible*/);
        break;
      }

      /* Switching to xAPIC mode. */
      case APICMODE_XAPIC:
      {
        /* xAPIC mode can only be entered from the disabled state. */
        if (enmOldMode != APICMODE_DISABLED)
        {
          return apicMsrAccessError(pVCpu, MSR_IA32_APICBASE, APICMSRACCESS_WRITE_INVALID);
        }
        uBaseMsr |= MSR_IA32_APICBASE_EN;
        /* Tell CPUM to show the per-CPU APIC CPUID feature again. */
        CPUMSetGuestCpuIdPerCpuApicFeature(pVCpu, true /*fVisible*/);
        break;
      }

      /* Switching to x2APIC mode. */
      case APICMODE_X2APIC:
      {
        /* Refused when the VM is not configured for x2APIC. */
        if (pApic->enmMaxMode != PDMAPICMODE_X2APIC)
        {
          return apicMsrAccessError(pVCpu, MSR_IA32_APICBASE, APICMSRACCESS_WRITE_INVALID);
        }
        /* x2APIC mode can only be entered from xAPIC mode. */
        if (enmOldMode != APICMODE_XAPIC)
        {
          return apicMsrAccessError(pVCpu, MSR_IA32_APICBASE, APICMSRACCESS_WRITE_INVALID);
        }

        uBaseMsr |= MSR_IA32_APICBASE_EN | MSR_IA32_APICBASE_EXTD;

        /* The APIC ID is reloaded with the VCPU id when x2APIC mode is
           entered (per the original note, software cannot set the APIC ID
           in x2APIC mode). */
        PX2APICPAGE pX2ApicPage = VMCPU_TO_X2APICPAGE(pVCpu);
        ASMMemZero32(&pX2ApicPage->id, sizeof(pX2ApicPage->id));
        pX2ApicPage->id.u32ApicId = pVCpu->idCpu;

        /* The LDR is derived from the x2APIC ID when x2APIC mode is entered:
           logical ID = (ID[19:4] << 16) | (1 << ID[3:0]).
           The shift count is parenthesised because '<<' binds tighter than '&';
           without that the low half evaluated as (1 << ID) & 0xf. */
        uint32_t const uApicId = pX2ApicPage->id.u32ApicId;
        pX2ApicPage->ldr.u32LogicalApicId = (((uApicId >> 4) & UINT32_C(0xffff)) << 16)
                                          | (UINT32_C(1) << (uApicId & UINT32_C(0xf)));
        break;
      }
    }

    /* Commit the updated base MSR value. */
    ASMAtomicWriteU64(&pApicCpu->uApicBaseMsr, uBaseMsr);
  }
  return VINF_SUCCESS;
}

17.3 Functions for accessing the APIC page

apicReadMmio

/**
 * MMIO read callback for the APIC page (abridged excerpt): the read is
 * carried out by apicReadRegister().
 *
 * NOTE(review): this excerpt omits how pVCpu, offReg and uValue are obtained,
 * how the value is stored through pv, and the return statement.
 */
DECLCALLBACK(VBOXSTRICTRC) apicReadMmio(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb)
{
  VBOXSTRICTRC rc = VBOXSTRICTRC_VAL(apicReadRegister(pDevIns, pVCpu, offReg, &uValue));
}
/**
 * Reads one APIC register. Most registers are returned straight from the
 * virtual-APIC page; PPR and the timer current-count are computed.
 *
 * @returns Strict VBox status code.
 * @param   pDevIns     The device instance.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   offReg      Offset of the register to read.
 * @param   puValue     Where the value read is stored; 0 is stored for
 *                      registers that yield no value here.
 */
DECLINLINE(VBOXSTRICTRC) apicReadRegister(PPDMDEVINS pDevIns, PVMCPUCC pVCpu, uint16_t offReg, uint32_t *puValue)
{
    PXAPICPAGE   pXApicPage = VMCPU_TO_XAPICPAGE(pVCpu);
    uint32_t     uValue = 0;
    VBOXSTRICTRC rc = VINF_SUCCESS;
    switch (offReg)
    {
        case XAPIC_OFF_ID:
        case XAPIC_OFF_VERSION:
        case XAPIC_OFF_TPR:
        case XAPIC_OFF_EOI:
        case XAPIC_OFF_RRD:
        case XAPIC_OFF_LDR:
        case XAPIC_OFF_DFR:
        case XAPIC_OFF_SVR:
        case XAPIC_OFF_ISR0:    case XAPIC_OFF_ISR1:    case XAPIC_OFF_ISR2:    case XAPIC_OFF_ISR3:
        case XAPIC_OFF_ISR4:    case XAPIC_OFF_ISR5:    case XAPIC_OFF_ISR6:    case XAPIC_OFF_ISR7:
        case XAPIC_OFF_TMR0:    case XAPIC_OFF_TMR1:    case XAPIC_OFF_TMR2:    case XAPIC_OFF_TMR3:
        case XAPIC_OFF_TMR4:    case XAPIC_OFF_TMR5:    case XAPIC_OFF_TMR6:    case XAPIC_OFF_TMR7:
        case XAPIC_OFF_IRR0:    case XAPIC_OFF_IRR1:    case XAPIC_OFF_IRR2:    case XAPIC_OFF_IRR3:
        case XAPIC_OFF_IRR4:    case XAPIC_OFF_IRR5:    case XAPIC_OFF_IRR6:    case XAPIC_OFF_IRR7:
        case XAPIC_OFF_ESR:
        case XAPIC_OFF_ICR_LO:
        case XAPIC_OFF_ICR_HI:
        case XAPIC_OFF_LVT_TIMER:
#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4
        case XAPIC_OFF_LVT_THERMAL:
#endif
        case XAPIC_OFF_LVT_PERF:
        case XAPIC_OFF_LVT_LINT0:
        case XAPIC_OFF_LVT_LINT1:
        case XAPIC_OFF_LVT_ERROR:
        case XAPIC_OFF_TIMER_ICR:
        case XAPIC_OFF_TIMER_DCR:
        {
          	/* These registers are read directly from the virtual-APIC page. */
            uValue = apicReadRaw32(pXApicPage, offReg);
            break;
        }
        case XAPIC_OFF_PPR:
        {
          	/* The processor-priority register (PPR) is obtained through apicGetPpr(). */
            uValue = apicGetPpr(pVCpu);
            break;
        }
        case XAPIC_OFF_TIMER_CCR:
        {
          	/* The timer current-count register is obtained through apicGetTimerCcr().
          	   NOTE(review): VINF_IOM_R3_MMIO_READ is presumably the status returned
          	   when the read has to be redone in ring-3 - confirm. */
            rc = apicGetTimerCcr(pDevIns, pVCpu, VINF_IOM_R3_MMIO_READ, &uValue);
            break;
        }
        case XAPIC_OFF_APR:
        {
            /* No value is produced for APR; uValue stays 0. */
#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4
            /* Unsupported on Pentium 4 and Xeon CPUs, invalid in x2APIC mode. */
            Assert(!XAPIC_IN_X2APIC_MODE(pVCpu));
#else
# error "Implement Pentium and P6 family APIC architectures"
#endif
            break;
        }
        default:
        {
            /* Any other offset: call PDMDevHlpDBGFStop and record an
               illegal-register-address error via apicSetError(). */
            rc = PDMDevHlpDBGFStop(pDevIns, RT_SRC_POS, "VCPU[%u]: offReg=%#RX16\n", pVCpu->idCpu, offReg);
            apicSetError(pVCpu, XAPIC_ESR_ILLEGAL_REG_ADDRESS);
            break;
        }
    }
    *puValue = uValue;
    return rc;
}

apicWriteMmio

Writing an APIC register is considerably more complex than reading one, because many registers need special handling:

/**
 * Writes one APIC register in xAPIC mode by dispatching to the setter that
 * belongs to the register.
 *
 * Asserts that the caller is on the EMT of pVCpu, that offReg does not
 * exceed XAPIC_OFF_MAX_VALID and that the APIC is not in x2APIC mode.
 *
 * @returns Strict VBox status code.
 * @param   pDevIns     The device instance.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   offReg      Offset of the register to write.
 * @param   uValue      The value to write.
 */
DECLINLINE(VBOXSTRICTRC) apicWriteRegister(PPDMDEVINS pDevIns, PVMCPUCC pVCpu, uint16_t offReg, uint32_t uValue)
{
    VMCPU_ASSERT_EMT(pVCpu);
    Assert(offReg <= XAPIC_OFF_MAX_VALID);
    Assert(!XAPIC_IN_X2APIC_MODE(pVCpu));

    VBOXSTRICTRC rcStrict = VINF_SUCCESS;
    switch (offReg)
    {
        case XAPIC_OFF_TPR:
        {
            rcStrict = apicSetTprEx(pVCpu, uValue, false /* fForceX2ApicBehaviour */);
            break;
        }
        /* All LVT entries share one setter. */
        case XAPIC_OFF_LVT_TIMER:
#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4
        case XAPIC_OFF_LVT_THERMAL:
#endif
        case XAPIC_OFF_LVT_PERF:
        case XAPIC_OFF_LVT_LINT0:
        case XAPIC_OFF_LVT_LINT1:
        case XAPIC_OFF_LVT_ERROR:
        {
            rcStrict = apicSetLvtEntry(pVCpu, offReg, uValue);
            break;
        }
        case XAPIC_OFF_TIMER_ICR:
        {
            rcStrict = apicSetTimerIcr(pDevIns, pVCpu, VINF_IOM_R3_MMIO_WRITE, uValue);
            break;
        }
        case XAPIC_OFF_EOI:
        {
            rcStrict = apicSetEoi(pVCpu, uValue, VINF_IOM_R3_MMIO_WRITE, false /* fForceX2ApicBehaviour */);
            break;
        }
        case XAPIC_OFF_LDR:
        {
            rcStrict = apicSetLdr(pVCpu, uValue);
            break;
        }
        case XAPIC_OFF_DFR:
        {
            rcStrict = apicSetDfr(pVCpu, uValue);
            break;
        }
        case XAPIC_OFF_SVR:
        {
            rcStrict = apicSetSvr(pVCpu, uValue);
            break;
        }
        case XAPIC_OFF_ICR_LO:
        {
            rcStrict = apicSetIcrLo(pVCpu, uValue, VINF_IOM_R3_MMIO_WRITE, true /* fUpdateStat */);
            break;
        }
        case XAPIC_OFF_ICR_HI:
        {
            rcStrict = apicSetIcrHi(pVCpu, uValue);
            break;
        }
        case XAPIC_OFF_TIMER_DCR:
        {
            rcStrict = apicSetTimerDcr(pVCpu, uValue);
            break;
        }
        case XAPIC_OFF_ESR:
        {
            rcStrict = apicSetEsr(pVCpu, uValue);
            break;
        }
        case XAPIC_OFF_APR:
        case XAPIC_OFF_RRD:
        {
          /* Writes to APR and RRD change no state in this implementation. */
#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4
#else
# error "Implement Pentium and P6 family APIC architectures"
#endif
            break;
        }
        /* Read-only, write ignored: */
        case XAPIC_OFF_VERSION:
        case XAPIC_OFF_ID:
            break;
        /* Unavailable/reserved in xAPIC mode: */
        case X2APIC_OFF_SELF_IPI:
        /* Read-only registers: */
        case XAPIC_OFF_PPR:
        case XAPIC_OFF_ISR0:    case XAPIC_OFF_ISR1:    case XAPIC_OFF_ISR2:    case XAPIC_OFF_ISR3:
        case XAPIC_OFF_ISR4:    case XAPIC_OFF_ISR5:    case XAPIC_OFF_ISR6:    case XAPIC_OFF_ISR7:
        case XAPIC_OFF_TMR0:    case XAPIC_OFF_TMR1:    case XAPIC_OFF_TMR2:    case XAPIC_OFF_TMR3:
        case XAPIC_OFF_TMR4:    case XAPIC_OFF_TMR5:    case XAPIC_OFF_TMR6:    case XAPIC_OFF_TMR7:
        case XAPIC_OFF_IRR0:    case XAPIC_OFF_IRR1:    case XAPIC_OFF_IRR2:    case XAPIC_OFF_IRR3:
        case XAPIC_OFF_IRR4:    case XAPIC_OFF_IRR5:    case XAPIC_OFF_IRR6:    case XAPIC_OFF_IRR7:
        case XAPIC_OFF_TIMER_CCR:
        default:
        {
          	/* A write to a reserved, read-only or unknown register: call
          	   PDMDevHlpDBGFStop and record an illegal-register-address error
          	   via apicSetError(). */
            rcStrict = PDMDevHlpDBGFStop(pDevIns, RT_SRC_POS, "APIC%u: offReg=%#RX16\n", pVCpu->idCpu, offReg);
            apicSetError(pVCpu, XAPIC_ESR_ILLEGAL_REG_ADDRESS);
            break;
        }
    }
    return rcStrict;
}

Next, we look at each of the register setter functions in turn.

apicSetTprEx

/**
 * Sets the task-priority register (TPR).
 *
 * @returns VINF_SUCCESS, or the result of apicMsrAccessError when x2APIC
 *          behaviour applies and bits outside XAPIC_TPR_VALID are set.
 * @param   pVCpu                   The cross context virtual CPU structure.
 * @param   uTpr                    The TPR value to set.
 * @param   fForceX2ApicBehaviour   Apply x2APIC checks regardless of the current mode.
 */
static int apicSetTprEx(PVMCPUCC pVCpu, uint32_t uTpr, bool fForceX2ApicBehaviour)
{
    /* Under x2APIC behaviour, setting bits outside the valid mask is an MSR access error. */
    if (XAPIC_IN_X2APIC_MODE(pVCpu) || fForceX2ApicBehaviour)
    {
        if (uTpr & ~XAPIC_TPR_VALID)
        {
            return apicMsrAccessError(pVCpu, MSR_IA32_X2APIC_TPR, APICMSRACCESS_WRITE_RSVD_BITS);
        }
    }

    /* Store the new TPR. */
    PXAPICPAGE pPage = VMCPU_TO_XAPICPAGE(pVCpu);
    pPage->tpr.u8Tpr = uTpr;

    /* Recompute the PPR, then check whether a pending interrupt can now be signalled. */
    apicUpdatePpr(pVCpu);
    apicSignalNextPendingIntr(pVCpu);
    return VINF_SUCCESS;
}
/**
 * Recomputes the processor-priority register (PPR) from the TPR and the
 * highest vector set in the ISR. Called after the TPR or the ISR changed.
 *
 * @param   pVCpu   The cross context virtual CPU structure.
 */
static void apicUpdatePpr(PVMCPUCC pVCpu)
{
  PXAPICPAGE    pPage      = VMCPU_TO_XAPICPAGE(pVCpu);
  /* Highest vector currently in service; 0 when the ISR is empty. */
  uint8_t const uInService = apicGetHighestSetBitInReg(&pPage->isr, 0 /* rcNotFound */);

  /* The PPR is the full TPR when its priority class is at least that of the
     in-service vector; otherwise it is the in-service priority class. */
  uint8_t const uNewPpr = XAPIC_TPR_GET_TP(pPage->tpr.u8Tpr) >= XAPIC_PPR_GET_PP(uInService)
                        ? pPage->tpr.u8Tpr
                        : XAPIC_PPR_GET_PP(uInService);
  pPage->ppr.u8Ppr = uNewPpr;
}
/**
 * Signals the highest-priority pending interrupt to the VCPU if it can be
 * delivered at the current processor priority.
 *
 * @param   pVCpu   The cross context virtual CPU structure.
 */
static void apicSignalNextPendingIntr(PVMCPUCC pVCpu)
{
    VMCPU_ASSERT_EMT_OR_NOT_RUNNING(pVCpu);

    /* Nothing is signalled while the APIC is software-disabled. */
    PCXAPICPAGE pPage = VMCPU_TO_CXAPICPAGE(pVCpu);
    if (!pPage->svr.u.fApicSoftwareEnable)
        return;

    /* Highest vector pending in the IRR, or -1 when there is none. */
    int const iPending = apicGetHighestSetBitInReg(&pPage->irr, -1 /* rcNotFound */);
    if (iPending < 0)
        return;

    /* Raise the hardware-interrupt force flag when the PPR is zero or the
       vector's priority class is above that of the PPR. */
    uint8_t const uVector = iPending;
    uint8_t const uCurPpr = pPage->ppr.u8Ppr;
    if (   !uCurPpr
        ||  XAPIC_PPR_GET_PP(uVector) > XAPIC_PPR_GET_PP(uCurPpr))
        apicSetInterruptFF(pVCpu, PDMAPICIRQ_HARDWARE);
}

apicSetLvtEntry

/**
 * Sets one local-vector-table (LVT) entry.
 *
 * @returns VINF_SUCCESS, or the result of apicMsrAccessError when reserved
 *          bits are set in x2APIC mode.
 * @param   pVCpu   The cross context virtual CPU structure.
 * @param   offLvt  Register offset of the LVT entry.
 * @param   uLvt    The value to write.
 */
static VBOXSTRICTRC apicSetLvtEntry(PVMCPUCC pVCpu, uint16_t offLvt, uint32_t uLvt)
{
  PCAPIC pApicVm = VM_TO_APIC(pVCpu->CTX_SUFF(pVM));

  /* Timer entry asking for TSC-deadline mode although the VM does not support
     it: an error in x2APIC mode, silently dropped in xAPIC mode. */
  if (   offLvt == XAPIC_OFF_LVT_TIMER
      && !pApicVm->fSupportsTscDeadline
      && (uLvt & XAPIC_LVT_TIMER_TSCDEADLINE))
  {
    if (XAPIC_IN_X2APIC_MODE(pVCpu))
    {
      return apicMsrAccessError(pVCpu, XAPIC_GET_X2APIC_MSR(offLvt), APICMSRACCESS_WRITE_RSVD_BITS);
    }
    uLvt &= ~XAPIC_LVT_TIMER_TSCDEADLINE;
  }

  /* Index of this entry within the LVT; entries are 16 bytes apart. */
  uint16_t const iLvt = (offLvt - XAPIC_OFF_LVT_START) >> 4;

  /* Each LVT entry has its own mask of valid bits; in x2APIC mode setting
     any other bit is an error. */
  if (   XAPIC_IN_X2APIC_MODE(pVCpu)
      && (uLvt & ~g_au32LvtValidMasks[iLvt]))
  {
    return apicMsrAccessError(pVCpu, XAPIC_GET_X2APIC_MSR(offLvt), APICMSRACCESS_WRITE_RSVD_BITS);
  }

  /* Keep the valid bits only. */
  uLvt &= g_au32LvtValidMasks[iLvt];

  /* While the APIC is software-disabled every LVT entry is forced masked. */
  PXAPICPAGE pPage = VMCPU_TO_XAPICPAGE(pVCpu);
  if (!pPage->svr.u.fApicSoftwareEnable)
  {
    uLvt |= XAPIC_LVT_MASK;
  }

  /* Store the entry. */
  apicWriteRaw32(pPage, offLvt, uLvt);
  return VINF_SUCCESS;
}

apicSetIcrHi/apicSetIcrLo

Writing the ICR (interrupt command register) sends an inter-processor interrupt (IPI). Software first writes the high half, which holds the destination, and then the low half; the IPI is sent when the low half is written.

/**
 * Sets the high half of the interrupt command register (ICR).
 *
 * @returns VINF_SUCCESS.
 * @param   pVCpu   The cross context virtual CPU structure.
 * @param   uIcrHi  The value written; only the XAPIC_ICR_HI_DEST bits are kept.
 */
static VBOXSTRICTRC apicSetIcrHi(PVMCPUCC pVCpu, uint32_t uIcrHi)
{
    uint32_t const uDest = uIcrHi & XAPIC_ICR_HI_DEST;
    PXAPICPAGE     pPage = VMCPU_TO_XAPICPAGE(pVCpu);
    pPage->icr_hi.all.u32IcrHi = uDest;
    return VINF_SUCCESS;
}
/**
 * Sets the low half of the interrupt command register (ICR) and sends the IPI.
 *
 * @returns The status of apicSendIpi.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   uIcrLo      The value written; only the XAPIC_ICR_LO_WR_VALID bits are kept.
 * @param   rcRZ        Passed on to apicSendIpi.
 * @param   fUpdateStat Not used in this excerpt.
 */
static VBOXSTRICTRC apicSetIcrLo(PVMCPUCC pVCpu, uint32_t uIcrLo, int rcRZ, bool fUpdateStat)
{
    uint32_t const uValid = uIcrLo & XAPIC_ICR_LO_WR_VALID;
    PXAPICPAGE     pPage  = VMCPU_TO_XAPICPAGE(pVCpu);
    pPage->icr_lo.all.u32IcrLo = uValid;

    /* Writing the low half is what triggers the IPI. */
    return apicSendIpi(pVCpu, rcRZ);
}

apicSetSvr :

A special situation may occur when a processor raises its task priority to be greater than or equal to the level of the interrupt for which the processor's INTR signal is currently being asserted. If, at the time the INTA cycle is issued, the interrupt that was to be dispensed has become masked (programmed by software), the local APIC delivers a spurious-interrupt vector. Dispensing the spurious-interrupt vector does not affect the ISR, so the handler for this vector should return without issuing an EOI. The spurious-interrupt vector register (SVR) holds that vector and also contains the APIC software-enable bit.

/**
 * Sets the spurious-interrupt vector register (SVR).
 *
 * @returns VINF_SUCCESS, or the result of apicMsrAccessError when reserved
 *          bits are set in x2APIC mode.
 * @param   pVCpu   The cross context virtual CPU structure.
 * @param   uSvr    The SVR value to write.
 */
static int apicSetSvr(PVMCPUCC pVCpu, uint32_t uSvr)
{
  uint32_t   uValidMask = XAPIC_SVR_VALID;
  PXAPICPAGE pXApicPage = VMCPU_TO_XAPICPAGE(pVCpu);

  /* The suppress-EOI-broadcast bit is writable only when the version
     register advertises EOI-broadcast suppression. */
  if (pXApicPage->version.u.fEoiBroadcastSupression)
    uValidMask |= XAPIC_SVR_SUPRESS_EOI_BROADCAST;

  /* In x2APIC mode, setting a bit outside the valid mask is an MSR access error. */
  if (   XAPIC_IN_X2APIC_MODE(pVCpu)
      && (uSvr & ~uValidMask))
    return apicMsrAccessError(pVCpu, MSR_IA32_X2APIC_SVR, APICMSRACCESS_WRITE_RSVD_BITS);

  /* Store the value as written. */
  apicWriteRaw32(pXApicPage, XAPIC_OFF_SVR, uSvr);

  /* When the write left the APIC software-disabled, mask every LVT entry. */
  if (!pXApicPage->svr.u.fApicSoftwareEnable)
  {
    pXApicPage->lvt_timer.u.u1Mask   = 1;
#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4
    pXApicPage->lvt_thermal.u.u1Mask = 1;
#endif
    pXApicPage->lvt_perf.u.u1Mask    = 1;
    pXApicPage->lvt_lint0.u.u1Mask   = 1;
    pXApicPage->lvt_lint1.u.u1Mask   = 1;
    pXApicPage->lvt_error.u.u1Mask   = 1;
  }

  /* The caller consumes the status, so the function must not fall off its end. */
  return VINF_SUCCESS;
}

apicSetEsr

/**
 * Handles a write to the error-status register (ESR).
 *
 * @returns VINF_SUCCESS, or the result of apicMsrAccessError when reserved
 *          bits are set in x2APIC mode.
 * @param   pVCpu   The cross context virtual CPU structure.
 * @param   uEsr    The value written; it is only validated, never stored.
 */
static int apicSetEsr(PVMCPUCC pVCpu, uint32_t uEsr)
{
    /* In x2APIC mode, bits outside the write-valid mask are an MSR access error. */
    if (XAPIC_IN_X2APIC_MODE(pVCpu))
    {
        if (uEsr & ~XAPIC_ESR_WO_VALID)
        {
            return apicMsrAccessError(pVCpu, MSR_IA32_X2APIC_ESR, APICMSRACCESS_WRITE_RSVD_BITS);
        }
    }

    /* The written value is discarded: the register is loaded with whatever
       apicClearAllErrors() returns after clearing the internal error state. */
    PXAPICPAGE pPage = VMCPU_TO_XAPICPAGE(pVCpu);
    pPage->esr.all.u32Errors = apicClearAllErrors(pVCpu);
    return VINF_SUCCESS;
}

apicSetTimerDcr

Timer Divide Configuration Register (DCR).

/**
 * Sets the timer's divide-configuration register (DCR).
 *
 * @returns VINF_SUCCESS, or the result of apicMsrAccessError when reserved
 *          bits are set in x2APIC mode.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   uTimerDcr   The DCR value to write.
 */
static VBOXSTRICTRC apicSetTimerDcr(PVMCPUCC pVCpu, uint32_t uTimerDcr)
{
  /* In x2APIC mode only the bits in XAPIC_TIMER_DCR_VALID may be set. */
  if (XAPIC_IN_X2APIC_MODE(pVCpu))
  {
    if (uTimerDcr & ~XAPIC_TIMER_DCR_VALID)
    {
      return apicMsrAccessError(pVCpu, MSR_IA32_X2APIC_TIMER_DCR, APICMSRACCESS_WRITE_RSVD_BITS);
    }
  }

  /* Store the value as written. */
  PXAPICPAGE pPage = VMCPU_TO_XAPICPAGE(pVCpu);
  apicWriteRaw32(pPage, XAPIC_OFF_TIMER_DCR, uTimerDcr);
  return VINF_SUCCESS;
}

apicSetTimerIcr

timer's Initial-Count Register (ICR).

/**
 * Sets the timer's initial-count register (ICR) and starts or stops the timer.
 *
 * @returns VINF_SUCCESS, or the status of PDMDevHlpTimerLockClock when the
 *          clock lock was not taken.
 * @param   pDevIns         The device instance.
 * @param   pVCpu           The cross context virtual CPU structure.
 * @param   rcBusy          Passed to PDMDevHlpTimerLockClock.
 * @param   uInitialCount   The initial count to set.
 */
static VBOXSTRICTRC apicSetTimerIcr(PPDMDEVINS pDevIns, PVMCPUCC pVCpu, int rcBusy, uint32_t uInitialCount)
{
    PAPIC      pApicVm   = VM_TO_APIC(pVCpu->CTX_SUFF(pVM));
    PAPICCPU   pCpuState = VMCPU_TO_APICCPU(pVCpu);
    PXAPICPAGE pPage     = VMCPU_TO_XAPICPAGE(pVCpu);

    /* The initial count is not used in TSC-deadline mode; the write is ignored. */
    if (   pApicVm->fSupportsTscDeadline
        && pPage->lvt_timer.u.u2TimerMode == XAPIC_TIMER_MODE_TSC_DEADLINE)
        return VINF_SUCCESS;

    /* The registers are updated with the timer clock locked. */
    TMTIMERHANDLE hApicTimer = pCpuState->hTimer;
    VBOXSTRICTRC rcLock = PDMDevHlpTimerLockClock(pDevIns, hApicTimer, rcBusy);
    if (rcLock != VINF_SUCCESS)
        return rcLock;

    /* The current count starts out equal to the initial count. */
    pPage->timer_icr.u32InitialCount = uInitialCount;
    pPage->timer_ccr.u32CurrentCount = uInitialCount;

    /* A non-zero count (re)starts the timer, zero stops it. */
    if (!uInitialCount)
        apicStopTimer(pVCpu);
    else
        apicStartTimer(pVCpu, uInitialCount);

    PDMDevHlpTimerUnlockClock(pDevIns, hApicTimer);
    return rcLock;
}

apicSetEoi

Handles a write to the end-of-interrupt (EOI) register, which signals that the interrupt currently in service has been handled.

/**
 * Handles a write to the end-of-interrupt (EOI) register: completes the
 * highest-priority in-service interrupt, if there is one.
 *
 * @returns VINF_SUCCESS; rcBusy when the EOI broadcast to the I/O APIC did
 *          not return VINF_SUCCESS; or the result of apicMsrAccessError when
 *          x2APIC behaviour applies and reserved bits are set.
 * @param   pVCpu                   The cross context virtual CPU structure.
 * @param   uEoi                    The value written to the EOI register.
 * @param   rcBusy                  Status to return when the I/O APIC broadcast fails.
 * @param   fForceX2ApicBehaviour   Apply x2APIC checks regardless of the current mode.
 */
static VBOXSTRICTRC apicSetEoi(PVMCPUCC pVCpu, uint32_t uEoi, int rcBusy, bool fForceX2ApicBehaviour)
{
  PXAPICPAGE pXApicPage  = VMCPU_TO_XAPICPAGE(pVCpu);
  bool const fX2ApicMode = XAPIC_IN_X2APIC_MODE(pVCpu) || fForceX2ApicBehaviour;
  if (   fX2ApicMode
      && (uEoi & ~XAPIC_EOI_WO_VALID))
    return apicMsrAccessError(pVCpu, MSR_IA32_X2APIC_EOI, APICMSRACCESS_WRITE_RSVD_BITS);

  /* Highest vector currently in service, or -1 when the ISR is empty. */
  int isrv = apicGetHighestSetBitInReg(&pXApicPage->isr, -1 /* rcNotFound */);
  if (isrv >= 0)
  {
    uint8_t const uVector      = isrv;
    /* A vector that is set in the TMR was level-triggered. */
    bool const fLevelTriggered = apicTestVectorInReg(&pXApicPage->tmr, uVector);
    if (fLevelTriggered)
    {
      /* Level-triggered interrupts get their EOI broadcast to the I/O APIC. */
      VBOXSTRICTRC rc = PDMIoApicBroadcastEoi(pVCpu->CTX_SUFF(pVM), uVector);
      if (rc == VINF_SUCCESS)
      { /* likely */ }
      else
        return rcBusy;

      /* Clear the vector's TMR bit. */
      apicClearVectorInReg(&pXApicPage->tmr, uVector);

      /* When LINT0 is a fixed-mode entry for this vector and has remote IRR
         set, the EOI clears its remote-IRR flag. */
      uint32_t const uLvtLint0 = pXApicPage->lvt_lint0.all.u32LvtLint0;
      if (   XAPIC_LVT_GET_REMOTE_IRR(uLvtLint0)
          && XAPIC_LVT_GET_VECTOR(uLvtLint0) == uVector
          && XAPIC_LVT_GET_DELIVERY_MODE(uLvtLint0) == XAPICDELIVERYMODE_FIXED)
      {
        ASMAtomicAndU32((volatile uint32_t *)&pXApicPage->lvt_lint0.all.u32LvtLint0, ~XAPIC_LVT_REMOTE_IRR);
      }
    }

    /* The interrupt is no longer in service. */
    apicClearVectorInReg(&pXApicPage->isr, uVector);
    /* Recompute the PPR and signal the next deliverable pending interrupt. */
    apicUpdatePpr(pVCpu);
    apicSignalNextPendingIntr(pVCpu);
  }

  /* The caller consumes the status, so the function must not fall off its end. */
  return VINF_SUCCESS;
}

apicSetLdr

Set Logical Destination Register (LDR)

/**
 * Sets the logical-destination register (LDR).
 *
 * @returns VINF_SUCCESS.
 * @param   pVCpu   The cross context virtual CPU structure.
 * @param   uLdr    The value written; only the XAPIC_LDR_VALID bits are stored.
 */
static VBOXSTRICTRC apicSetLdr(PVMCPUCC pVCpu, uint32_t uLdr)
{
    /* NOTE(review): pApic is fetched but not used in this excerpt. */
    PCAPIC pApic = VM_TO_APIC(pVCpu->CTX_SUFF(pVM));

    /* The masked value goes straight into the virtual-APIC page. */
    PXAPICPAGE pPage = VMCPU_TO_XAPICPAGE(pVCpu);
    apicWriteRaw32(pPage, XAPIC_OFF_LDR, uLdr & XAPIC_LDR_VALID);
    return VINF_SUCCESS;
}

apicSetDfr

Destination Format Register (DFR).

/**
 * Sets the destination-format register (DFR).
 *
 * @returns VINF_SUCCESS.
 * @param   pVCpu   The cross context virtual CPU structure.
 * @param   uDfr    The value written.
 */
static VBOXSTRICTRC apicSetDfr(PVMCPUCC pVCpu, uint32_t uDfr)
{
    /* Keep the valid bits and force the XAPIC_DFR_RSVD_MB1 bits to one. */
    uint32_t const uNewDfr = (uDfr & XAPIC_DFR_VALID) | XAPIC_DFR_RSVD_MB1;

    PXAPICPAGE pPage = VMCPU_TO_XAPICPAGE(pVCpu);
    apicWriteRaw32(pPage, XAPIC_OFF_DFR, uNewDfr);
    return VINF_SUCCESS;
}

APICGetTpr

/**
 * Gets the task-priority register (TPR) and, optionally, information about
 * the highest pending interrupt.
 *
 * @returns VINF_SUCCESS, or VERR_PDM_NO_APIC_INSTANCE when the APIC is not
 *          enabled (in which case *pu8Tpr is set to 0).
 * @param   pVCpu           The cross context virtual CPU structure.
 * @param   pu8Tpr          Where the TPR is stored.
 * @param   pfPending       Optional; receives the result of
 *                          apicGetHighestPendingInterrupt.
 * @param   pu8PendingIntr  Passed on to apicGetHighestPendingInterrupt.
 */
VMMDECL(int) APICGetTpr(PCVMCPUCC pVCpu, uint8_t *pu8Tpr, bool *pfPending, uint8_t *pu8PendingIntr)
{
    VMCPU_ASSERT_EMT(pVCpu);

    /* Without an enabled APIC there is no TPR to report. */
    if (!APICIsEnabled(pVCpu))
    {
        *pu8Tpr = 0;
        return VERR_PDM_NO_APIC_INSTANCE;
    }

    PCXAPICPAGE pPage = VMCPU_TO_CXAPICPAGE(pVCpu);

    /* The pending-interrupt query is made only when the caller asks for it. */
    if (pfPending)
        *pfPending = apicGetHighestPendingInterrupt(pVCpu, pu8PendingIntr);

    /* The TPR is read straight from the virtual-APIC page. */
    *pu8Tpr = pPage->tpr.u8Tpr;
    return VINF_SUCCESS;
}

17.4 Functions related to the APIC timer

These functions are called when the timer's initial-count register (ICR) is written.

apicStartTimer

/**
 * Arms the APIC timer of a virtual CPU.
 *
 * @param   pVCpu           The virtual CPU whose APIC timer is started.
 * @param   uInitialCount   The timer initial count; the relative expiry is this
 *                          value shifted left by the current timer shift.
 */
void apicStartTimer(PVMCPUCC pVCpu, uint32_t uInitialCount)
{
    /* NOTE(review): pApicCpu and pDevIns are not declared in this excerpt;
       presumably they are derived from pVCpu in lines elided from the
       listing -- confirm against the full source. */
    PCXAPICPAGE    pPage  = APICCPU_TO_CXAPICPAGE(pApicCpu);
    uint8_t  const cShift = apicGetTimerShift(pPage);
    uint64_t const cTicks = (uint64_t)uInitialCount << cShift;

    /* Arm the timer relative to now (the article notes this helper ends up in
       TM's TMTimerSetRelative); u64TimerInitial is passed as an output. */
    PDMDevHlpTimerSetRelative(pDevIns, pApicCpu->hTimer, cTicks, &pApicCpu->u64TimerInitial);

    /* Refresh the frequency hint for the new count/shift pair. */
    apicHintTimerFreq(pDevIns, pApicCpu, uInitialCount, cShift);
}

apicStopTimer

/*
 * Stops the APIC timer of a virtual CPU and clears the cached frequency-hint
 * inputs.
 *
 * NOTE(review): pDevIns and pApicCpu are not declared in this excerpt;
 * presumably they are derived from pVCpu in lines elided from the listing --
 * confirm against the full source.
 */
static void apicStopTimer(PVMCPUCC pVCpu)
{
  /* Stop the timer through the PDM device helper (per the article, this ends
     up in TM's TMTimerStop). */
  PDMDevHlpTimerStop(pDevIns, pApicCpu->hTimer);
  /* apicHintTimerFreq compares its inputs against these two fields, so zeroing
     them makes a later start with a non-zero initial count register as a change. */
  pApicCpu->uHintedTimerInitialCount = 0;
  pApicCpu->uHintedTimerShift = 0;
}

apicHintTimerFreq

Tells TM (the Time Manager) the current APIC timer frequency.

/**
 * Passes a frequency hint for the APIC timer to the timer subsystem.
 *
 * The hint is only recomputed when the initial count or the shift differs from
 * the values used for the previous hint.
 *
 * @param   pDevIns         The device instance.
 * @param   pApicCpu        The per-VCPU APIC state holding the timer handle and
 *                          the last hinted values.
 * @param   uInitialCount   The timer initial count; 0 yields a hint of 0 Hz.
 * @param   uTimerShift     The timer shift applied to the initial count.
 */
void apicHintTimerFreq(PPDMDEVINS pDevIns, PAPICCPU pApicCpu, uint32_t uInitialCount, uint8_t uTimerShift)
{
    /* Nothing to do when both inputs match what was hinted last time. */
    if (   pApicCpu->uHintedTimerInitialCount == uInitialCount
        && pApicCpu->uHintedTimerShift        == uTimerShift)
        return;

    /* A zero initial count gives a 0 Hz hint; otherwise divide the timer clock
       frequency by the number of ticks in one period. */
    uint32_t uHz = 0;
    if (uInitialCount)
    {
        uint64_t const cPeriodTicks = (uint64_t)uInitialCount << uTimerShift;
        uHz = PDMDevHlpTimerGetFreq(pDevIns, pApicCpu->hTimer) / cPeriodTicks;
    }

    /* Publish the hint (per the article, via TMTimerSetFrequencyHint) and
       remember the inputs it was derived from. */
    PDMDevHlpTimerSetFrequencyHint(pDevIns, pApicCpu->hTimer, uHz);
    pApicCpu->uHintedTimerInitialCount = uInitialCount;
    pApicCpu->uHintedTimerShift        = uTimerShift;
}

APICGetTimerFreq

Gets the current APIC timer frequency.

/**
 * Queries the frequency of the APIC timer clock.
 *
 * @returns VINF_SUCCESS when the APIC is enabled; VERR_PDM_NO_APIC_INSTANCE
 *          otherwise, in which case *pu64Value is left untouched.
 * @param   pVM         The VM to query.
 * @param   pu64Value   Where to store the frequency on success.
 */
VMM_INT_DECL(int) APICGetTimerFreq(PVMCC pVM, uint64_t *pu64Value)
{
    /* VCPU 0 is used both for the enabled check and for the timer handle. */
    PVMCPUCC pVCpu = pVM->CTX_SUFF(apCpus)[0];
    if (!APICIsEnabled(pVCpu))
        return VERR_PDM_NO_APIC_INSTANCE;

    /* Ask the timer helper for the frequency (per the article, this ends up
       in TMTimerGetFreq). */
    PCAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu);
    *pu64Value = PDMDevHlpTimerGetFreq(VMCPU_TO_DEVINS(pVCpu), pApicCpu->hTimer);
    return VINF_SUCCESS;
}

apicR3TimerCallback

Callback invoked when the APIC timer expires.

/**
 * Timer callback: runs when the APIC timer expires.
 *
 * Posts the timer interrupt unless it is masked in the LVT timer entry, then
 * updates the current-count register according to the timer mode and, in
 * periodic mode with a non-zero initial count, restarts the timer.
 *
 * @param   pDevIns     The device instance (unused in this excerpt).
 * @param   pTimer      The timer handle (unused in this excerpt).
 * @param   pvUser      User argument (unused in this excerpt).
 */
static DECLCALLBACK(void) apicR3TimerCallback(PPDMDEVINS pDevIns, PTMTIMER pTimer, void *pvUser)
{
    /* NOTE(review): pVCpu is not declared in this excerpt; presumably it is
       obtained in lines elided from the listing -- confirm against the full
       source. */
    PXAPICPAGE     pPage = VMCPU_TO_XAPICPAGE(pVCpu);
    uint32_t const uLvt  = pPage->lvt_timer.all.u32LvtTimer;

    /* Deliver the vector from the LVT timer entry as an edge-triggered
       interrupt, but only while the entry is not masked. */
    if (!XAPIC_LVT_IS_MASKED(uLvt))
    {
        uint8_t const uTimerVector = XAPIC_LVT_GET_VECTOR(uLvt);
        apicPostInterrupt(pVCpu, uTimerVector, XAPICTRIGGERMODE_EDGE, 0 /* uSrcTag */);
    }

    /* What happens next depends on the timer mode in the LVT entry. */
    XAPICTIMERMODE const enmMode = XAPIC_LVT_GET_TIMER_MODE(uLvt);
    switch (enmMode)
    {
        case XAPICTIMERMODE_PERIODIC:
        {
            /* Periodic: reload the current count from the initial count and
               re-arm the timer when that count is non-zero. */
            uint32_t const cReload = pPage->timer_icr.u32InitialCount;
            pPage->timer_ccr.u32CurrentCount = cReload;
            if (cReload != 0)
                apicStartTimer(pVCpu, cReload);
            break;
        }

        case XAPICTIMERMODE_ONESHOT:
        {
            /* One-shot: the timer is not re-armed; the current count just
               drops to zero. */
            pPage->timer_ccr.u32CurrentCount = 0;
            break;
        }

        case XAPICTIMERMODE_TSC_DEADLINE:
        {
            /* TSC-deadline mode is not handled in this excerpt. */
            break;
        }
    }
}

reference material:

https://blog.csdn.net/omnispace/article/details/61415994

https://blog.csdn.net/ustc_dylan/article/details/4132046

18 original articles published, praised 0, 366 visitors
Private letter follow

Tags: VirtualBox

Posted on Thu, 06 Feb 2020 07:33:25 -0500 by damnedbee