author      Jean-Marc Lenoir                2018-01-14 11:59:24 +0100
committer   Jean-Marc Lenoir                2018-01-14 11:59:24 +0100
commit      b091575043e114ae3f1741823aec15c4e9941ae0 (patch)
Initial commit.
Diffstat (limited to 'vmmon.patch')
-rw-r--r--   vmmon.patch   1318
1 file changed, 1318 insertions, 0 deletions
diff --git a/vmmon.patch b/vmmon.patch new file mode 100644 index 000000000000..98121825e1f3 --- /dev/null +++ b/vmmon.patch @@ -0,0 +1,1318 @@ +diff --git a/vmmon/Makefile b/vmmon/Makefile +index de8162e..6124a71 100644 +--- a/vmmon/Makefile ++++ b/vmmon/Makefile +@@ -43,7 +43,11 @@ INCLUDE += -I$(SRCROOT)/shared + endif + + ++ifdef KVERSION ++VM_UNAME = $(KVERSION) ++else + VM_UNAME = $(shell uname -r) ++endif + + # Header directory for the running kernel + ifdef LINUXINCLUDE +diff --git a/vmmon/common/hostif.h b/vmmon/common/hostif.h +index 865abc0..2b94ce1 100644 +--- a/vmmon/common/hostif.h ++++ b/vmmon/common/hostif.h +@@ -122,14 +122,10 @@ EXTERN MPN64 HostIF_GetNextAnonPage(VMDriver *vm, MPN64 mpn); + EXTERN int HostIF_GetLockedPageList(VMDriver *vm, VA64 uAddr, + unsigned int numPages); + +-EXTERN int HostIF_ReadPage(MPN64 mpn, VA64 addr, Bool kernelBuffer); +-EXTERN int HostIF_WritePage(MPN64 mpn, VA64 addr, Bool kernelBuffer); +-#ifdef _WIN32 +-/* Add a HostIF_ReadMachinePage() if/when needed */ ++EXTERN int HostIF_ReadPage(VMDriver *vm, MPN64 mpn, VA64 addr, Bool kernelBuffer); ++EXTERN int HostIF_WritePage(VMDriver *vm, MPN64 mpn, VA64 addr, ++ Bool kernelBuffer); + EXTERN int HostIF_WriteMachinePage(MPN64 mpn, VA64 addr); +-#else +-#define HostIF_WriteMachinePage(_a, _b) HostIF_WritePage((_a), (_b), TRUE) +-#endif + #if defined __APPLE__ + // There is no need for a fast clock lock on Mac OS. + #define HostIF_FastClockLock(_callerID) do {} while (0) +@@ -145,4 +141,8 @@ EXTERN void HostIF_FreeMachinePage(MPN64 mpn); + + EXTERN int HostIF_SafeRDMSR(uint32 msr, uint64 *val); + ++#if defined __APPLE__ ++EXTERN void HostIF_PageUnitTest(void); ++#endif ++ + #endif // ifdef _HOSTIF_H_ +diff --git a/vmmon/common/memtrack.c b/vmmon/common/memtrack.c +index e53daeb..eec9844 100644 +--- a/vmmon/common/memtrack.c ++++ b/vmmon/common/memtrack.c +@@ -88,6 +88,7 @@ + + #include "vmware.h" + #include "hostif.h" ++#include "vmx86.h" + + #include "memtrack.h" + +@@ -146,12 +147,11 @@ typedef struct MemTrackHT { + typedef uint64 MemTrackHTKey; + + typedef struct MemTrack { ++ VMDriver *vm; /* The VM instance. */ + unsigned numPages; /* Number of pages tracked. */ + MemTrackDir1 dir1; /* First level directory. */ + MemTrackHT vpnHashTable; /* VPN to entry hashtable. */ +-#if defined(MEMTRACK_MPN_LOOKUP) + MemTrackHT mpnHashTable; /* MPN to entry hashtable. 
*/ +-#endif + } MemTrack; + + /* +@@ -304,11 +304,9 @@ MemTrackCleanup(MemTrack *mt) // IN + if (mt->vpnHashTable.pages[idx] != NULL) { + HostIF_FreePage(mt->vpnHashTable.pages[idx]); + } +-#if defined(MEMTRACK_MPN_LOOKUP) + if (mt->mpnHashTable.pages[idx] != NULL) { + HostIF_FreePage(mt->mpnHashTable.pages[idx]); + } +-#endif + } + + HostIF_FreeKernelMem(mt); +@@ -332,7 +330,7 @@ MemTrackCleanup(MemTrack *mt) // IN + */ + + MemTrack * +-MemTrack_Init(void) ++MemTrack_Init(VMDriver *vm) // IN: + { + MemTrack *mt; + unsigned idx; +@@ -349,6 +347,7 @@ MemTrack_Init(void) + goto error; + } + memset(mt, 0, sizeof *mt); ++ mt->vm = vm; + + for (idx = 0; idx < MEMTRACK_HT_PAGES; idx++) { + MemTrackHTPage *htPage = MemTrackAllocPage(); +@@ -360,7 +359,6 @@ MemTrack_Init(void) + mt->vpnHashTable.pages[idx] = htPage; + } + +-#if defined(MEMTRACK_MPN_LOOKUP) + for (idx = 0; idx < MEMTRACK_HT_PAGES; idx++) { + MemTrackHTPage *htPage = MemTrackAllocPage(); + +@@ -370,7 +368,6 @@ MemTrack_Init(void) + } + mt->mpnHashTable.pages[idx] = htPage; + } +-#endif + + return mt; + +@@ -409,6 +406,8 @@ MemTrack_Add(MemTrack *mt, // IN + MemTrackDir3 *dir3; + MEMTRACK_IDX2DIR(idx, p1, p2, p3); + ++ ASSERT(HostIF_VMLockIsHeld(mt->vm)); ++ + if (p1 >= MEMTRACK_DIR1_ENTRIES || + p2 >= MEMTRACK_DIR2_ENTRIES || + p3 >= MEMTRACK_DIR3_ENTRIES) { +@@ -430,9 +429,7 @@ MemTrack_Add(MemTrack *mt, // IN + ent->mpn = mpn; + + MemTrackHTInsert(&mt->vpnHashTable, ent, &ent->vpnChain, ent->vpn); +-#if defined(MEMTRACK_MPN_LOOKUP) + MemTrackHTInsert(&mt->mpnHashTable, ent, &ent->mpnChain, ent->mpn); +-#endif + + mt->numPages++; + +@@ -461,6 +458,7 @@ MemTrack_LookupVPN(MemTrack *mt, // IN + VPN64 vpn) // IN + { + MemTrackEntry *next = *MemTrackHTLookup(&mt->vpnHashTable, vpn); ++ ASSERT(HostIF_VMLockIsHeld(mt->vm)); + + while (next != NULL) { + if (next->vpn == vpn) { +@@ -473,7 +471,6 @@ MemTrack_LookupVPN(MemTrack *mt, // IN + } + + +-#if defined(MEMTRACK_MPN_LOOKUP) + /* + *---------------------------------------------------------------------- + * +@@ -493,7 +490,9 @@ MemTrackEntry * + MemTrack_LookupMPN(MemTrack *mt, // IN + MPN64 mpn) // IN + { +- MemTrackEntry *next = *MemTrackHTLookup(&mt->mpnHashTable, mpn); ++ MemTrackEntry *next; ++ ASSERT(HostIF_VMLockIsHeld(mt->vm)); ++ next = *MemTrackHTLookup(&mt->mpnHashTable, mpn); + + while (next != NULL) { + if (next->mpn == mpn) { +@@ -504,7 +503,6 @@ MemTrack_LookupMPN(MemTrack *mt, // IN + + return NULL; + } +-#endif + + + /* +diff --git a/vmmon/common/memtrack.h b/vmmon/common/memtrack.h +index 977c5af..6d104ad 100644 +--- a/vmmon/common/memtrack.h ++++ b/vmmon/common/memtrack.h +@@ -31,30 +31,22 @@ + #define INCLUDE_ALLOW_VMCORE + #include "includeCheck.h" + +-#if defined(VMX86_DEBUG) +-#define MEMTRACK_MPN_LOOKUP +-#endif +- + struct MemTrack; + + typedef struct MemTrackEntry { + VPN64 vpn; + MPN64 mpn; + struct MemTrackEntry *vpnChain; +-#if defined(MEMTRACK_MPN_LOOKUP) + struct MemTrackEntry *mpnChain; +-#endif + } MemTrackEntry; + + typedef void (MemTrackCleanupCb)(void *cData, MemTrackEntry *entry); + +-extern struct MemTrack *MemTrack_Init(void); ++extern struct MemTrack *MemTrack_Init(VMDriver *vm); + extern unsigned MemTrack_Cleanup(struct MemTrack *mt, MemTrackCleanupCb *cb, + void *cbData); + extern MemTrackEntry *MemTrack_Add(struct MemTrack *mt, VPN64 vpn, MPN64 mpn); + extern MemTrackEntry *MemTrack_LookupVPN(struct MemTrack *mt, VPN64 vpn); +-#if defined(MEMTRACK_MPN_LOOKUP) + extern MemTrackEntry *MemTrack_LookupMPN(struct MemTrack *mt, MPN64 mpn); +-#endif + 
+ #endif // _MEMTRACK_H_ +diff --git a/vmmon/common/task.c b/vmmon/common/task.c +index de2429e..228996e 100644 +--- a/vmmon/common/task.c ++++ b/vmmon/common/task.c +@@ -39,6 +39,9 @@ + # include <linux/string.h> /* memset() in the kernel */ + + # define EXPORT_SYMTAB ++# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) ++# define LINUX_GDT_IS_RO ++# endif + #else + # include <string.h> + #endif +@@ -59,6 +62,13 @@ + #include "x86vtinstr.h" + #include "apic.h" + ++#ifdef LINUX_GDT_IS_RO ++# include <asm/desc.h> ++# define default_rw_gdt get_current_gdt_rw() ++#else ++# define default_rw_gdt NULL ++#endif ++ + #if defined(_WIN64) + # include "x86.h" + # include "vmmon-asm-x86-64.h" +@@ -708,11 +718,28 @@ TaskRestoreHostGDTTRLDT(Descriptor *tempGDTBase, + */ + + desc = (Descriptor *)((VA)HOST_KERNEL_LA_2_VA(hostGDT64.offset + tr)); ++#ifdef LINUX_GDT_IS_RO ++ /* ++ * If GDT is read-only, we must always load TR from alternative gdt, ++ * otherwise CPU gets page fault when marking TR busy. ++ */ ++ { ++ DTR64 rwGDT64; ++ ++ rwGDT64.offset = (unsigned long)tempGDTBase; ++ rwGDT64.limit = hostGDT64.limit; ++ Desc_SetType((Descriptor *)((unsigned long)tempGDTBase + tr), TASK_DESC); ++ _Set_GDT((DTR *)&rwGDT64); ++ SET_TR(tr); ++ _Set_GDT((DTR *)&hostGDT64); ++ } ++#else + if (Desc_Type(desc) == TASK_DESC_BUSY) { + Desc_SetType(desc, TASK_DESC); + } + _Set_GDT((DTR *)&hostGDT64); + SET_TR(tr); ++#endif + SET_LDT(ldt); + } + } +@@ -1775,7 +1802,8 @@ Task_Switch(VMDriver *vm, // IN + ASSERT(pCPU < ARRAYSIZE(hvRootPage) && pCPU < ARRAYSIZE(tmpGDT)); + + hvRootMPN = Atomic_Read64(&hvRootPage[pCPU]); +- tempGDTBase = USE_TEMPORARY_GDT ? Atomic_ReadPtr(&tmpGDT[pCPU]) : NULL; ++ tempGDTBase = USE_TEMPORARY_GDT ? Atomic_ReadPtr(&tmpGDT[pCPU]) ++ : default_rw_gdt; + + /* + * We can't allocate memory with interrupts disabled on all hosts +diff --git a/vmmon/common/vmx86.c b/vmmon/common/vmx86.c +index dbe794c..156e94a 100644 +--- a/vmmon/common/vmx86.c ++++ b/vmmon/common/vmx86.c +@@ -720,6 +720,35 @@ cleanup: + } + + ++/* ++ *---------------------------------------------------------------------- ++ * ++ * Vmx86_LookupUserMPN -- ++ * ++ * Look up the MPN of a locked user page by user VA under the VM lock. ++ * ++ * Results: ++ * A status code and the MPN on success. ++ * ++ * Side effects: ++ * None ++ * ++ *---------------------------------------------------------------------- ++ */ ++ ++int ++Vmx86_LookupUserMPN(VMDriver *vm, // IN: VMDriver ++ VA64 uAddr, // IN: user VA of the page ++ MPN64 *mpn) // OUT ++{ ++ int ret; ++ HostIF_VMLock(vm, 38); ++ ret = HostIF_LookupUserMPN(vm, uAddr, mpn); ++ HostIF_VMUnlock(vm, 38); ++ return ret; ++} ++ ++ + /* + *---------------------------------------------------------------------- + * +diff --git a/vmmon/common/vmx86.h b/vmmon/common/vmx86.h +index 71ea01b..9e227ca 100644 +--- a/vmmon/common/vmx86.h ++++ b/vmmon/common/vmx86.h +@@ -106,6 +106,7 @@ extern PseudoTSC pseudoTSC; + #define MAX_LOCKED_PAGES (-1) + + extern VMDriver *Vmx86_CreateVM(void); ++extern int Vmx86_LookupUserMPN(VMDriver *vm, VA64 uAddr, MPN64 *mpn); + extern int Vmx86_ReleaseVM(VMDriver *vm); + extern int Vmx86_InitVM(VMDriver *vm, InitBlock *initParams); + extern int Vmx86_LateInitVM(VMDriver *vm); +diff --git a/vmmon/include/compat_cred.h b/vmmon/include/compat_cred.h +--- a/vmmon/include/compat_cred.h ++++ b/vmmon/include/compat_cred.h +@@ -24,7 +24,11 @@ + * Include linux/cred.h via linux/sched.h - it is not nice, but + * as cpp does not have #ifexist... 
+ */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + #include <linux/sched.h> ++#else ++#include <linux/cred.h> ++#endif + + #if !defined(current_fsuid) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + #define current_uid() (current->uid) +diff --git a/vmmon/include/compat_pgtable.h b/vmmon/include/compat_pgtable.h +index dedc25a..4722d4e 100644 +--- a/vmmon/include/compat_pgtable.h ++++ b/vmmon/include/compat_pgtable.h +@@ -30,80 +30,32 @@ + #include <asm/pgtable.h> + + +-/* pte_page() API modified in 2.3.23 to return a struct page * --hpreg */ +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 23) +-# define compat_pte_page pte_page +-#else +-# include "compat_page.h" +- +-# define compat_pte_page(_pte) virt_to_page(pte_page(_pte)) +-#endif +- +- +-/* Appeared in 2.5.5 --hpreg */ +-#ifndef pte_offset_map +-/* Appeared in SuSE 8.0's 2.4.18 --hpreg */ +-# ifdef pte_offset_atomic +-# define pte_offset_map pte_offset_atomic +-# define pte_unmap pte_kunmap +-# else +-# define pte_offset_map pte_offset +-# define pte_unmap(_pte) +-# endif +-#endif +- +- +-/* Appeared in 2.5.74-mmX --petr */ +-#ifndef pmd_offset_map +-# define pmd_offset_map(pgd, address) pmd_offset(pgd, address) +-# define pmd_unmap(pmd) +-#endif +- +- + /* +- * Appeared in 2.6.10-rc2-mm1. Older kernels did L4 page tables as +- * part of pgd_offset, or they did not have L4 page tables at all. +- * In 2.6.11 pml4 -> pgd -> pmd -> pte hierarchy was replaced by +- * pgd -> pud -> pmd -> pte hierarchy. ++ * p4d level appeared in 4.12. + */ +-#ifdef PUD_MASK +-# define compat_pgd_offset(mm, address) pgd_offset(mm, address) +-# define compat_pgd_present(pgd) pgd_present(pgd) +-# define compat_pud_offset(pgd, address) pud_offset(pgd, address) +-# define compat_pud_present(pud) pud_present(pud) +-typedef pgd_t compat_pgd_t; +-typedef pud_t compat_pud_t; +-#elif defined(pml4_offset) +-# define compat_pgd_offset(mm, address) pml4_offset(mm, address) +-# define compat_pgd_present(pml4) pml4_present(pml4) +-# define compat_pud_offset(pml4, address) pml4_pgd_offset(pml4, address) +-# define compat_pud_present(pgd) pgd_present(pgd) +-typedef pml4_t compat_pgd_t; +-typedef pgd_t compat_pud_t; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) ++# define compat_p4d_offset(pgd, address) p4d_offset(pgd, address) ++# define compat_p4d_present(p4d) p4d_present(p4d) ++# define compat_p4d_large(p4d) p4d_large(p4d) ++# define compat_p4d_pfn(p4d) p4d_pfn(p4d) ++# define COMPAT_P4D_MASK P4D_MASK ++typedef p4d_t compat_p4d_t; + #else +-# define compat_pgd_offset(mm, address) pgd_offset(mm, address) +-# define compat_pgd_present(pgd) pgd_present(pgd) +-# define compat_pud_offset(pgd, address) (pgd) +-# define compat_pud_present(pud) (1) +-typedef pgd_t compat_pgd_t; +-typedef pgd_t compat_pud_t; ++# define compat_p4d_offset(pgd, address) (pgd) ++# define compat_p4d_present(p4d) (1) ++# define compat_p4d_large(p4d) (0) ++# define compat_p4d_pfn(p4d) INVALID_MPN /* Not used */ ++# define COMPAT_P4D_MASK 0 /* Not used */ ++typedef pgd_t compat_p4d_t; + #endif +- +- +-#define compat_pgd_offset_k(mm, address) pgd_offset_k(address) +- +- +-/* Introduced somewhere in 2.6.0, + backported to some 2.4 RedHat kernels */ +-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(pte_pfn) +-# define pte_pfn(pte) page_to_pfn(compat_pte_page(pte)) ++/* p[gu]d_large did not exist before 2.6.25 */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25) ++# define pud_large(pud) 0 ++# define pgd_large(pgd) 0 + #endif +- +- +-/* A page_table_lock field is added to 
struct mm_struct in 2.3.10 --hpreg */ +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 10) +-# define compat_get_page_table_lock(_mm) (&(_mm)->page_table_lock) +-#else +-# define compat_get_page_table_lock(_mm) NULL ++/* pud_pfn did not exist before 3.8. */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) ++# define pud_pfn(pud) INVALID_MPN + #endif + + +@@ -128,12 +80,8 @@ typedef pgd_t compat_pud_t; + #define VM_PAGE_KERNEL_EXEC PAGE_KERNEL + #endif + #else +-#ifdef PAGE_KERNEL_EXECUTABLE +-#define VM_PAGE_KERNEL_EXEC PAGE_KERNEL_EXECUTABLE +-#else + #define VM_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC + #endif +-#endif + + + #endif /* __COMPAT_PGTABLE_H__ */ +diff --git a/vmmon/include/pgtbl.h b/vmmon/include/pgtbl.h +index 39ef4e1..0935e09 100644 +--- a/vmmon/include/pgtbl.h ++++ b/vmmon/include/pgtbl.h +@@ -26,154 +26,14 @@ + #include "compat_spinlock.h" + #include "compat_page.h" + +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblPte2MPN -- +- * +- * Returns the page structure associated to a Page Table Entry. +- * +- * This function is not allowed to schedule() because it can be called while +- * holding a spinlock --hpreg +- * +- * Results: +- * INVALID_MPN on failure +- * mpn on success +- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE MPN64 +-PgtblPte2MPN(pte_t *pte) // IN +-{ +- MPN64 mpn; +- if (pte_present(*pte) == 0) { +- return INVALID_MPN; +- } +- mpn = pte_pfn(*pte); +- if (mpn >= INVALID_MPN) { +- return INVALID_MPN; +- } +- return mpn; +-} +- +- +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblPte2Page -- +- * +- * Returns the page structure associated to a Page Table Entry. +- * +- * This function is not allowed to schedule() because it can be called while +- * holding a spinlock --hpreg +- * +- * Results: +- * The page structure if the page table entry points to a physical page +- * NULL if the page table entry does not point to a physical page +- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE struct page * +-PgtblPte2Page(pte_t *pte) // IN +-{ +- if (pte_present(*pte) == 0) { +- return NULL; +- } +- +- return compat_pte_page(*pte); +-} +- +- +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblPGD2PTELocked -- +- * +- * Walks through the hardware page tables to try to find the pte +- * associated to a virtual address. +- * +- * Results: +- * pte. Caller must call pte_unmap if valid pte returned. 
+- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE pte_t * +-PgtblPGD2PTELocked(compat_pgd_t *pgd, // IN: PGD to start with +- VA addr) // IN: Address in the virtual address +- // space of that process +-{ +- compat_pud_t *pud; +- pmd_t *pmd; +- pte_t *pte; +- +- if (compat_pgd_present(*pgd) == 0) { +- return NULL; +- } +- +- pud = compat_pud_offset(pgd, addr); +- if (compat_pud_present(*pud) == 0) { +- return NULL; +- } +- +- pmd = pmd_offset_map(pud, addr); +- if (pmd_present(*pmd) == 0) { +- pmd_unmap(pmd); +- return NULL; +- } +- +- pte = pte_offset_map(pmd, addr); +- pmd_unmap(pmd); +- return pte; +-} +- +- +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblVa2PTELocked -- +- * +- * Walks through the hardware page tables to try to find the pte +- * associated to a virtual address. +- * +- * Results: +- * pte. Caller must call pte_unmap if valid pte returned. +- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE pte_t * +-PgtblVa2PTELocked(struct mm_struct *mm, // IN: Mm structure of a process +- VA addr) // IN: Address in the virtual address +- // space of that process +-{ +- return PgtblPGD2PTELocked(compat_pgd_offset(mm, addr), addr); +-} +- + + /* + *----------------------------------------------------------------------------- + * + * PgtblVa2MPNLocked -- + * +- * Retrieve MPN for a given va. +- * +- * Caller must call pte_unmap if valid pte returned. The mm->page_table_lock +- * must be held, so this function is not allowed to schedule() --hpreg ++ * Walks through the hardware page tables to try to find the pte ++ * associated to a virtual address. Then maps PTE to MPN. + * + * Results: + * INVALID_MPN on failure +@@ -188,89 +48,64 @@ PgtblVa2PTELocked(struct mm_struct *mm, // IN: Mm structure of a process + static INLINE MPN64 + PgtblVa2MPNLocked(struct mm_struct *mm, // IN: Mm structure of a process + VA addr) // IN: Address in the virtual address ++ // space of that process + { +- pte_t *pte; ++ pgd_t *pgd; ++ compat_p4d_t *p4d; ++ MPN64 mpn; + +- pte = PgtblVa2PTELocked(mm, addr); +- if (pte != NULL) { +- MPN64 mpn = PgtblPte2MPN(pte); +- pte_unmap(pte); +- return mpn; ++ pgd = pgd_offset(mm, addr); ++ if (pgd_present(*pgd) == 0) { ++ return INVALID_MPN; + } +- return INVALID_MPN; +-} +- +- +-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblKVa2MPNLocked -- +- * +- * Retrieve MPN for a given kernel va. +- * +- * Caller must call pte_unmap if valid pte returned. The mm->page_table_lock +- * must be held, so this function is not allowed to schedule() --hpreg +- * +- * Results: +- * INVALID_MPN on failure +- * mpn on success +- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE MPN64 +-PgtblKVa2MPNLocked(struct mm_struct *mm, // IN: Mm structure of a caller +- VA addr) // IN: Address in the virtual address +-{ +- pte_t *pte; +- +- pte = PgtblPGD2PTELocked(compat_pgd_offset_k(mm, addr), addr); +- if (pte != NULL) { +- MPN64 mpn = PgtblPte2MPN(pte); +- pte_unmap(pte); +- return mpn; ++ if (pgd_large(*pgd)) { ++ /* Linux kernel does not support PGD huge pages. 
*/ ++ /* return pgd_pfn(*pgd) + ((addr & PGD_MASK) >> PAGE_SHIFT); */ ++ return INVALID_MPN; + } +- return INVALID_MPN; +-} +-#endif +- + +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblVa2PageLocked -- +- * +- * Return the "page" struct for a given va. +- * +- * Results: +- * struct page or NULL. The mm->page_table_lock must be held, so this +- * function is not allowed to schedule() --hpreg +- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE struct page * +-PgtblVa2PageLocked(struct mm_struct *mm, // IN: Mm structure of a process +- VA addr) // IN: Address in the virtual address +-{ +- pte_t *pte; +- +- pte = PgtblVa2PTELocked(mm, addr); +- if (pte != NULL) { +- struct page *page = PgtblPte2Page(pte); +- pte_unmap(pte); +- return page; ++ p4d = compat_p4d_offset(pgd, addr); ++ if (compat_p4d_present(*p4d) == 0) { ++ return INVALID_MPN; ++ } ++ if (compat_p4d_large(*p4d)) { ++ mpn = compat_p4d_pfn(*p4d) + ((addr & ~COMPAT_P4D_MASK) >> PAGE_SHIFT); + } else { +- return NULL; ++ pud_t *pud; ++ ++ pud = pud_offset(p4d, addr); ++ if (pud_present(*pud) == 0) { ++ return INVALID_MPN; ++ } ++ if (pud_large(*pud)) { ++ mpn = pud_pfn(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); ++ } else { ++ pmd_t *pmd; ++ ++ pmd = pmd_offset(pud, addr); ++ if (pmd_present(*pmd) == 0) { ++ return INVALID_MPN; ++ } ++ if (pmd_large(*pmd)) { ++ mpn = pmd_pfn(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); ++ } else { ++ pte_t *pte; ++ ++ pte = pte_offset_map(pmd, addr); ++ if (pte_present(*pte) == 0) { ++ pte_unmap(pte); ++ return INVALID_MPN; ++ } ++ mpn = pte_pfn(*pte); ++ pte_unmap(pte); ++ } ++ } ++ } ++ if (mpn >= INVALID_MPN) { ++ mpn = INVALID_MPN; + } +-} ++ return mpn; ++} + + + /* +@@ -298,85 +133,10 @@ PgtblVa2MPN(VA addr) // IN + + /* current->mm is NULL for kernel threads, so use active_mm. */ + mm = current->active_mm; +- if (compat_get_page_table_lock(mm)) { +- spin_lock(compat_get_page_table_lock(mm)); +- } ++ spin_lock(&mm->page_table_lock); + mpn = PgtblVa2MPNLocked(mm, addr); +- if (compat_get_page_table_lock(mm)) { +- spin_unlock(compat_get_page_table_lock(mm)); +- } ++ spin_unlock(&mm->page_table_lock); + return mpn; + } + +- +-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblKVa2MPN -- +- * +- * Walks through the hardware page tables of the current process to try to +- * find the page structure associated to a virtual address. +- * +- * Results: +- * Same as PgtblVa2MPNLocked() +- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE MPN64 +-PgtblKVa2MPN(VA addr) // IN +-{ +- struct mm_struct *mm = current->active_mm; +- MPN64 mpn; +- +- if (compat_get_page_table_lock(mm)) { +- spin_lock(compat_get_page_table_lock(mm)); +- } +- mpn = PgtblKVa2MPNLocked(mm, addr); +- if (compat_get_page_table_lock(mm)) { +- spin_unlock(compat_get_page_table_lock(mm)); +- } +- return mpn; +-} +-#endif +- +- +-/* +- *----------------------------------------------------------------------------- +- * +- * PgtblVa2Page -- +- * +- * Walks through the hardware page tables of the current process to try to +- * find the page structure associated to a virtual address. 
+- * +- * Results: +- * Same as PgtblVa2PageLocked() +- * +- * Side effects: +- * None +- * +- *----------------------------------------------------------------------------- +- */ +- +-static INLINE struct page * +-PgtblVa2Page(VA addr) // IN +-{ +- struct mm_struct *mm = current->active_mm; +- struct page *page; +- +- if (compat_get_page_table_lock(mm)) { +- spin_lock(compat_get_page_table_lock(mm)); +- } +- page = PgtblVa2PageLocked(mm, addr); +- if (compat_get_page_table_lock(mm)) { +- spin_unlock(compat_get_page_table_lock(mm)); +- } +- return page; +-} +- +- + #endif /* __PGTBL_H__ */ +diff --git a/vmmon/linux/driver.c b/vmmon/linux/driver.c +index 87cf45b..5390a93 100644 +--- a/vmmon/linux/driver.c ++++ b/vmmon/linux/driver.c +@@ -108,7 +108,9 @@ + + static int LinuxDriver_Close(struct inode *inode, struct file *filp); + static unsigned int LinuxDriverPoll(struct file *file, poll_table *wait); +-#if defined(VMW_NOPAGE_2624) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) ++static int LinuxDriverFault(struct vm_fault *fault); ++#elif defined(VMW_NOPAGE_2624) + static int LinuxDriverFault(struct vm_area_struct *vma, struct vm_fault *fault); + #else + static struct page *LinuxDriverNoPage(struct vm_area_struct *vma, +@@ -117,7 +117,7 @@ + #endif + static int LinuxDriverMmap(struct file *filp, struct vm_area_struct *vma); + +-static void LinuxDriverPollTimeout(unsigned long clientData); ++static void LinuxDriverPollTimeout(struct timer_list *clientData); + + static struct vm_operations_struct vmuser_mops = { + #ifdef VMW_NOPAGE_2624 +@@ -244,7 +259,7 @@ LinuxDriverEstimateTSCkHz(void) + */ + + static void +-LinuxDriverComputeTSCFreq(unsigned long data) // IN: ++LinuxDriverComputeTSCFreq(struct timer_list *data) // IN: + { + Vmx86_GetkHzEstimate(&linuxState.startTime); + } +@@ -287,9 +302,13 @@ init_module(void) + */ + + init_waitqueue_head(&linuxState.pollQueue); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) && !defined(timer_setup) + init_timer(&linuxState.pollTimer); + linuxState.pollTimer.data = 0; +- linuxState.pollTimer.function = LinuxDriverPollTimeout; ++ linuxState.pollTimer.function = (void *)LinuxDriverPollTimeout; ++#else ++ timer_setup(&linuxState.pollTimer, LinuxDriverPollTimeout, 0); ++#endif + + linuxState.fastClockThread = NULL; + linuxState.fastClockFile = NULL; +@@ -365,9 +363,13 @@ init_module(void) + */ + + Vmx86_ReadTSCAndUptime(&linuxState.startTime); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) && !defined(timer_setup) + init_timer(&tscTimer); + tscTimer.data = 0; +- tscTimer.function = LinuxDriverComputeTSCFreq; ++ tscTimer.function = (void *)LinuxDriverComputeTSCFreq; ++#else ++ timer_setup(&tscTimer, LinuxDriverComputeTSCFreq, 0); ++#endif + tscTimer.expires = jiffies + 4 * HZ; + add_timer(&tscTimer); + +@@ -903,7 +907,7 @@ + */ + + static void +-LinuxDriverPollTimeout(unsigned long clientData) // IN: ++LinuxDriverPollTimeout(struct timer_list *clientData) // IN: + { + LinuxDriverWakeUp(FALSE); + } +@@ -928,7 +934,10 @@ + *----------------------------------------------------------------------------- + */ + +-#if defined(VMW_NOPAGE_2624) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) ++static int ++LinuxDriverFault(struct vm_fault *fault) //IN/OUT ++#elif defined(VMW_NOPAGE_2624) + static int LinuxDriverFault(struct vm_area_struct *vma, //IN + struct vm_fault *fault) //IN/OUT + #else +@@ -937,6 +946,9 @@ + int *type) //OUT: Fault type + #endif + { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) ++ struct vm_area_struct *vma = 
fault->vma; ++#endif + VMLinux *vmLinux = (VMLinux *) vma->vm_file->private_data; + unsigned long pg; + struct page* page; +@@ -1398,7 +1410,6 @@ LinuxDriver_Ioctl(struct file *filp, // IN: + case IOCTL_VMX86_CREATE_VM: + case IOCTL_VMX86_INIT_CROSSGDT: + case IOCTL_VMX86_SET_UID: +- case IOCTL_VMX86_LOOK_UP_MPN: + #if defined(__linux__) && defined(VMX86_DEVEL) + case IOCTL_VMX86_LOOK_UP_LARGE_MPN: + #endif +@@ -1411,8 +1423,6 @@ LinuxDriver_Ioctl(struct file *filp, // IN: + case IOCTL_VMX86_GET_KHZ_ESTIMATE: + case IOCTL_VMX86_GET_ALL_CPUID: + case IOCTL_VMX86_GET_ALL_MSRS: +- case IOCTL_VMX86_READ_PAGE: +- case IOCTL_VMX86_WRITE_PAGE: + case IOCTL_VMX86_SET_POLL_TIMEOUT_PTR: + case IOCTL_VMX86_GET_KERNEL_CLOCK_RATE: + case IOCTL_VMX86_GET_REFERENCE_CLOCK_HZ: +@@ -1579,7 +1589,7 @@ LinuxDriver_Ioctl(struct file *filp, // IN: + if (retval) { + break; + } +- args.ret.status = HostIF_LookupUserMPN(vm, args.uAddr, &args.ret.mpn); ++ args.ret.status = Vmx86_LookupUserMPN(vm, args.uAddr, &args.ret.mpn); + retval = HostIF_CopyToUser((void *)ioarg, &args, sizeof args); + break; + } +@@ -1912,7 +1922,7 @@ LinuxDriver_Ioctl(struct file *filp, // IN: + if (retval) { + break; + } +- retval = HostIF_ReadPage(req.mpn, req.uAddr, FALSE); ++ retval = HostIF_ReadPage(vm, req.mpn, req.uAddr, FALSE); + break; + } + +@@ -1923,7 +1933,7 @@ LinuxDriver_Ioctl(struct file *filp, // IN: + if (retval) { + break; + } +- retval = HostIF_WritePage(req.mpn, req.uAddr, FALSE); ++ retval = HostIF_WritePage(vm, req.mpn, req.uAddr, FALSE); + break; + } + +diff --git a/vmmon/linux/hostif.c b/vmmon/linux/hostif.c +index fd32013..583d6da 100644 +--- a/vmmon/linux/hostif.c ++++ b/vmmon/linux/hostif.c +@@ -77,19 +77,22 @@ + #include <linux/capability.h> + #include <linux/kthread.h> + #include <linux/wait.h> +- ++#include <linux/signal.h> ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) ++#include <linux/taskstats_kern.h> // For linux/sched/signal.h without version check ++#endif + + #include "vmware.h" + #include "x86apic.h" + #include "vm_asm.h" + #include "modulecall.h" ++#include "driver.h" + #include "memtrack.h" + #include "phystrack.h" + #include "cpuid.h" + #include "cpuid_info.h" + #include "hostif.h" + #include "hostif_priv.h" +-#include "driver.h" + #include "vmhost.h" + #include "x86msr.h" + #include "apic.h" +@@ -1010,7 +1013,7 @@ HostIF_FreeLockedPages(VMDriver *vm, // IN: VM instance pointer + int + HostIF_Init(VMDriver *vm) // IN: + { +- vm->memtracker = MemTrack_Init(); ++ vm->memtracker = MemTrack_Init(vm); + if (vm->memtracker == NULL) { + return -1; + } +@@ -1165,10 +1173,7 @@ + { + int retval; + +- down_read(¤t->mm->mmap_sem); +- retval = get_user_pages(current, current->mm, (unsigned long)uvAddr, +- numPages, 0, 0, ppages, NULL); +- up_read(¤t->mm->mmap_sem); ++ retval = get_user_pages_fast((unsigned long)uvAddr, numPages, 0, ppages); + + return retval != numPages; + } +@@ -1626,12 +1631,34 @@ HostIF_EstimateLockedPageLimit(const VMDriver* vm, // IN + unsigned int reservedPages = MEMDEFAULTS_MIN_HOST_PAGES; + unsigned int hugePages = (vm == NULL) ? 
0 : + BYTES_2_PAGES(vm->memInfo.hugePageBytes); +- unsigned int lockedPages = global_page_state(NR_PAGETABLE) + +- global_page_state(NR_SLAB_UNRECLAIMABLE) + +- global_page_state(NR_UNEVICTABLE) + +- hugePages + reservedPages; +- unsigned int anonPages = global_page_state(NR_ANON_PAGES); ++ unsigned int lockedPages = hugePages + reservedPages; ++ unsigned int anonPages; + unsigned int swapPages = BYTES_2_PAGES(linuxState.swapSize); ++ ++ /* global_page_state is global_zone_page_state in 4.14. */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) ++ lockedPages += global_zone_page_state(NR_PAGETABLE); ++#else ++ lockedPages += global_page_state(NR_PAGETABLE); ++#endif ++ /* NR_SLAB_* moved from zone to node in 4.13. */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ++ lockedPages += global_node_page_state(NR_SLAB_UNRECLAIMABLE); ++#else ++ lockedPages += global_page_state(NR_SLAB_UNRECLAIMABLE); ++#endif ++ /* NR_UNEVICTABLE moved from global to node in 4.8. */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) ++ lockedPages += global_node_page_state(NR_UNEVICTABLE); ++#else ++ lockedPages += global_page_state(NR_UNEVICTABLE); ++#endif ++ /* NR_ANON_MAPPED moved & changed name in 4.8. */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) ++ anonPages = global_node_page_state(NR_ANON_MAPPED); ++#else ++ anonPages = global_page_state(NR_ANON_PAGES); ++#endif + + if (anonPages > swapPages) { + lockedPages += anonPages - swapPages; +@@ -1726,9 +1753,6 @@ + unsigned long jifs, jifBase; + unsigned int attempts = 0; + +- /* Assert that HostIF_InitUptime has been called. */ +- ASSERT(uptimeState.timer.function); +- + retry: + do { + version = VersionedAtomic_BeginTryRead(&uptimeState.version); +@@ -1794,7 +1818,7 @@ HostIFReadUptimeWork(unsigned long *j) // OUT: current jiffies + */ + + static void +-HostIFUptimeResyncMono(unsigned long data) // IN: ignored ++HostIFUptimeResyncMono(struct timer_list *timer) // IN: ignored + { + unsigned long jifs; + uintptr_t flags; +@@ -1856,8 +1880,13 @@ HostIF_InitUptime(void) + -(tv.tv_usec * (UPTIME_FREQ / 1000000) + + tv.tv_sec * UPTIME_FREQ)); + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) && !defined(timer_setup) + init_timer(&uptimeState.timer); +- uptimeState.timer.function = HostIFUptimeResyncMono; ++ uptimeState.timer.function = (void *)HostIFUptimeResyncMono; ++ uptimeState.timer.data = (unsigned long)&uptimeState.timer; ++#else ++ timer_setup(&uptimeState.timer, HostIFUptimeResyncMono, 0); ++#endif + mod_timer(&uptimeState.timer, jiffies + HZ); + } + +@@ -2028,15 +2052,15 @@ HostIF_MapCrossPage(VMDriver *vm, // IN + return NULL; + } + vPgAddr = (VA) MapCrossPage(page); +- HostIF_GlobalLock(16); ++ HostIF_VMLock(vm, 27); + if (vm->vmhost->crosspagePagesCount >= MAX_INITBLOCK_CPUS) { +- HostIF_GlobalUnlock(16); ++ HostIF_VMUnlock(vm, 27); + UnmapCrossPage(page, (void*)vPgAddr); + + return NULL; + } + vm->vmhost->crosspagePages[vm->vmhost->crosspagePagesCount++] = page; +- HostIF_GlobalUnlock(16); ++ HostIF_VMUnlock(vm, 27); + + ret = vPgAddr | (((VA)p) & (PAGE_SIZE - 1)); + +@@ -2850,13 +2874,75 @@ HostIF_CallOnEachCPU(void (*func)(void*), // IN: function to call + } + + ++/* ++ *----------------------------------------------------------------------------- ++ * ++ * HostIFCheckTrackedMPN -- ++ * ++ * Check if a given MPN is tracked for the specified VM. ++ * ++ * Result: ++ * TRUE if the MPN is tracked in one of the trackers for the specified VM, ++ * FALSE otherwise. 
++ * ++ * Side effects: ++ * None ++ * ++ *----------------------------------------------------------------------------- ++ */ ++ ++Bool ++HostIFCheckTrackedMPN(VMDriver *vm, // IN: The VM instance ++ MPN64 mpn) // IN: The MPN ++{ ++ VMHost * const vmh = vm->vmhost; ++ ++ if (vmh == NULL) { ++ return FALSE; ++ } ++ ++ HostIF_VMLock(vm, 32); // Debug version of PhysTrack wants VM's lock. ++ if (vmh->lockedPages) { ++ if (PhysTrack_Test(vmh->lockedPages, mpn)) { ++ HostIF_VMUnlock(vm, 32); ++ return TRUE; ++ } ++ } ++ ++ if (vmh->AWEPages) { ++ if (PhysTrack_Test(vmh->AWEPages, mpn)) { ++ HostIF_VMUnlock(vm, 32); ++ return TRUE; ++ } ++ } ++ ++ if (vm->memtracker) { ++ if (MemTrack_LookupMPN(vm->memtracker, mpn) != NULL) { ++ HostIF_VMUnlock(vm, 32); ++ return TRUE; ++ } ++ } ++ HostIF_VMUnlock(vm, 32); ++ ++ if (vmx86_debug) { ++ /* ++ * The monitor may have old KSeg mappings to pages which it no longer ++ * owns. Minimize customer noise by only logging this for developers. ++ */ ++ Log("%s: MPN %" FMT64 "x not owned by this VM\n", __FUNCTION__, mpn); ++ } ++ return FALSE; ++} ++ ++ + /* + *---------------------------------------------------------------------- + * + * HostIF_ReadPage -- + * +- * puts the content of a machine page into a kernel or user mode +- * buffer. ++ * Reads one page of data from a machine page and returns it in the ++ * specified kernel or user buffer. The machine page must be owned by ++ * the specified VM. + * + * Results: + * 0 on success +@@ -2869,7 +2955,8 @@ HostIF_CallOnEachCPU(void (*func)(void*), // IN: function to call + */ + + int +-HostIF_ReadPage(MPN64 mpn, // MPN of the page ++HostIF_ReadPage(VMDriver *vm, // IN: The VM instance ++ MPN64 mpn, // MPN of the page + VA64 addr, // buffer for data + Bool kernelBuffer) // is the buffer in kernel space? + { +@@ -2881,6 +2968,9 @@ HostIF_ReadPage(MPN64 mpn, // MPN of the page + if (mpn == INVALID_MPN) { + return -EFAULT; + } ++ if (HostIFCheckTrackedMPN(vm, mpn) == FALSE) { ++ return -EFAULT; ++ } + + page = pfn_to_page(mpn); + ptr = kmap(page); +@@ -2904,8 +2994,8 @@ HostIF_ReadPage(MPN64 mpn, // MPN of the page + * + * HostIF_WritePage -- + * +- * Put the content of a kernel or user mode buffer into a machine +- * page. ++ * Writes one page of data from a kernel or user buffer onto the specified ++ * machine page. The machine page must be owned by the specified VM. + * + * Results: + * 0 on success +@@ -2918,9 +3008,9 @@ HostIF_ReadPage(MPN64 mpn, // MPN of the page + */ + + int +-HostIF_WritePage(MPN64 mpn, // MPN of the page +- VA64 addr, // data to write to the page +- Bool kernelBuffer) // is the buffer in kernel space? ++HostIFWritePageWork(MPN64 mpn, // MPN of the page ++ VA64 addr, // data to write to the page ++ Bool kernelBuffer) // is the buffer in kernel space? + { + void const *buf = VA64ToPtr(addr); + int ret = 0; +@@ -2947,6 +3037,45 @@ HostIF_WritePage(MPN64 mpn, // MPN of the page + return ret; + } + ++int ++HostIF_WritePage(VMDriver *vm, // IN: The VM instance ++ MPN64 mpn, // MPN of the page ++ VA64 addr, // data to write to the page ++ Bool kernelBuffer) // is the buffer in kernel space? ++{ ++ if (HostIFCheckTrackedMPN(vm, mpn) == FALSE) { ++ return -EFAULT; ++ } ++ return HostIFWritePageWork(mpn, addr, kernelBuffer); ++} ++ ++ ++/* ++ *----------------------------------------------------------------------------- ++ * ++ * HostIF_WriteMachinePage -- ++ * ++ * Puts the content of a machine page into a kernel or user mode ++ * buffer. 
This should only be used for host-global pages, not any ++ * VM-owned pages. ++ * ++ * Results: ++ * On success: 0 ++ * On failure: a negative error code ++ * ++ * Side effects: ++ * None ++ * ++ *----------------------------------------------------------------------------- ++ */ ++ ++int ++HostIF_WriteMachinePage(MPN64 mpn, // IN: MPN of the page ++ VA64 addr) // IN: data to write to the page ++{ ++ return HostIFWritePageWork(mpn, addr, TRUE); ++} ++ + + /* + *---------------------------------------------------------------------- +diff --git a/vmmon/vmcore/moduleloop.c b/vmmon/vmcore/moduleloop.c +index 94aab9e..a6b2c1a 100644 +--- a/vmmon/vmcore/moduleloop.c ++++ b/vmmon/vmcore/moduleloop.c +@@ -205,11 +205,13 @@ skipTaskSwitch:; + uint32 nPages = (uint32)crosspage->args[1]; + VA64 uAddr = (VA64)VPN_2_VA(vpn); + ASSERT(nPages <= MODULECALL_NUM_ARGS); ++ HostIF_VMLock(vm, 38); + for (i = 0; i < nPages; i++) { + MPN64 mpn; + HostIF_LookupUserMPN(vm, uAddr + i * PAGE_SIZE, &mpn); + crosspage->args[i] = mpn; + } ++ HostIF_VMUnlock(vm, 38); + break; + } + |
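
A note on the recurring timer hunks above (pollTimer and tscTimer in driver.c, uptimeState.timer in hostif.c): they all apply the same compatibility idiom. From 4.15 on (or on any kernel where timer_setup has been backported), timer callbacks receive the expiring struct timer_list * instead of an unsigned long cookie. The sketch below is a minimal, self-contained illustration of that idiom, not code from the patch; demo_timer, DemoTimerFn and DemoTimerStart are hypothetical names. The (void *) cast on the legacy path is only safe because the callback ignores its argument, which is exactly the situation in this driver.

#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/version.h>

static struct timer_list demo_timer;

/* Modern signature: the kernel passes back the timer that expired. */
static void
DemoTimerFn(struct timer_list *t) // IN: expired timer (unused here)
{
   mod_timer(&demo_timer, jiffies + HZ); /* re-arm one second out */
}

static void
DemoTimerStart(void)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) && !defined(timer_setup)
   /*
    * Legacy API: .function takes an unsigned long. The cast is safe
    * only for callbacks that never read their argument.
    */
   init_timer(&demo_timer);
   demo_timer.data = 0;
   demo_timer.function = (void *)DemoTimerFn;
#else
   timer_setup(&demo_timer, DemoTimerFn, 0);
#endif
   mod_timer(&demo_timer, jiffies + HZ);
}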
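
A usage note on the Makefile hunk at the top of the patch: the new KVERSION variable overrides the default uname -r, so the module can be built against any installed set of kernel headers rather than only the running kernel's. Assuming the patch is applied from the directory containing vmmon/ (patch -p1 < vmmon.patch, matching the a/vmmon/... paths in the hunks) and that headers for the target release are installed, an out-of-tree build would then look like make KVERSION=4.15.0-1-ARCH run from the module source directory; the release string here is a placeholder for illustration, not a value taken from this commit.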