/*
 * Copyright 2022 Haiku, Inc. All Rights Reserved.
 * Distributed under the terms of the MIT License.
 */
#include "VMSAv8TranslationMap.h"

#include <algorithm>
#include <slab/Slab.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMCache.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>


//#define DO_TRACE
#ifdef DO_TRACE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


uint32_t VMSAv8TranslationMap::fHwFeature;
uint64_t VMSAv8TranslationMap::fMair;

// ASID Management
static constexpr size_t kAsidBits = 8;
static constexpr size_t kNumAsids = (1 << kAsidBits);
static spinlock sAsidLock = B_SPINLOCK_INITIALIZER;
// A bitmap to track which ASIDs are in use.
static uint64 sAsidBitMap[kNumAsids / 64] = {};
// A mapping from ASID to translation map.
static VMSAv8TranslationMap* sAsidMapping[kNumAsids] = {};
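// The kernel address space always uses ASID 0 (set in the constructor); user
// maps get an ASID assigned lazily in SwitchUserMap(), and may have it taken
// away again (fASID reset to -1) when all ASIDs are in use.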


static void
free_asid(size_t asid)
{
	for (size_t i = 0; i < B_COUNT_OF(sAsidBitMap); ++i) {
		if (asid < 64) {
			sAsidBitMap[i] &= ~(uint64_t{1} << asid);
			return;
		}
		asid -= 64;
	}

	panic("Could not free ASID!");
}


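// Invalidate all TLB entries tagged with the given ASID, on every core in the
// Inner Shareable domain. TLBI ASIDE1IS takes the ASID in bits [63:48] of its
// operand register.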
static void
flush_tlb_whole_asid(uint64_t asid)
{
	asm("dsb ishst");
	asm("tlbi aside1is, %0" ::"r"(asid << 48));
	asm("dsb ish");
	asm("isb");
}


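// Allocate the lowest free ASID and mark it as used in the bitmap.
// Returns kNumAsids when every ASID is already taken. __builtin_ffsll()
// returns the 1-based index of the lowest set bit (0 if none), so applying it
// to the complement of a bitmap word yields the lowest free bit in that word.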
static size_t
alloc_first_free_asid(void)
{
	int asid = 0;
	for (size_t i = 0; i < B_COUNT_OF(sAsidBitMap); ++i) {
		int avail = __builtin_ffsll(~sAsidBitMap[i]);
		if (avail != 0) {
			sAsidBitMap[i] |= (uint64_t{1} << (avail-1));
			asid += (avail - 1);
			return asid;
		}
		asid += 64;
	}

	return kNumAsids;
}


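// A PTE counts as dirty if the software SWDIRTY bit is set, or if the page is
// currently writable (kAttrAPReadOnly clear). See GetMemoryAttr() for how
// kAttrSWDBM and the hardware DBM feature tie into this scheme.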
static bool
is_pte_dirty(uint64_t pte)
{
	if ((pte & kAttrSWDIRTY) != 0)
		return true;

	return (pte & kAttrAPReadOnly) == 0;
}


static uint64_t
set_pte_dirty(uint64_t pte)
{
	if ((pte & kAttrSWDBM) != 0)
		return pte & ~kAttrAPReadOnly;

	return pte | kAttrSWDIRTY;
}


static uint64_t
set_pte_clean(uint64_t pte)
{
	pte &= ~kAttrSWDIRTY;
	return pte | kAttrAPReadOnly;
}


static bool
is_pte_accessed(uint64_t pte)
{
	return (pte & kPteValidMask) != 0 && (pte & kAttrAF) != 0;
}


VMSAv8TranslationMap::VMSAv8TranslationMap(
	bool kernel, phys_addr_t pageTable, int pageBits, int vaBits, int minBlockLevel)
	:
	fIsKernel(kernel),
	fPageTable(pageTable),
	fPageBits(pageBits),
	fVaBits(vaBits),
	fMinBlockLevel(minBlockLevel),
	fASID(kernel ? 0 : -1),
	fRefcount(0)
{
	TRACE("+VMSAv8TranslationMap(%p, %d, 0x%" B_PRIxADDR ", %d, %d, %d)\n", this,
		kernel, pageTable, pageBits, vaBits, minBlockLevel);

	fInitialLevel = CalcStartLevel(fVaBits, fPageBits);
}


VMSAv8TranslationMap::~VMSAv8TranslationMap()
{
	TRACE("-VMSAv8TranslationMap(%p)\n", this);
	TRACE("  fIsKernel: %d, fPageTable: 0x%" B_PRIxADDR ", fASID: %d, fRefcount: %d\n",
		fIsKernel, fPageTable, fASID, fRefcount);

	ASSERT(!fIsKernel);
	ASSERT(fRefcount == 0);

	ThreadCPUPinner pinner(thread_get_current_thread());
	InterruptsSpinLocker locker(sAsidLock);

	vm_page_reservation reservation = {};
	FreeTable(fPageTable, 0, fInitialLevel, &reservation);
	vm_page_unreserve_pages(&reservation);

	if (fASID != -1) {
		sAsidMapping[fASID] = NULL;
		free_asid(fASID);
	}
}


// Switch the user address space map installed in TTBR0_EL1 from "from" to "to".
// Passing the kernel map as "to" installs an empty page table instead.
void
VMSAv8TranslationMap::SwitchUserMap(VMSAv8TranslationMap *from, VMSAv8TranslationMap *to)
{
	InterruptsSpinLocker locker(sAsidLock);

	if (!from->fIsKernel) {
		from->fRefcount--;
	}

	if (!to->fIsKernel) {
		to->fRefcount++;
	} else {
		arch_vm_install_empty_table_ttbr0();
		return;
	}

	ASSERT(to->fPageTable != 0);
	uint64_t ttbr = to->fPageTable | ((fHwFeature & HW_COMMON_NOT_PRIVATE) != 0 ? 1 : 0);

	if (to->fASID != -1) {
		WRITE_SPECIALREG(TTBR0_EL1, ((uint64_t)to->fASID << 48) | ttbr);
		asm("isb");
		return;
	}

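	// No ASID assigned to this map yet: allocate a free one if any is available.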
	size_t allocatedAsid = alloc_first_free_asid();
	if (allocatedAsid != kNumAsids) {
		to->fASID = allocatedAsid;
		sAsidMapping[allocatedAsid] = to;

		WRITE_SPECIALREG(TTBR0_EL1, (allocatedAsid << 48) | ttbr);
		flush_tlb_whole_asid(allocatedAsid);
		return;
	}

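	// All ASIDs are in use: recycle one from a map that is not currently active
	// on any CPU (fRefcount == 0) and invalidate its stale TLB entries.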
	for (size_t i = 0; i < kNumAsids; ++i) {
		if (sAsidMapping[i]->fRefcount == 0) {
			sAsidMapping[i]->fASID = -1;
			to->fASID = i;
			sAsidMapping[i] = to;

			WRITE_SPECIALREG(TTBR0_EL1, (i << 48) | ttbr);
			flush_tlb_whole_asid(i);
			return;
		}
	}

	panic("cannot assign ASID");
}


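// Compute the top level of the page table tree for the given VA width.
// Each level resolves (pageBits - 3) bits. For example, with 4 KiB pages
// (pageBits = 12) every table covers 9 bits, so a 48-bit VA needs 4 levels
// (start level 0) while a 39-bit VA needs only 3 (start level 1).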
int
VMSAv8TranslationMap::CalcStartLevel(int vaBits, int pageBits)
{
	int level = 4;

	int bitsLeft = vaBits - pageBits;
	while (bitsLeft > 0) {
		int tableBits = pageBits - 3;
		bitsLeft -= tableBits;
		level--;
	}

	ASSERT(level >= 0);

	return level;
}


bool
VMSAv8TranslationMap::Lock()
{
	TRACE("VMSAv8TranslationMap::Lock()\n");
	recursive_lock_lock(&fLock);
	return true;
}


void
VMSAv8TranslationMap::Unlock()
{
	TRACE("VMSAv8TranslationMap::Unlock()\n");
	recursive_lock_unlock(&fLock);
}


addr_t
VMSAv8TranslationMap::MappedSize() const
{
	panic("VMSAv8TranslationMap::MappedSize not implemented");
	return 0;
}


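// Worst-case number of table pages needed to map the range [start, end].
// With 4 KiB pages each table has 512 entries, so a level-3 table spans 2 MiB,
// level 2 spans 1 GiB, level 1 spans 512 GiB and level 0 spans 256 TiB; one
// table page is needed per region of each level that the range touches.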
size_t
VMSAv8TranslationMap::MaxPagesNeededToMap(addr_t start, addr_t end) const
{
	constexpr uint64_t level3Range = B_PAGE_SIZE * 512;
	constexpr uint64_t level2Range = level3Range * 512;
	constexpr uint64_t level1Range = level2Range * 512;
	constexpr uint64_t level0Range = level1Range * 512;

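	// The start address is not known yet: assume the worst case of a range
	// that is maximally misaligned against every table level.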
	if (start == 0) {
		start = level3Range - B_PAGE_SIZE;
		end += start;
	}

	size_t requiredPages[] = {
		end / level0Range + 1 - start / level0Range,
		end / level1Range + 1 - start / level1Range,
		end / level2Range + 1 - start / level2Range,
		end / level3Range + 1 - start / level3Range
	};

	size_t ret = 0;
	for (int i = fInitialLevel; i < 4; ++i) {
		ret += requiredPages[i];
	}

	return ret;
}


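// Page tables are accessed through the kernel's direct physical-memory
// mapping at KERNEL_PMAP_BASE.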
uint64_t*
VMSAv8TranslationMap::TableFromPa(phys_addr_t pa)
{
	return reinterpret_cast<uint64_t*>(KERNEL_PMAP_BASE + pa);
}


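// Recursively clear the table at ptPa: child tables are freed depth-first,
// still-accessed leaf mappings are flushed from the TLB, and finally the
// table page itself is freed.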
void
VMSAv8TranslationMap::FreeTable(phys_addr_t ptPa, uint64_t va, int level,
	vm_page_reservation* reservation)
{
	ASSERT(level < 4);

	int tableBits = fPageBits - 3;
	uint64_t tableSize = 1UL << tableBits;
	uint64_t vaMask = (1UL << fVaBits) - 1;

	int shift = tableBits * (3 - level) + fPageBits;
	uint64_t entrySize = 1UL << shift;

	uint64_t nextVa = va;
	uint64_t* pt = TableFromPa(ptPa);
	for (uint64_t i = 0; i < tableSize; i++) {
		uint64_t oldPte = (uint64_t) atomic_get_and_set64((int64*) &pt[i], 0);

		if (level < 3 && (oldPte & kPteTypeMask) == kPteTypeL012Table) {
			FreeTable(oldPte & kPteAddrMask, nextVa, level + 1, reservation);
		} else if ((oldPte & kPteTypeMask) != 0) {
			uint64_t fullVa = (fIsKernel ? ~vaMask : 0) | nextVa;

			// Use this rather than FlushVAIfAccessed so that we don't have to
			// acquire sAsidLock for every entry.
			flush_va_if_accessed(oldPte, fullVa, fASID);
		}

		nextVa += entrySize;
	}

	vm_page* page = vm_lookup_page(ptPa >> fPageBits);
	DEBUG_PAGE_ACCESS_START(page);
	vm_page_free_etc(NULL, page, reservation);
}


// Make a new page sub-table.
// The parent table is `ptPa`, and the new sub-table's PTE will be at `index`
// in it.
// Returns the physical address of the new table, or the address of the existing
// one if the PTE is already filled.
phys_addr_t
VMSAv8TranslationMap::GetOrMakeTable(phys_addr_t ptPa, int level, int index,
	vm_page_reservation* reservation)
{
	ASSERT(level < 3);

	uint64_t* ptePtr = TableFromPa(ptPa) + index;
	uint64_t oldPte = atomic_get64((int64*) ptePtr);

	int type = oldPte & kPteTypeMask;
	ASSERT(type != kPteTypeL12Block);

	if (type == kPteTypeL012Table) {
		// This is already a table entry, just return it
		return oldPte & kPteAddrMask;
	} else if (reservation != nullptr) {
		// Create new table there
		vm_page* page = vm_page_allocate_page(reservation, PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
		phys_addr_t newTablePa = page->physical_page_number << fPageBits;
		DEBUG_PAGE_ACCESS_END(page);

		// We only create mappings at the final level so we don't need to handle
		// splitting block mappings
		ASSERT(type != kPteTypeL12Block);

		// Ensure that writes to page being attached have completed
		asm("dsb ishst");

		uint64_t oldPteRefetch = (uint64_t)atomic_test_and_set64((int64*) ptePtr,
			newTablePa | kPteTypeL012Table, oldPte);
		if (oldPteRefetch != oldPte) {
			// If the old PTE has mutated, it must be because another thread has allocated the
			// sub-table at the same time as us. If that has happened, deallocate the page we
			// setup and use the one they installed instead.
			ASSERT((oldPteRefetch & kPteTypeMask) == kPteTypeL012Table);
			DEBUG_PAGE_ACCESS_START(page);
			vm_page_free_etc(NULL, page, reservation);
			return oldPteRefetch & kPteAddrMask;
		}

		return newTablePa;
	}

	// There's no existing table and we have no reservation
	return 0;
}


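// Invalidate the TLB entry for "va" if the PTE has ever been accessed (AF set).
// Global mappings (kAttrNG clear) are flushed from all ASIDs via TLBI VAAE1IS;
// non-global mappings only need flushing when the owning map has an ASID
// assigned. Returns whether the PTE had been accessed.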
bool
flush_va_if_accessed(uint64_t pte, addr_t va, int asid)
{
	if (!is_pte_accessed(pte))
		return false;

	if ((pte & kAttrNG) == 0) {
		// Flush from all address spaces
		asm("dsb ishst"); // Ensure PTE write completed
		asm("tlbi vaae1is, %0" ::"r"(((va >> 12) & kTLBIMask)));
		asm("dsb ish"); // Wait for TLB flush to complete
		asm("isb");
	} else if (asid != -1) {
		asm("dsb ishst"); // Ensure PTE write completed
		asm("tlbi vae1is, %0" ::"r"(((va >> 12) & kTLBIMask) | (uint64_t(asid) << 48)));
		asm("dsb ish"); // Wait for TLB flush to complete
		asm("isb");
	}

	return true;
}


bool
VMSAv8TranslationMap::FlushVAIfAccessed(uint64_t pte, addr_t va)
{
	InterruptsSpinLocker locker(sAsidLock);
	return flush_va_if_accessed(pte, va, fASID);
}


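// First half of the ARMv8 "break-before-make" sequence: atomically replace the
// old PTE with an invalid (zero) entry and flush the TLB, so the caller can
// then install the new PTE. Returns false if another thread modified the PTE
// concurrently, in which case the caller has to retry.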
bool
VMSAv8TranslationMap::AttemptPteBreakBeforeMake(uint64_t* ptePtr, uint64_t oldPte, addr_t va)
{
	uint64_t loadedPte = atomic_test_and_set64((int64_t*)ptePtr, 0, oldPte);
	if (loadedPte != oldPte)
		return false;

	FlushVAIfAccessed(oldPte, va);

	return true;
}


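// Walk the page tables covering [va, va + size), calling updatePte on every
// level-3 (page granularity) entry. Intermediate tables are created on demand
// when a reservation is provided; with a null reservation, holes are skipped.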
template<typename UpdatePte>
void
VMSAv8TranslationMap::ProcessRange(phys_addr_t ptPa, int level, addr_t va, size_t size,
	vm_page_reservation* reservation, UpdatePte&& updatePte)
{
	ASSERT(level < 4);
	ASSERT(ptPa != 0);

	uint64_t pageMask = (1UL << fPageBits) - 1;
	uint64_t vaMask = (1UL << fVaBits) - 1;

	ASSERT((va & pageMask) == 0);

	int tableBits = fPageBits - 3;
	uint64_t tableMask = (1UL << tableBits) - 1;

	int shift = tableBits * (3 - level) + fPageBits;
	uint64_t entrySize = 1UL << shift;
	uint64_t entryMask = entrySize - 1;

	uint64_t alignedDownVa = va & ~entryMask;
	uint64_t end = va + size - 1;
	if (level == 3)
		ASSERT(alignedDownVa == va);

	for (uint64_t effectiveVa = alignedDownVa; effectiveVa < end; effectiveVa += entrySize) {
		int index = ((effectiveVa & vaMask) >> shift) & tableMask;
		uint64_t* ptePtr = TableFromPa(ptPa) + index;

		if (level == 3) {
			updatePte(ptePtr, effectiveVa);
		} else {
			phys_addr_t subTable = GetOrMakeTable(ptPa, level, index, reservation);

			// When reservation is null, we can't create a new subtable. This can be intentional,
			// for example when called from Unmap().
			if (subTable == 0)
				continue;

			if (effectiveVa < va) {
				// The range begins inside the slot.
				if (effectiveVa + entrySize - 1 > end) {
					// The range ends within the slot.
					ProcessRange(subTable, level + 1, va, size, reservation, updatePte);
				} else {
					// The range extends past the end of the slot.
					ProcessRange(subTable, level + 1, va, effectiveVa + entrySize - va,
						reservation, updatePte);
				}
			} else {
				// The range beginning is aligned to the slot.
				if (effectiveVa + entrySize - 1 > end) {
					// The range ends within the slot.
					ProcessRange(subTable, level + 1, effectiveVa, end - effectiveVa + 1,
						reservation, updatePte);
				} else {
					// The range extends past the end of the slot.
					ProcessRange(subTable, level + 1, effectiveVa, entrySize, reservation, updatePte);
				}
			}
		}
	}
}


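// PTEs don't store memory attributes directly; they store a 3-bit index into
// the eight attribute bytes programmed into MAIR_EL1. Look up the index for
// the given attribute byte.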
uint8_t
VMSAv8TranslationMap::MairIndex(uint8_t type)
{
	for (int i = 0; i < 8; i++)
		if (((fMair >> (i * 8)) & 0xff) == type)
			return i;

	panic("MAIR entry not found");
	return 0;
}


uint64_t
VMSAv8TranslationMap::GetMemoryAttr(uint32 attributes, uint32 memoryType, bool isKernel)
{
	uint64_t attr = 0;

	if (!isKernel)
		attr |= kAttrNG;

	if ((attributes & B_EXECUTE_AREA) == 0)
		attr |= kAttrUXN;
	if ((attributes & B_KERNEL_EXECUTE_AREA) == 0)
		attr |= kAttrPXN;

	// SWDBM is a software-reserved bit that we use to mark that writes are
	// allowed, so that the fault handler clears kAttrAPReadOnly on the first
	// write. In that case kAttrAPReadOnly doubles as a not-dirty bit.
	// Additionally, the dirty state can be stored in SWDIRTY so that it is not
	// lost when changing protection from RW to RO.

	// All page permissions begin life in RO state.
	attr |= kAttrAPReadOnly;

	// User-Execute implies User-Read, because it would break PAN otherwise
	if ((attributes & B_READ_AREA) != 0 || (attributes & B_EXECUTE_AREA) != 0)
		attr |= kAttrAPUserAccess; // Allow user reads

	if ((attributes & B_WRITE_AREA) != 0 || (attributes & B_KERNEL_WRITE_AREA) != 0)
		attr |= kAttrSWDBM; // Mark as writeable

	// When supported by hardware copy our SWDBM bit into DBM,
	// so that kAttrAPReadOnly is cleared on write attempt automatically
	// without going through fault handler.
	if ((fHwFeature & HW_DIRTY) != 0 && (attr & kAttrSWDBM) != 0)
		attr |= kAttrDBM;

	attr |= kAttrSHInnerShareable; // Inner Shareable

	uint8_t type = MAIR_NORMAL_WB;

	switch (memoryType & B_MEMORY_TYPE_MASK) {
		case B_UNCACHED_MEMORY:
			// TODO: This probably should be nGnRE for PCI
			type = MAIR_DEVICE_nGnRnE;
			break;
		case B_WRITE_COMBINING_MEMORY:
			type = MAIR_NORMAL_NC;
			break;
		case B_WRITE_THROUGH_MEMORY:
			type = MAIR_NORMAL_WT;
			break;
		case B_WRITE_PROTECTED_MEMORY:
			type = MAIR_NORMAL_WT;
			break;
		default:
		case B_WRITE_BACK_MEMORY:
			type = MAIR_NORMAL_WB;
			break;
	}

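	// AttrIndx (descriptor bits [4:2]) selects one of the eight attribute
	// entries programmed into MAIR_EL1.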
	attr |= MairIndex(type) << 2;

	return attr;
}


status_t
VMSAv8TranslationMap::Map(addr_t va, phys_addr_t pa, uint32 attributes, uint32 memoryType,
	vm_page_reservation* reservation)
{
	TRACE("VMSAv8TranslationMap::Map(0x%" B_PRIxADDR ", 0x%" B_PRIxADDR
		", 0x%x, 0x%x)\n", va, pa, attributes, memoryType);

	ThreadCPUPinner pinner(thread_get_current_thread());

	ASSERT(ValidateVa(va));
	uint64_t attr = GetMemoryAttr(attributes, memoryType, fIsKernel);

	// During first mapping we need to allocate root table
	if (fPageTable == 0) {
		vm_page* page = vm_page_allocate_page(reservation, PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
		DEBUG_PAGE_ACCESS_END(page);
		fPageTable = page->physical_page_number << fPageBits;
	}

	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, reservation,
		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
			while (true) {
				phys_addr_t effectivePa = effectiveVa - va + pa;
				uint64_t oldPte = atomic_get64((int64*)ptePtr);
				uint64_t newPte = effectivePa | attr | kPteTypeL3Page;

				if (newPte == oldPte)
					return;

				if ((oldPte & kPteValidMask) != 0) {
					// ARM64 requires "break-before-make". We must set the PTE to an invalid
					// entry and flush the TLB as appropriate before we can write the new PTE.
					if (!AttemptPteBreakBeforeMake(ptePtr, oldPte, effectiveVa))
						continue;
				}

				// Install the new PTE
				atomic_set64((int64*)ptePtr, newPte);
				asm("dsb ishst"); // Ensure PTE write completed
				asm("isb");
				break;
			}
		});

	return B_OK;
}


status_t
VMSAv8TranslationMap::Unmap(addr_t start, addr_t end)
{
	TRACE("VMSAv8TranslationMap::Unmap(0x%" B_PRIxADDR ", 0x%" B_PRIxADDR
		")\n", start, end);
	ThreadCPUPinner pinner(thread_get_current_thread());

	size_t size = end - start + 1;
	ASSERT(ValidateVa(start));

	if (fPageTable == 0)
		return B_OK;

	ProcessRange(fPageTable, fInitialLevel, start, size, nullptr,
		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
			ASSERT(effectiveVa <= end);
			uint64_t oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
			FlushVAIfAccessed(oldPte, effectiveVa);
		});

	return B_OK;
}


status_t
VMSAv8TranslationMap::UnmapPage(VMArea* area, addr_t address,
	bool updatePageQueue, bool deletingAddressSpace, uint32* _flags)
{
	ASSERT(address % B_PAGE_SIZE == 0);
	ASSERT(_flags == NULL || !updatePageQueue);

	TRACE("VMSAv8TranslationMap::UnmapPage(0x%" B_PRIxADDR "(%s), 0x%"
		B_PRIxADDR ", %d)\n", (addr_t)area, area->name, address,
		updatePageQueue);

	ASSERT(ValidateVa(address));
	ThreadCPUPinner pinner(thread_get_current_thread());
	RecursiveLocker locker(fLock);

	uint64_t oldPte = 0;
	ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
		[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
			oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
			if (!deletingAddressSpace)
				FlushVAIfAccessed(oldPte, effectiveVa);
		});

	if ((oldPte & kPteValidMask) == 0)
		return B_ENTRY_NOT_FOUND;

	pinner.Unlock();

	if (_flags == NULL) {
		locker.Detach(); // PageUnmapped takes ownership of the lock
		PageUnmapped(area, (oldPte & kPteAddrMask) >> fPageBits, is_pte_accessed(oldPte),
			is_pte_dirty(oldPte), updatePageQueue);
	} else {
		uint32 flags = PAGE_PRESENT;
		if (is_pte_accessed(oldPte))
			flags |= PAGE_ACCESSED;
		if (is_pte_dirty(oldPte))
			flags |= PAGE_MODIFIED;
		*_flags = flags;
	}

	return B_OK;
}


void
VMSAv8TranslationMap::UnmapPages(VMArea* area, addr_t address, size_t size,
	bool updatePageQueue, bool deletingAddressSpace)
{
	TRACE("VMSAv8TranslationMap::UnmapPages(0x%" B_PRIxADDR "(%s), 0x%"
		B_PRIxADDR ", 0x%" B_PRIxSIZE ", %d)\n", (addr_t)area,
		area->name, address, size, updatePageQueue);

	ASSERT(ValidateVa(address));
	VMAreaMappings queue;
	ThreadCPUPinner pinner(thread_get_current_thread());
	RecursiveLocker locker(fLock);

	ProcessRange(fPageTable, fInitialLevel, address, size, nullptr,
		[=, &queue](uint64_t* ptePtr, uint64_t effectiveVa) {
			uint64_t oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
			// As in UnmapPage(), skip per-page TLB flushes when the whole
			// address space is being deleted.
			if (!deletingAddressSpace)
				FlushVAIfAccessed(oldPte, effectiveVa);
			if ((oldPte & kPteValidMask) == 0)
				return;

			if (area->cache_type == CACHE_TYPE_DEVICE)
				return;

			page_num_t page = (oldPte & kPteAddrMask) >> fPageBits;
			PageUnmapped(area, page,
				is_pte_accessed(oldPte), is_pte_dirty(oldPte),
				updatePageQueue, &queue);
		});

	// TODO: As in UnmapPage() we can lose page dirty flags here. ATM it's not
	// really critical here, as in all cases this method is used, the unmapped
	// area range is unmapped for good (resized/cut) and the pages will likely
	// be freed.

	locker.Unlock();

	// free removed mappings
	bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
	uint32 freeFlags = CACHE_DONT_WAIT_FOR_MEMORY
		| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0);

	while (vm_page_mapping* mapping = queue.RemoveHead())
		vm_free_page_mapping(mapping->page->physical_page_number, mapping, freeFlags);
}


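// A VA is valid for this map if it is canonical for its half of the address
// space: kernel addresses must have all bits above fVaBits set, user addresses
// must have them clear.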
bool
VMSAv8TranslationMap::ValidateVa(addr_t va)
{
	uint64_t vaMask = (1UL << fVaBits) - 1;
	bool kernelAddr = (va & (1UL << 63)) != 0;
	if (kernelAddr != fIsKernel)
		return false;
	if ((va & ~vaMask) != (fIsKernel ? ~vaMask : 0))
		return false;
	return true;
}


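// Translate the PTE for "va" back into PAGE_PRESENT/ACCESSED/MODIFIED and the
// B_{KERNEL_}READ/WRITE/EXECUTE_AREA protection flags expected by the VM.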
status_t
VMSAv8TranslationMap::Query(addr_t va, phys_addr_t* pa, uint32* flags)
{
	*flags = 0;
	*pa = 0;

	uint64_t pageMask = (1UL << fPageBits) - 1;
	va &= ~pageMask;

	ThreadCPUPinner pinner(thread_get_current_thread());
	ASSERT(ValidateVa(va));

	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, nullptr,
		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
			uint64_t pte = atomic_get64((int64_t*)ptePtr);
			*pa = pte & kPteAddrMask;
			*flags |= PAGE_PRESENT | B_KERNEL_READ_AREA;
			if (is_pte_accessed(pte))
				*flags |= PAGE_ACCESSED;
			if (is_pte_dirty(pte))
				*flags |= PAGE_MODIFIED;

			if ((pte & kAttrPXN) == 0)
				*flags |= B_KERNEL_EXECUTE_AREA;

			if ((pte & kAttrAPUserAccess) != 0) {
				*flags |= B_READ_AREA;
				if ((pte & kAttrUXN) == 0)
					*flags |= B_EXECUTE_AREA;
			}

			if ((pte & kAttrSWDBM) != 0) {
				*flags |= B_KERNEL_WRITE_AREA;
				if ((pte & kAttrAPUserAccess) != 0)
					*flags |= B_WRITE_AREA;
			}
		});

	return B_OK;
}


status_t
VMSAv8TranslationMap::QueryInterrupt(
	addr_t virtualAddress, phys_addr_t* _physicalAddress, uint32* _flags)
{
	return Query(virtualAddress, _physicalAddress, _flags);
}


status_t
VMSAv8TranslationMap::Protect(addr_t start, addr_t end, uint32 attributes, uint32 memoryType)
{
	TRACE("VMSAv8TranslationMap::Protect(0x%" B_PRIxADDR ", 0x%"
		B_PRIxADDR ", 0x%x, 0x%x)\n", start, end, attributes, memoryType);

	uint64_t attr = GetMemoryAttr(attributes, memoryType, fIsKernel);
	size_t size = end - start + 1;
	ASSERT(ValidateVa(start));

	ThreadCPUPinner pinner(thread_get_current_thread());

	ProcessRange(fPageTable, fInitialLevel, start, size, nullptr,
		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
			ASSERT(effectiveVa <= end);

			// We need to use an atomic compare-and-swap loop because we must
			// clear some bits while setting others.
			while (true) {
				uint64_t oldPte = atomic_get64((int64_t*)ptePtr);
				uint64_t newPte = oldPte & ~kPteAttrMask;
				newPte |= attr;

				// Preserve access bit.
				newPte |= oldPte & kAttrAF;

				// Preserve the dirty bit.
				if (is_pte_dirty(oldPte))
					newPte = set_pte_dirty(newPte);

				uint64_t oldMemoryType = oldPte & (kAttrShareability | kAttrMemoryAttrIdx);
				uint64_t newMemoryType = newPte & (kAttrShareability | kAttrMemoryAttrIdx);
				if (oldMemoryType != newMemoryType) {
					// ARM64 requires "break-before-make". We must set the PTE to an invalid
					// entry and flush the TLB as appropriate before we can write the new PTE.
					// In this case specifically, it applies any time we change cacheability or
					// shareability.
					if (!AttemptPteBreakBeforeMake(ptePtr, oldPte, effectiveVa))
						continue;

					atomic_set64((int64_t*)ptePtr, newPte);
					asm("dsb ishst"); // Ensure PTE write completed
					asm("isb");

					// No compare-exchange loop required in this case.
					break;
				} else {
					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte) {
						FlushVAIfAccessed(oldPte, effectiveVa);
						break;
					}
				}
			}
		});

	return B_OK;
}


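// Clear PAGE_ACCESSED (the AF bit) and/or PAGE_MODIFIED for the page at "va".
// Clearing the modified flag means making the PTE "clean": kAttrAPReadOnly is
// set and kAttrSWDIRTY cleared, so the next write is tracked again, either
// through a permission fault or through hardware DBM.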
status_t
VMSAv8TranslationMap::ClearFlags(addr_t va, uint32 flags)
{
	ASSERT(ValidateVa(va));

	bool clearAF = flags & PAGE_ACCESSED;
	bool setRO = flags & PAGE_MODIFIED;

	if (!clearAF && !setRO)
		return B_OK;

	ThreadCPUPinner pinner(thread_get_current_thread());

	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, nullptr,
		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
			if (clearAF && setRO) {
				// We need to use an atomic compare-and-swap loop because we
				// must clear one bit while setting the other.
				while (true) {
					uint64_t oldPte = atomic_get64((int64_t*)ptePtr);
					uint64_t newPte = oldPte & ~kAttrAF;
					newPte = set_pte_clean(newPte);

					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte) {
						FlushVAIfAccessed(oldPte, va);
						break;
					}
				}
			} else if (clearAF) {
				atomic_and64((int64_t*)ptePtr, ~kAttrAF);
			} else {
				while (true) {
					uint64_t oldPte = atomic_get64((int64_t*)ptePtr);
					if (!is_pte_dirty(oldPte))
						return;
					uint64_t newPte = set_pte_clean(oldPte);
					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte) {
						FlushVAIfAccessed(oldPte, va);
						break;
					}
				}
			}
		});

	return B_OK;
}


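// Clear the accessed and modified flags of the page at "address", optionally
// unmapping it if it was never accessed. Returns whether the page had been
// accessed; the dirty state is reported through _modified.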
bool
VMSAv8TranslationMap::ClearAccessedAndModified(
	VMArea* area, addr_t address, bool unmapIfUnaccessed, bool& _modified)
{
	TRACE("VMSAv8TranslationMap::ClearAccessedAndModified(0x%"
		B_PRIxADDR "(%s), 0x%" B_PRIxADDR ", %d)\n", (addr_t)area,
		area->name, address, unmapIfUnaccessed);
	ASSERT(ValidateVa(address));

	RecursiveLocker locker(fLock);
	ThreadCPUPinner pinner(thread_get_current_thread());

	uint64_t oldPte = 0;
	ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
		[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
			// We need to use an atomic compare-and-swap loop: we must first
			// read the old PTE and decide, based on its AF bit, what the new
			// PTE should be.
			while (true) {
				oldPte = atomic_get64((int64_t*)ptePtr);
				uint64_t newPte = oldPte & ~kAttrAF;
				newPte = set_pte_clean(newPte);

				// If the page has not been accessed, then unmap it.
				if (unmapIfUnaccessed && (oldPte & kAttrAF) == 0)
					newPte = 0;

				if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte)
					break;
			}
			asm("dsb ishst"); // Ensure PTE write completed
		});

	pinner.Unlock();
	_modified = is_pte_dirty(oldPte);

	if (FlushVAIfAccessed(oldPte, address))
		return true;

	if (!unmapIfUnaccessed)
		return false;

	locker.Detach(); // UnaccessedPageUnmapped takes ownership
	phys_addr_t oldPa = oldPte & kPteAddrMask;
	UnaccessedPageUnmapped(area, oldPa >> fPageBits);
	return false;
}


void
VMSAv8TranslationMap::Flush()
{
	// Necessary invalidation is performed during mapping,
	// no need to do anything more here.
}