/* * Copyright 2007, Ingo Weinhold, ingo_weinhold@gmx.de. * Copyright 2019-2024, Haiku, Inc. All rights reserved. * Distributed under the terms of the MIT license. */ #include "DataContainer.h" #include #include #include #include #include #include #include #include #include "VMAnonymousNoSwapCache.h" #include "vnode_store.h" #include "AllocationInfo.h" #include "DebugSupport.h" #include "Misc.h" #include "Volume.h" #include "cache_support.h" // Initial size of the DataContainer's small buffer. If it contains data up to // this size, nothing is allocated, but the small buffer is used instead. // 16 bytes are for free, since they are shared with the block list. // (actually even more, since the list has an initial size). // I ran a test analyzing what sizes the attributes in my system have: // size percentage bytes used in average // <= 0 0.00 93.45 // <= 4 25.46 75.48 // <= 8 30.54 73.02 // <= 16 52.98 60.37 // <= 32 80.19 51.74 // <= 64 94.38 70.54 // <= 126 96.90 128.23 // // For average memory usage it is assumed, that attributes larger than 126 // bytes have size 127, that the list has an initial capacity of 10 entries // (40 bytes), that the block reference consumes 4 bytes and the block header // 12 bytes. The optimal length is actually 35, with 51.05 bytes per // attribute, but I conservatively rounded to 32. static const off_t kMinimumSmallBufferSize = 32; static const off_t kMaximumSmallBufferSize = (B_PAGE_SIZE / 4); // We don't use VMVnodeCache because it's for caching pages that exist on disk. // All we need is an AnonymousCache that tracks when the vnode is referenced. class VMForVnodeCache final : public VMAnonymousNoSwapCache { public: status_t Init() { fVnode = NULL; return VMAnonymousNoSwapCache::Init(false, 0, 0, 0); } status_t AcquireUnreferencedStoreRef() override { return B_NOT_SUPPORTED; } void AcquireStoreRef() override { vfs_acquire_vnode(fVnode); } void ReleaseStoreRef() override { vfs_put_vnode(fVnode); } protected: virtual void DeleteObject() { static_assert(sizeof(VMForVnodeCache) <= sizeof(VMVnodeCache), "cache too large"); object_cache_delete(gVnodeCacheObjectCache, this); } private: friend class DataContainer; struct vnode* fVnode; }; DataContainer::DataContainer(Volume *volume) : fVolume(volume), fSize(0), fCache(NULL), fSmallBuffer(NULL), fSmallBufferSize(0) { } DataContainer::~DataContainer() { if (fCache != NULL) { fCache->Lock(); fCache->ReleaseRefAndUnlock(); fCache = NULL; } if (fSmallBuffer != NULL) { free(fSmallBuffer); fSmallBuffer = NULL; } } status_t DataContainer::InitCheck() const { return (fVolume != NULL ? B_OK : B_ERROR); } VMCache* DataContainer::GetCache(struct vnode* vnode) { // TODO: Because we always get the cache for files on creation vs. on demand, // this means files (no matter how small) always use cache mode at present. if (!_IsCacheMode()) _SwitchToCacheMode(); ((VMForVnodeCache*)fCache)->fVnode = vnode; return fCache; } status_t DataContainer::Resize(off_t newSize) { // PRINT("DataContainer::Resize(%lld), fSize: %lld\n", newSize, fSize); if (_RequiresCacheMode(newSize)) { if (newSize < fSize) { // shrink // resize the VMCache, which will automatically free pages AutoLocker _(fCache); status_t error = fCache->Resize(newSize, VM_PRIORITY_USER); if (error != B_OK) return error; } else { // grow status_t error = B_OK; if (!_IsCacheMode()) error = _SwitchToCacheMode(); if (error != B_OK) return error; AutoLocker _(fCache); error = fCache->Resize(newSize, VM_PRIORITY_USER); if (error != B_OK) return error; // pages will be added as they are written to; so nothing else // needs to be done here. } } else if (fSmallBufferSize < newSize || (fSmallBufferSize - newSize) > (kMaximumSmallBufferSize / 2)) { const size_t newBufferSize = max_c(next_power_of_2(newSize), kMinimumSmallBufferSize); void* newBuffer = realloc(fSmallBuffer, newBufferSize); if (newBuffer == NULL) return B_NO_MEMORY; fSmallBufferSize = newBufferSize; fSmallBuffer = (uint8*)newBuffer; } fSize = newSize; return B_OK; } status_t DataContainer::ReadAt(off_t offset, void *_buffer, size_t size, size_t *bytesRead) { uint8 *buffer = (uint8*)_buffer; status_t error = (buffer && offset >= 0 && bytesRead ? B_OK : B_BAD_VALUE); if (error != B_OK) return error; // read not more than we have to offer offset = min(offset, fSize); size = min(size, size_t(fSize - offset)); if (!_IsCacheMode()) { // in non-cache mode, use the "small buffer" if (IS_USER_ADDRESS(buffer)) { error = user_memcpy(buffer, fSmallBuffer + offset, size); if (error != B_OK) size = 0; } else { memcpy(buffer, fSmallBuffer + offset, size); } if (bytesRead != NULL) *bytesRead = size; return error; } // cache mode error = _DoCacheIO(offset, buffer, size, bytesRead, false); return error; } status_t DataContainer::WriteAt(off_t offset, const void *_buffer, size_t size, size_t *bytesWritten) { PRINT("DataContainer::WriteAt(%lld, %p, %lu, %p), fSize: %lld\n", offset, _buffer, size, bytesWritten, fSize); const uint8 *buffer = (const uint8*)_buffer; status_t error = (buffer && offset >= 0 && bytesWritten ? B_OK : B_BAD_VALUE); if (error != B_OK) return error; // resize the container, if necessary if ((offset + (off_t)size) > fSize) error = Resize(offset + size); if (error != B_OK) return error; if (!_IsCacheMode()) { // in non-cache mode, use the "small buffer" if (IS_USER_ADDRESS(buffer)) { error = user_memcpy(fSmallBuffer + offset, buffer, size); if (error != B_OK) size = 0; } else { memcpy(fSmallBuffer + offset, buffer, size); } if (bytesWritten != NULL) *bytesWritten = size; return error; } // cache mode error = _DoCacheIO(offset, (uint8*)buffer, size, bytesWritten, true); PRINT("DataContainer::WriteAt() done: %lx, fSize: %lld\n", error, fSize); return error; } void DataContainer::GetAllocationInfo(AllocationInfo &info) { if (_IsCacheMode()) { info.AddAreaAllocation(fCache->committed_size); } else { // ... } } inline bool DataContainer::_RequiresCacheMode(size_t size) { // we cannot back out of cache mode after entering it, // as there may be other consumers of our VMCache return _IsCacheMode() || (size > kMaximumSmallBufferSize); } inline bool DataContainer::_IsCacheMode() const { return fCache != NULL; } inline int32 DataContainer::_CountBlocks() const { if (_IsCacheMode()) return fCache->page_count; else if (fSize == 0) // small buffer mode, empty buffer return 0; return 1; // small buffer mode, non-empty buffer } status_t DataContainer::_SwitchToCacheMode() { VMForVnodeCache* cache = new(gVnodeCacheObjectCache, 0) VMForVnodeCache; if (cache == NULL) return B_NO_MEMORY; status_t error = cache->Init(); if (error != B_OK) return error; AutoLocker locker(cache); fCache = cache; fCache->temporary = 1; fCache->type = 0; fCache->virtual_end = fSize; error = fCache->Commit(fSize, VM_PRIORITY_USER); if (error != B_OK) return error; if (fSize != 0) error = _DoCacheIO(0, fSmallBuffer, fSize, NULL, true); free(fSmallBuffer); fSmallBuffer = NULL; fSmallBufferSize = 0; return error; } status_t DataContainer::_DoCacheIO(off_t offset, uint8* buffer, ssize_t length, size_t* bytesProcessed, bool isWrite, bool retriesAllowed) { const size_t originalLength = length; const bool user = IS_USER_ADDRESS(buffer); const off_t rounded_offset = ROUNDDOWN(offset, B_PAGE_SIZE); const size_t rounded_len = ROUNDUP((length) + (offset - rounded_offset), B_PAGE_SIZE); BStackOrHeapArray pages(rounded_len / B_PAGE_SIZE); if (!pages.IsValid()) return B_NO_MEMORY; cache_get_pages(fCache, rounded_offset, rounded_len, isWrite, pages); thread_get_current_thread()->page_fault_waits_allowed--; status_t error = B_OK; size_t index = 0; while (length > 0) { vm_page* page = pages[index]; phys_addr_t at = (page != NULL) ? (page->physical_page_number * B_PAGE_SIZE) : 0; ssize_t bytes = B_PAGE_SIZE; if (index == 0) { const uint32 pageoffset = (offset % B_PAGE_SIZE); at += pageoffset; bytes -= pageoffset; } bytes = min(length, bytes); if (isWrite) { page->modified = true; error = vm_memcpy_to_physical(at, buffer, bytes, user); } else { if (page != NULL) { error = vm_memcpy_from_physical(buffer, at, bytes, user); } else { if (user) error = user_memset(buffer, 0, bytes); else memset(buffer, 0, bytes); } } if (error != B_OK) break; offset += bytes; buffer += bytes; length -= bytes; index++; } thread_get_current_thread()->page_fault_waits_allowed++; cache_put_pages(fCache, rounded_offset, rounded_len, pages, error == B_OK); if (error == B_BUSY && retriesAllowed) { // See comment in the file_cache's cache_io() routine. if (user) { error = user_memset(buffer, 0, length); } else { memset(buffer, 0, length); error = B_OK; } if (error == B_OK) { size_t processed; error = _DoCacheIO(offset, buffer, length, &processed, isWrite, false); length -= processed; } } if (bytesProcessed != NULL) *bytesProcessed = length > 0 ? originalLength - length : originalLength; return error; }