/*
* Copyright 2004-2020, Axel Dörfler, axeld@pinc-software.de.
* Distributed under the terms of the MIT License.
*/
#include <block_cache.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/uio.h>
#include <KernelExport.h>
#include <fs_cache.h>
#include <condition_variable.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <tracing.h>
#include <util/kernel_cpp.h>
#include <util/DoublyLinkedList.h>
#include <util/AutoLock.h>
#include <StackOrHeapArray.h>
#include <vm/vm_page.h>
#ifndef BUILDING_USERLAND_FS_SERVER
#include "IORequest.h"
#endif
#include "kernel_debug_config.h"
#ifdef _KERNEL_MODE
# define TRACE_ALWAYS(x...) dprintf(x)
#else
# define TRACE_ALWAYS(x...) printf(x)
#endif
#ifdef TRACE_BLOCK_CACHE
# define TRACE(x) TRACE_ALWAYS(x)
#else
# define TRACE(x) ;
#endif
#define FATAL(x) panic x
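// A transaction is considered idle after this much time (2 seconds) without
// any activity.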
static const bigtime_t kTransactionIdleTime = 2000000LL;
namespace {
struct cache_transaction;
struct cached_block;
struct block_cache;
typedef DoublyLinkedListLink<cached_block> block_link;
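/*! A single cached disk block. "current_data" is the live buffer handed out
to the file system; "original_data" preserves the block's contents as of the
start of its transaction, and "parent_data" as of the start of the current
sub-transaction, so that changes can later be reverted or detached. Blocks
are linked into the cache's hash table (via "next"), into a transaction's
block chain (via "transaction_next"), and into block lists such as the
unused list or a transaction's "blocks" list (via "link").
*/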
struct cached_block {
cached_block* next;
cached_block* transaction_next;
block_link link;
off_t block_number;
void* current_data;
void* original_data;
void* parent_data;
#if BLOCK_CACHE_DEBUG_CHANGED
void* compare;
#endif
int32 ref_count;
int32 last_accessed;
bool busy_reading : 1;
bool busy_writing : 1;
bool is_writing : 1;
bool is_dirty : 1;
bool unused : 1;
bool discard : 1;
bool busy_reading_waiters : 1;
bool busy_writing_waiters : 1;
cache_transaction* transaction;
cache_transaction* previous_transaction;
bool CanBeWritten() const;
int32 LastAccess() const
{ return system_time() / 1000000L - last_accessed; }
};
typedef DoublyLinkedList<cached_block,
DoublyLinkedListMemberGetLink<cached_block,
&cached_block::link> > block_list;
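/*! State of a pending transaction notification: which transaction it refers
to, the event mask it was registered for, the events currently pending
delivery, and the hook to call. Notifications are allocated from a dedicated
object cache (sized for the larger cache_listener) and can be flagged for
deletion once their pending events have been delivered.
*/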
struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
static inline void* operator new(size_t size);
static inline void operator delete(void* block);
int32 transaction_id;
int32 events_pending;
int32 events;
transaction_notification_hook hook;
void* data;
bool delete_after_event;
};
typedef DoublyLinkedList<cache_notification> NotificationList;
struct cache_listener;
typedef DoublyLinkedListLink<cache_listener> listener_link;
struct cache_listener : cache_notification {
listener_link link;
};
typedef DoublyLinkedList<cache_listener,
DoublyLinkedListMemberGetLink<cache_listener,
&cache_listener::link> > ListenerList;
static object_cache* sCacheNotificationCache;
void*
cache_notification::operator new(size_t size)
{
ASSERT(size <= sizeof(cache_listener));
return object_cache_alloc(sCacheNotificationCache, 0);
}
void
cache_notification::operator delete(void* block)
{
object_cache_free(sCacheNotificationCache, block, 0);
}
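/*! Hash table definitions: blocks are hashed by their block number, and
transactions by their ID. Both use BOpenHashTable with intrusive links
(cached_block::next and cache_transaction::next, respectively).
*/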
struct BlockHash {
typedef off_t KeyType;
typedef cached_block ValueType;
size_t HashKey(KeyType key) const
{
return key;
}
size_t Hash(ValueType* block) const
{
return block->block_number;
}
bool Compare(KeyType key, ValueType* block) const
{
return block->block_number == key;
}
ValueType*& GetLink(ValueType* value) const
{
return value->next;
}
};
typedef BOpenHashTable<BlockHash> BlockTable;
struct TransactionHash {
typedef int32 KeyType;
typedef cache_transaction ValueType;
size_t HashKey(KeyType key) const
{
return key;
}
size_t Hash(ValueType* transaction) const;
bool Compare(KeyType key, ValueType* transaction) const;
ValueType*& GetLink(ValueType* value) const;
};
typedef BOpenHashTable<TransactionHash> TransactionTable;
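/*! Per-device block cache. It owns the block hash table, the transaction
hash table, the list of unused (unreferenced) blocks, and an object cache
from which the block data buffers are allocated. The rw_lock "lock" protects
the cache's state; the two condition variables are used to wait for blocks
that are busy being read or written, and the low-resource handler frees
unused blocks when memory gets tight.
*/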
struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
rw_lock lock;
BlockTable hash;
const int fd;
off_t max_blocks;
const size_t block_size;
int32 next_transaction_id;
cache_transaction* last_transaction;
TransactionTable transaction_hash;
object_cache* buffer_cache;
spinlock unused_blocks_lock;
block_list unused_blocks;
uint32 unused_block_count;
ConditionVariable busy_reading_condition;
uint32 busy_reading_count;
bool busy_reading_waiters;
ConditionVariable busy_writing_condition;
uint32 busy_writing_count;
bool busy_writing_waiters;
bigtime_t last_block_write;
bigtime_t last_block_write_duration;
uint32 num_dirty_blocks;
const bool read_only;
NotificationList pending_notifications;
ConditionVariable condition_variable;
block_cache(int fd, off_t numBlocks, size_t blockSize,
bool readOnly);
~block_cache();
status_t Init();
void Free(void* buffer);
void* Allocate();
void FreeBlock(cached_block* block);
cached_block* NewBlock(off_t blockNumber);
void FreeBlockParentData(cached_block* block);
void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
void RemoveBlock(cached_block* block);
void DiscardBlock(cached_block* block);
private:
static void _LowMemoryHandler(void* data, uint32 resources,
int32 level);
cached_block* _GetUnusedBlock();
};
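/*! A transaction groups blocks whose modifications belong together from the
file system's point of view. It keeps its blocks both in a singly linked
chain ("first_block"/"transaction_next") and in a doubly linked "blocks"
list, tracks main/sub block counts for sub-transactions, and carries the
listeners to be notified about its state changes.
*/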
struct cache_transaction {
cache_transaction();
cache_transaction* next;
int32 id;
int32 num_blocks;
int32 main_num_blocks;
int32 sub_num_blocks;
cached_block* first_block;
block_list blocks;
ListenerList listeners;
bool open;
bool has_sub_transaction;
bigtime_t last_used;
int32 busy_writing_count;
};
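/*! Collects dirty blocks and writes them back to disk in batches. The blocks
are sorted by block number so that runs of consecutive blocks can be combined
into a single writev_pos() call. Writing may temporarily release the cache
lock; _BlockDone() then updates transaction state and sends out
TRANSACTION_WRITTEN notifications for transactions whose last block has been
written.
*/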
class BlockWriter {
public:
BlockWriter(block_cache* cache,
size_t max = SIZE_MAX);
~BlockWriter();
bool Add(cached_block* block,
cache_transaction* transaction = NULL);
bool Add(cache_transaction* transaction,
bool& hasLeftOvers);
status_t Write(cache_transaction* transaction = NULL,
bool canUnlock = true);
bool DeletedTransaction() const
{ return fDeletedTransaction; }
static status_t WriteBlock(block_cache* cache,
cached_block* block);
private:
void* _Data(cached_block* block) const;
status_t _WriteBlocks(cached_block** blocks, uint32 count);
void _BlockDone(cached_block* block,
cache_transaction* transaction);
void _UnmarkWriting(cached_block* block);
static int _CompareBlocks(const void* _blockA,
const void* _blockB);
private:
static const size_t kBufferSize = 64;
block_cache* fCache;
cached_block* fBuffer[kBufferSize];
cached_block** fBlocks;
size_t fCount;
size_t fTotal;
size_t fCapacity;
size_t fMax;
status_t fStatus;
bool fDeletedTransaction;
};
#ifndef BUILDING_USERLAND_FS_SERVER
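/*! Reads a run of blocks into the cache asynchronously via an IORequest.
The blocks are allocated up front, inserted into the hash table as unused
blocks, and marked busy_reading until the I/O completes; the object deletes
itself from the I/O completion callback.
*/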
class BlockPrefetcher {
public:
BlockPrefetcher(block_cache* cache, off_t fBlockNumber,
size_t numBlocks);
~BlockPrefetcher();
status_t Allocate();
status_t ReadAsync(WriteLocker& cacheLocker);
size_t NumAllocated() { return fNumAllocated; }
private:
static void _IOFinishedCallback(void* cookie, io_request* request,
status_t status, bool partialTransfer,
generic_size_t bytesTransferred);
void _IOFinished(status_t status, generic_size_t bytesTransferred);
void _RemoveAllocated(size_t unbusyCount, size_t removeCount);
private:
block_cache* fCache;
off_t fBlockNumber;
size_t fNumRequested;
size_t fNumAllocated;
cached_block** fBlocks;
generic_io_vec* fDestVecs;
};
#endif
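/*! Locking policy used by TransactionLocker: acquires the cache's write lock
and additionally waits until no blocks of the cache are busy being written,
so that transaction state can be changed safely.
*/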
class TransactionLocking {
public:
inline bool Lock(block_cache* cache)
{
rw_lock_write_lock(&cache->lock);
while (cache->busy_writing_count != 0) {
ConditionVariableEntry entry;
cache->busy_writing_condition.Add(&entry);
cache->busy_writing_waiters = true;
rw_lock_write_unlock(&cache->lock);
entry.Wait();
rw_lock_write_lock(&cache->lock);
}
return true;
}
inline void Unlock(block_cache* cache)
{
rw_lock_write_unlock(&cache->lock);
}
};
typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
}
#if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
namespace BlockTracing {
class Action : public AbstractTraceEntry {
public:
Action(block_cache* cache, cached_block* block)
:
fCache(cache),
fBlockNumber(block->block_number),
fIsDirty(block->is_dirty),
fHasOriginal(block->original_data != NULL),
fHasParent(block->parent_data != NULL),
fTransactionID(-1),
fPreviousID(-1)
{
if (block->transaction != NULL)
fTransactionID = block->transaction->id;
if (block->previous_transaction != NULL)
fPreviousID = block->previous_transaction->id;
}
virtual void AddDump(TraceOutput& out)
{
out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
" (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
}
virtual const char* _Action() const = 0;
private:
block_cache* fCache;
uint64 fBlockNumber;
bool fIsDirty;
bool fHasOriginal;
bool fHasParent;
int32 fTransactionID;
int32 fPreviousID;
};
class Get : public Action {
public:
Get(block_cache* cache, cached_block* block)
:
Action(cache, block)
{
Initialized();
}
virtual const char* _Action() const { return "get"; }
};
class Put : public Action {
public:
Put(block_cache* cache, cached_block* block)
:
Action(cache, block)
{
Initialized();
}
virtual const char* _Action() const { return "put"; }
};
class Read : public Action {
public:
Read(block_cache* cache, cached_block* block)
:
Action(cache, block)
{
Initialized();
}
virtual const char* _Action() const { return "read"; }
};
class Write : public Action {
public:
Write(block_cache* cache, cached_block* block)
:
Action(cache, block)
{
Initialized();
}
virtual const char* _Action() const { return "write"; }
};
class Flush : public Action {
public:
Flush(block_cache* cache, cached_block* block, bool getUnused = false)
:
Action(cache, block),
fGetUnused(getUnused)
{
Initialized();
}
virtual const char* _Action() const
{ return fGetUnused ? "get-unused" : "flush"; }
private:
bool fGetUnused;
};
class Error : public AbstractTraceEntry {
public:
Error(block_cache* cache, uint64 blockNumber, const char* message,
status_t status = B_OK)
:
fCache(cache),
fBlockNumber(blockNumber),
fMessage(message),
fStatus(status)
{
Initialized();
}
virtual void AddDump(TraceOutput& out)
{
out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
fStatus != B_OK ? strerror(fStatus) : "");
}
private:
block_cache* fCache;
uint64 fBlockNumber;
const char* fMessage;
status_t fStatus;
};
#if BLOCK_CACHE_BLOCK_TRACING >= 2
class BlockData : public AbstractTraceEntry {
public:
enum {
kCurrent = 0x01,
kParent = 0x02,
kOriginal = 0x04
};
BlockData(block_cache* cache, cached_block* block, const char* message)
:
fCache(cache),
fSize(cache->block_size),
fBlockNumber(block->block_number),
fMessage(message)
{
_Allocate(fCurrent, block->current_data);
_Allocate(fParent, block->parent_data);
_Allocate(fOriginal, block->original_data);
#if KTRACE_PRINTF_STACK_TRACE
fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
false);
#endif
Initialized();
}
virtual void AddDump(TraceOutput& out)
{
out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
fMessage);
}
#if KTRACE_PRINTF_STACK_TRACE
virtual void DumpStackTrace(TraceOutput& out)
{
out.PrintStackTrace(fStackTrace);
}
#endif
void DumpBlocks(uint32 which, uint32 offset, uint32 size)
{
if ((which & kCurrent) != 0)
DumpBlock(kCurrent, offset, size);
if ((which & kParent) != 0)
DumpBlock(kParent, offset, size);
if ((which & kOriginal) != 0)
DumpBlock(kOriginal, offset, size);
}
void DumpBlock(uint32 which, uint32 offset, uint32 size)
{
if (offset > fSize) {
kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
return;
}
if (offset + size > fSize)
size = fSize - offset;
const char* label;
uint8* data;
if ((which & kCurrent) != 0) {
label = "current";
data = fCurrent;
} else if ((which & kParent) != 0) {
label = "parent";
data = fParent;
} else if ((which & kOriginal) != 0) {
label = "original";
data = fOriginal;
} else
return;
kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
static const uint32 kBlockSize = 16;
data += offset;
for (uint32 i = 0; i < size;) {
int start = i;
kprintf(" %04" B_PRIx32 " ", i);
for (; i < start + kBlockSize; i++) {
if (!(i % 4))
kprintf(" ");
if (i >= size)
kprintf(" ");
else
kprintf("%02x", data[i]);
}
kprintf("\n");
}
}
private:
void _Allocate(uint8*& target, void* source)
{
if (source == NULL) {
target = NULL;
return;
}
target = alloc_tracing_buffer_memcpy(source, fSize, false);
}
block_cache* fCache;
uint32 fSize;
uint64 fBlockNumber;
const char* fMessage;
uint8* fCurrent;
uint8* fParent;
uint8* fOriginal;
#if KTRACE_PRINTF_STACK_TRACE
tracing_stack_trace* fStackTrace;
#endif
};
#endif
}
# define TB(x) new(std::nothrow) BlockTracing::x;
#else
# define TB(x) ;
#endif
#if BLOCK_CACHE_BLOCK_TRACING >= 2
# define TB2(x) new(std::nothrow) BlockTracing::x;
#else
# define TB2(x) ;
#endif
#if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
namespace TransactionTracing {
class Action : public AbstractTraceEntry {
public:
Action(const char* label, block_cache* cache,
cache_transaction* transaction)
:
fCache(cache),
fTransaction(transaction),
fID(transaction->id),
fSub(transaction->has_sub_transaction),
fNumBlocks(transaction->num_blocks),
fSubNumBlocks(transaction->sub_num_blocks)
{
strlcpy(fLabel, label, sizeof(fLabel));
Initialized();
}
virtual void AddDump(TraceOutput& out)
{
out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
}
private:
char fLabel[12];
block_cache* fCache;
cache_transaction* fTransaction;
int32 fID;
bool fSub;
int32 fNumBlocks;
int32 fSubNumBlocks;
};
class Detach : public AbstractTraceEntry {
public:
Detach(block_cache* cache, cache_transaction* transaction,
cache_transaction* newTransaction)
:
fCache(cache),
fTransaction(transaction),
fID(transaction->id),
fSub(transaction->has_sub_transaction),
fNewTransaction(newTransaction),
fNewID(newTransaction->id)
{
Initialized();
}
virtual void AddDump(TraceOutput& out)
{
out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
"from transaction %p (id %" B_PRId32 ")%s",
fCache, fNewTransaction, fNewID, fTransaction, fID,
fSub ? " sub" : "");
}
private:
block_cache* fCache;
cache_transaction* fTransaction;
int32 fID;
bool fSub;
cache_transaction* fNewTransaction;
int32 fNewID;
};
class Abort : public AbstractTraceEntry {
public:
Abort(block_cache* cache, cache_transaction* transaction)
:
fCache(cache),
fTransaction(transaction),
fID(transaction->id),
fNumBlocks(0)
{
bool isSub = transaction->has_sub_transaction;
fNumBlocks = isSub ? transaction->sub_num_blocks
: transaction->num_blocks;
fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
if (fBlocks != NULL) {
cached_block* block = transaction->first_block;
for (int32 i = 0; block != NULL && i < fNumBlocks;
block = block->transaction_next) {
fBlocks[i++] = block->block_number;
}
} else
fNumBlocks = 0;
#if KTRACE_PRINTF_STACK_TRACE
fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
false);
#endif
Initialized();
}
virtual void AddDump(TraceOutput& out)
{
out.Print("block cache %p, abort transaction "
"%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
out.Print(" %" B_PRIdOFF, fBlocks[i]);
}
#if KTRACE_PRINTF_STACK_TRACE
virtual void DumpStackTrace(TraceOutput& out)
{
out.PrintStackTrace(fStackTrace);
}
#endif
private:
block_cache* fCache;
cache_transaction* fTransaction;
int32 fID;
off_t* fBlocks;
int32 fNumBlocks;
#if KTRACE_PRINTF_STACK_TRACE
tracing_stack_trace* fStackTrace;
#endif
};
}
# define T(x) new(std::nothrow) TransactionTracing::x;
#else
# define T(x) ;
#endif
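// Global state: the list of all block caches (protected by sCachesLock),
// memory-usage bookkeeping, the notification machinery (semaphore, lock, and
// the notifier/writer thread), and the object cache that cached_block
// structures are allocated from. sMarkCache is a dummy list element used as
// an iteration marker by get_next_locked_block_cache().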
static DoublyLinkedList<block_cache> sCaches;
static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
static mutex sCachesMemoryUseLock
= MUTEX_INITIALIZER("block caches memory use");
static size_t sUsedMemory;
static sem_id sEventSemaphore;
static mutex sNotificationsLock
= MUTEX_INITIALIZER("block cache notifications");
static thread_id sNotifierWriterThread;
static DoublyLinkedListLink<block_cache> sMarkCache;
static object_cache* sBlockCache;
static void mark_block_busy_reading(block_cache* cache, cached_block* block);
static void mark_block_unbusy_reading(block_cache* cache, cached_block* block);
static inline bool
is_closing_event(int32 event)
{
return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
}
static inline bool
is_written_event(int32 event)
{
return (event & TRANSACTION_WRITTEN) != 0;
}
/*! Removes the lowest pending event from the \a notification, and returns
that one in \a _event. The return value indicates whether further events are
still pending.
If there is no pending event anymore, it will return \c false.
*/
static bool
get_next_pending_event(cache_notification* notification, int32* _event)
{
for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
int32 pending = atomic_and(&notification->events_pending,
~eventMask);
bool more = (pending & ~eventMask) != 0;
if ((pending & eventMask) != 0) {
*_event = eventMask;
return more;
}
}
return false;
}
static void
flush_pending_notifications(block_cache* cache)
{
ASSERT_LOCKED_MUTEX(&sCachesLock);
while (true) {
MutexLocker locker(sNotificationsLock);
cache_notification* notification = cache->pending_notifications.Head();
if (notification == NULL)
return;
bool deleteAfterEvent = false;
int32 event = -1;
if (!get_next_pending_event(notification, &event)) {
cache->pending_notifications.Remove(notification);
deleteAfterEvent = notification->delete_after_event;
}
if (event >= 0) {
cache_notification copy = *notification;
locker.Unlock();
copy.hook(copy.transaction_id, event, copy.data);
locker.Lock();
}
if (deleteAfterEvent)
delete notification;
}
}
/*! Flushes all pending notifications of all caches by calling their hook
functions.
Must not be called with a cache lock held.
*/
static void
flush_pending_notifications()
{
MutexLocker _(sCachesLock);
DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
while (iterator.HasNext()) {
block_cache* cache = iterator.Next();
flush_pending_notifications(cache);
}
}
static void
set_notification(cache_transaction* transaction,
cache_notification &notification, int32 events,
transaction_notification_hook hook, void* data)
{
notification.transaction_id = transaction != NULL ? transaction->id : -1;
notification.events_pending = 0;
notification.events = events;
notification.hook = hook;
notification.data = data;
notification.delete_after_event = false;
}
/*! Deletes the notification immediately when possible, or marks it for
deletion if the notification is still pending.
*/
static void
delete_notification(cache_notification* notification)
{
MutexLocker locker(sNotificationsLock);
if (notification->events_pending != 0)
notification->delete_after_event = true;
else
delete notification;
}
/*! Adds the notification to the cache's pending notifications, if it isn't
already part of it, and updates its events_pending field.
Also marks the notification to be deleted if \a deleteNotification
is \c true.
Triggers the notifier thread to run.
*/
static void
add_notification(block_cache* cache, cache_notification* notification,
int32 event, bool deleteNotification)
{
if (notification->hook == NULL)
return;
int32 pending = atomic_or(&notification->events_pending, event);
if (pending == 0) {
MutexLocker locker(sNotificationsLock);
if (deleteNotification)
notification->delete_after_event = true;
cache->pending_notifications.Add(notification);
} else if (deleteNotification) {
delete_notification(notification);
}
release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
}
/*! Notifies all interested listeners of this transaction about the \a event.
If \a event is a closing event (i.e. TRANSACTION_ENDED, and
TRANSACTION_ABORTED), all listeners except those listening to
TRANSACTION_WRITTEN will be removed.
*/
static void
notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
int32 event)
{
T(Action("notify", cache, transaction));
bool isClosing = is_closing_event(event);
bool isWritten = is_written_event(event);
ListenerList::Iterator iterator = transaction->listeners.GetIterator();
while (iterator.HasNext()) {
cache_listener* listener = iterator.Next();
bool remove = (isClosing && !is_written_event(listener->events))
|| (isWritten && is_written_event(listener->events));
if (remove)
iterator.Remove();
if ((listener->events & event) != 0)
add_notification(cache, listener, event, remove);
else if (remove)
delete_notification(listener);
}
}
/*! Removes and deletes all listeners that are still monitoring this
transaction.
*/
static void
remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
{
ListenerList::Iterator iterator = transaction->listeners.GetIterator();
while (iterator.HasNext()) {
cache_listener* listener = iterator.Next();
iterator.Remove();
delete_notification(listener);
}
}
static status_t
add_transaction_listener(block_cache* cache, cache_transaction* transaction,
int32 events, transaction_notification_hook hookFunction, void* data)
{
ListenerList::Iterator iterator = transaction->listeners.GetIterator();
while (iterator.HasNext()) {
cache_listener* listener = iterator.Next();
if (listener->data == data && listener->hook == hookFunction) {
listener->events |= events;
return B_OK;
}
}
cache_listener* listener = new cache_listener;
if (listener == NULL)
return B_NO_MEMORY;
set_notification(transaction, *listener, events, hookFunction, data);
transaction->listeners.Add(listener);
return B_OK;
}
cache_transaction::cache_transaction()
{
num_blocks = 0;
main_num_blocks = 0;
sub_num_blocks = 0;
first_block = NULL;
open = true;
has_sub_transaction = false;
last_used = system_time();
busy_writing_count = 0;
}
static void
delete_transaction(block_cache* cache, cache_transaction* transaction)
{
if (cache->last_transaction == transaction)
cache->last_transaction = NULL;
remove_transaction_listeners(cache, transaction);
delete transaction;
}
static cache_transaction*
lookup_transaction(block_cache* cache, int32 id)
{
return cache->transaction_hash.Lookup(id);
}
size_t TransactionHash::Hash(cache_transaction* transaction) const
{
return transaction->id;
}
bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
{
return transaction->id == key;
}
cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
{
return value->next;
}
/*! Writes back all blocks of this transaction that are still marked as being
part of a previous transaction.
*/
static status_t
write_blocks_in_previous_transaction(block_cache* cache,
cache_transaction* transaction)
{
BlockWriter writer(cache);
cached_block* block = transaction->first_block;
for (; block != NULL; block = block->transaction_next) {
if (block->previous_transaction != NULL) {
writer.Add(block);
}
}
return writer.Write();
}
bool
cached_block::CanBeWritten() const
{
return !busy_writing && !busy_reading
&& (previous_transaction != NULL
|| (transaction == NULL && is_dirty && !is_writing));
}
BlockWriter::BlockWriter(block_cache* cache, size_t max)
:
fCache(cache),
fBlocks(fBuffer),
fCount(0),
fTotal(0),
fCapacity(kBufferSize),
fMax(max),
fStatus(B_OK),
fDeletedTransaction(false)
{
}
BlockWriter::~BlockWriter()
{
if (fBlocks != fBuffer)
free(fBlocks);
}
/*! Adds the specified block to the list of blocks to be written back.
If no more blocks can be added, \c false is returned, otherwise \c true.
*/
bool
BlockWriter::Add(cached_block* block, cache_transaction* transaction)
{
ASSERT(block->CanBeWritten());
if (fTotal == fMax)
return false;
if (fCount >= fCapacity) {
cached_block** newBlocks;
size_t newCapacity = max_c(256, fCapacity * 2);
if (fBlocks == fBuffer)
newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
else {
newBlocks = (cached_block**)realloc(fBlocks,
newCapacity * sizeof(void*));
}
if (newBlocks == NULL) {
Write(transaction, false);
} else {
if (fBlocks == fBuffer)
memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
fBlocks = newBlocks;
fCapacity = newCapacity;
}
}
fBlocks[fCount++] = block;
fTotal++;
block->busy_writing = true;
fCache->busy_writing_count++;
if (block->previous_transaction != NULL)
block->previous_transaction->busy_writing_count++;
return true;
}
/*! Adds all writable blocks of the specified transaction to the list of
blocks to be written back.
If no more blocks can be added, \c false is returned, otherwise \c true.
*/
bool
BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
{
ASSERT(!transaction->open);
if (transaction->busy_writing_count != 0) {
hasLeftOvers = true;
return true;
}
hasLeftOvers = false;
block_list::Iterator blockIterator = transaction->blocks.GetIterator();
while (cached_block* block = blockIterator.Next()) {
if (!block->CanBeWritten()) {
hasLeftOvers = true;
continue;
}
if (!Add(block, transaction))
return false;
if (DeletedTransaction())
break;
}
return true;
}
/*! Writes back all blocks that have been added so far. If \a canUnlock is
\c true, the cache lock is released while the blocks are written back.
*/
status_t
BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
{
if (fCount == 0)
return B_OK;
if (canUnlock)
rw_lock_write_unlock(&fCache->lock);
qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
fDeletedTransaction = false;
bigtime_t start = system_time();
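// Write the blocks in runs: starting at index i, extend the run as long as
// the block numbers are consecutive (up to IOV_MAX blocks), so that each run
// can be submitted as a single writev_pos() call by _WriteBlocks().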
for (uint32 i = 0; i < fCount; i++) {
uint32 blocks = 1;
for (; (i + blocks) < fCount && blocks < IOV_MAX; blocks++) {
const uint32 j = i + blocks;
if (fBlocks[j]->block_number != (fBlocks[j - 1]->block_number + 1))
break;
}
status_t status = _WriteBlocks(fBlocks + i, blocks);
if (status != B_OK) {
if (fStatus == B_OK)
fStatus = status;
for (uint32 j = i; j < (i + blocks); j++) {
_UnmarkWriting(fBlocks[j]);
fBlocks[j] = NULL;
}
}
i += (blocks - 1);
}
bigtime_t finish = system_time();
if (canUnlock)
rw_lock_write_lock(&fCache->lock);
if (fStatus == B_OK && fCount >= 8) {
fCache->last_block_write = finish;
fCache->last_block_write_duration = (fCache->last_block_write - start)
/ fCount;
}
for (uint32 i = 0; i < fCount; i++)
_BlockDone(fBlocks[i], transaction);
fCount = 0;
return fStatus;
}
/*! Convenience function that writes a single \a block back to disk. It will
always only write back the oldest change of the block if it is part of more
than one transaction.
It will automatically send out TRANSACTION_WRITTEN notices, as well as
delete transactions when they are no longer in use.
*/
status_t
BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
{
BlockWriter writer(cache);
writer.Add(block);
return writer.Write();
}
void*
BlockWriter::_Data(cached_block* block) const
{
return block->previous_transaction != NULL && block->original_data != NULL
? block->original_data : block->current_data;
}
status_t
BlockWriter::_WriteBlocks(cached_block** blocks, uint32 count)
{
const size_t blockSize = fCache->block_size;
BStackOrHeapArray<iovec, 8> vecs(count);
for (uint32 i = 0; i < count; i++) {
cached_block* block = blocks[i];
ASSERT(block->busy_writing);
ASSERT(i == 0 || block->block_number == (blocks[i - 1]->block_number + 1));
TRACE(("BlockWriter::_WriteBlocks(block %" B_PRIdOFF ", count %" B_PRIu32 ")\n",
block->block_number, count));
TB(Write(fCache, block));
TB2(BlockData(fCache, block, "before write"));
vecs[i].iov_base = _Data(block);
vecs[i].iov_len = blockSize;
}
ssize_t written = writev_pos(fCache->fd,
blocks[0]->block_number * blockSize, vecs, count);
if (written != (ssize_t)(blockSize * count)) {
TB(Error(fCache, blocks[0]->block_number, "write failed", written));
status_t error = errno;
TRACE_ALWAYS("could not write back %" B_PRIu32 " blocks (start block %" B_PRIdOFF
"): %s\n", count, blocks[0]->block_number, strerror(error));
if (written < 0 && error != 0)
return error;
return B_IO_ERROR;
}
return B_OK;
}
void
BlockWriter::_BlockDone(cached_block* block,
cache_transaction* transaction)
{
if (block == NULL) {
return;
}
if (fCache->num_dirty_blocks > 0)
fCache->num_dirty_blocks--;
if (_Data(block) == block->current_data)
block->is_dirty = false;
_UnmarkWriting(block);
cache_transaction* previous = block->previous_transaction;
if (previous != NULL) {
previous->blocks.Remove(block);
block->previous_transaction = NULL;
if (block->original_data != NULL && block->transaction == NULL) {
fCache->Free(block->original_data);
block->original_data = NULL;
}
if (--previous->num_blocks == 0) {
TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
T(Action("written", fCache, previous));
notify_transaction_listeners(fCache, previous,
TRANSACTION_WRITTEN);
if (transaction != NULL) {
fCache->transaction_hash.RemoveUnchecked(transaction);
} else
fCache->transaction_hash.Remove(previous);
delete_transaction(fCache, previous);
fDeletedTransaction = true;
}
}
if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
ASSERT(block->original_data == NULL && block->parent_data == NULL);
block->unused = true;
fCache->unused_blocks.Add(block);
fCache->unused_block_count++;
}
TB2(BlockData(fCache, block, "after write"));
}
void
BlockWriter::_UnmarkWriting(cached_block* block)
{
block->busy_writing = false;
if (block->previous_transaction != NULL)
block->previous_transaction->busy_writing_count--;
fCache->busy_writing_count--;
if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
|| block->busy_writing_waiters) {
fCache->busy_writing_waiters = false;
block->busy_writing_waiters = false;
fCache->busy_writing_condition.NotifyAll();
}
}
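/*! qsort() comparator: orders blocks by ascending block number, so that
consecutive blocks end up adjacent and can be coalesced into one write.
*/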
int
BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
{
cached_block* blockA = *(cached_block**)_blockA;
cached_block* blockB = *(cached_block**)_blockB;
off_t diff = blockA->block_number - blockB->block_number;
if (diff > 0)
return 1;
return diff < 0 ? -1 : 0;
}
#ifndef BUILDING_USERLAND_FS_SERVER
BlockPrefetcher::BlockPrefetcher(block_cache* cache, off_t blockNumber, size_t numBlocks)
:
fCache(cache),
fBlockNumber(blockNumber),
fNumRequested(numBlocks),
fNumAllocated(0)
{
fBlocks = new cached_block*[numBlocks];
fDestVecs = new generic_io_vec[numBlocks];
}
BlockPrefetcher::~BlockPrefetcher()
{
delete[] fBlocks;
delete[] fDestVecs;
}
/*! Allocates the cached_block objects (and their data buffers) for the
blocks to be prefetched, stopping early at the first block that is already
in the cache.
@return If an error is returned, then no blocks have been allocated.
@post Blocks have been constructed (including allocating the current_data member)
but current_data is uninitialized.
*/
status_t
BlockPrefetcher::Allocate()
{
TRACE(("BlockPrefetcher::Allocate: looking up %" B_PRIuSIZE " blocks, starting with %"
B_PRIdOFF "\n", fNumBlocks, fBlockNumber));
ASSERT_LOCKED_MUTEX(&fCache->lock);
size_t finalNumBlocks = fNumRequested;
for (size_t i = 0; i < fNumRequested; ++i) {
off_t blockNumIter = fBlockNumber + i;
if (blockNumIter < 0 || blockNumIter >= fCache->max_blocks) {
panic("BlockPrefetcher::Allocate: invalid block number %" B_PRIdOFF " (max %"
B_PRIdOFF ")", blockNumIter, fCache->max_blocks - 1);
return B_BAD_VALUE;
}
cached_block* block = fCache->hash.Lookup(blockNumIter);
if (block != NULL) {
TRACE(("BlockPrefetcher::Allocate: found an existing block (%" B_PRIdOFF ")\n",
blockNumIter));
fBlocks[i] = NULL;
finalNumBlocks = i;
break;
}
}
for (size_t i = 0; i < finalNumBlocks; ++i) {
cached_block* block = fCache->NewBlock(fBlockNumber + i);
if (block == NULL) {
_RemoveAllocated(0, i);
return B_NO_MEMORY;
}
fCache->hash.Insert(block);
block->unused = true;
fCache->unused_blocks.Add(block);
fCache->unused_block_count++;
fBlocks[i] = block;
}
fNumAllocated = finalNumBlocks;
return B_OK;
}
/*! Starts the asynchronous read of the allocated blocks and returns without
waiting for the I/O to complete.
\post The calling object will eventually be deleted by _IOFinishedCallback.
*/
status_t
BlockPrefetcher::ReadAsync(WriteLocker& cacheLocker)
{
TRACE(("BlockPrefetcher::Read: reading %" B_PRIuSIZE " blocks\n", fNumAllocated));
size_t blockSize = fCache->block_size;
generic_io_vec* vecs = fDestVecs;
for (size_t i = 0; i < fNumAllocated; ++i) {
vecs[i].base = reinterpret_cast<generic_addr_t>(fBlocks[i]->current_data);
vecs[i].length = blockSize;
mark_block_busy_reading(fCache, fBlocks[i]);
}
IORequest* request = new IORequest;
status_t status = request->Init(fBlockNumber * blockSize, vecs, fNumAllocated,
fNumAllocated * blockSize, false, B_DELETE_IO_REQUEST);
if (status != B_OK) {
TB(Error(fCache, fBlockNumber, "IORequest::Init starting here failed", status));
TRACE_ALWAYS("BlockPrefetcher::Read: failed to initialize IO request for %" B_PRIuSIZE
" blocks starting with %" B_PRIdOFF ": %s\n",
fNumAllocated, fBlockNumber, strerror(status));
_RemoveAllocated(fNumAllocated, fNumAllocated);
delete request;
return status;
}
request->SetFinishedCallback(_IOFinishedCallback, this);
cacheLocker.Unlock();
return do_fd_io(fCache->fd, request);
}
void
BlockPrefetcher::_IOFinishedCallback(void* cookie, io_request* request, status_t status,
bool partialTransfer, generic_size_t bytesTransferred)
{
TRACE(("BlockPrefetcher::_IOFinishedCallback: status %s, partial %d\n",
strerror(status), partialTransfer));
((BlockPrefetcher*)cookie)->_IOFinished(status, bytesTransferred);
}
void
BlockPrefetcher::_IOFinished(status_t status, generic_size_t bytesTransferred)
{
WriteLocker locker(&fCache->lock);
if (bytesTransferred < (fNumAllocated * fCache->block_size)) {
_RemoveAllocated(fNumAllocated, fNumAllocated);
TB(Error(fCache, fBlockNumber, "prefetch starting here failed", status));
TRACE_ALWAYS("BlockPrefetcher::_IOFinished: transferred only %" B_PRIuGENADDR
" bytes in attempt to read %" B_PRIuSIZE " blocks (start block %" B_PRIdOFF "): %s\n",
bytesTransferred, fNumAllocated, fBlockNumber, strerror(status));
} else {
for (size_t i = 0; i < fNumAllocated; i++) {
TB(Read(fCache, fBlocks[i]));
mark_block_unbusy_reading(fCache, fBlocks[i]);
fBlocks[i]->last_accessed = system_time() / 1000000L;
}
}
delete this;
}
/*! Unmarks and removes blocks that were allocated for a prefetch that failed
or is cancelled.
*/
void
BlockPrefetcher::_RemoveAllocated(size_t unbusyCount, size_t removeCount)
{
TRACE(("BlockPrefetcher::_RemoveAllocated: unbusy %" B_PRIuSIZE " and remove %" B_PRIuSIZE
" starting with %" B_PRIdOFF "\n", unbusyCount, removeCount, (*fBlocks)->block_number));
ASSERT_LOCKED_MUTEX(&fCache->lock);
for (size_t i = 0; i < unbusyCount; ++i)
mark_block_unbusy_reading(fCache, fBlocks[i]);
for (size_t i = 0; i < removeCount; ++i) {
ASSERT(fBlocks[i]->is_dirty == false && fBlocks[i]->unused == true);
fCache->unused_blocks.Remove(fBlocks[i]);
fCache->unused_block_count--;
fCache->RemoveBlock(fBlocks[i]);
fBlocks[i] = NULL;
}
fNumAllocated = 0;
return;
}
#endif
block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
bool readOnly)
:
fd(_fd),
max_blocks(numBlocks),
block_size(blockSize),
next_transaction_id(1),
last_transaction(NULL),
buffer_cache(NULL),
unused_block_count(0),
busy_reading_count(0),
busy_reading_waiters(false),
busy_writing_count(0),
busy_writing_waiters(false),
last_block_write(0),
last_block_write_duration(0),
num_dirty_blocks(0),
read_only(readOnly)
{
}
block_cache::~block_cache()
{
unregister_low_resource_handler(&_LowMemoryHandler, this);
delete_object_cache(buffer_cache);
rw_lock_destroy(&lock);
}
status_t
block_cache::Init()
{
rw_lock_init(&lock, "block cache");
B_INITIALIZE_SPINLOCK(&unused_blocks_lock);
busy_reading_condition.Init(this, "cache block busy_reading");
busy_writing_condition.Init(this, "cache block busy writing");
condition_variable.Init(this, "cache transaction sync");
buffer_cache = create_object_cache("block cache buffers", block_size,
CACHE_NO_DEPOT | CACHE_LARGE_SLAB);
if (buffer_cache == NULL)
return B_NO_MEMORY;
if (hash.Init(1024) != B_OK)
return B_NO_MEMORY;
if (transaction_hash.Init(16) != B_OK)
return B_NO_MEMORY;
return register_low_resource_handler(&_LowMemoryHandler, this,
B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
}
void
block_cache::Free(void* buffer)
{
if (buffer != NULL)
object_cache_free(buffer_cache, buffer, 0);
}
void*
block_cache::Allocate()
{
void* block = object_cache_alloc(buffer_cache, 0);
if (block != NULL)
return block;
RemoveUnusedBlocks(100);
return object_cache_alloc(buffer_cache, 0);
}
void
block_cache::FreeBlock(cached_block* block)
{
Free(block->current_data);
if (block->original_data != NULL || block->parent_data != NULL) {
panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
block->block_number, block->original_data, block->parent_data);
}
#if BLOCK_CACHE_DEBUG_CHANGED
Free(block->compare);
#endif
object_cache_free(sBlockCache, block, 0);
}
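/*! Allocates a new block structure and its data buffer for \a blockNumber.
Under low-resource conditions an existing unused block is recycled instead;
this is also the fallback if the allocation itself fails. Returns \c NULL if
no block could be obtained.
*/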
cached_block*
block_cache::NewBlock(off_t blockNumber)
{
cached_block* block = NULL;
if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
block = _GetUnusedBlock();
}
if (block == NULL) {
block = (cached_block*)object_cache_alloc(sBlockCache, 0);
if (block != NULL) {
block->current_data = Allocate();
if (block->current_data == NULL) {
object_cache_free(sBlockCache, block, 0);
return NULL;
}
} else {
TB(Error(this, blockNumber, "allocation failed"));
TRACE_ALWAYS("block allocation failed, unused list is %sempty.\n",
unused_blocks.IsEmpty() ? "" : "not ");
block = _GetUnusedBlock();
if (block == NULL) {
TB(Error(this, blockNumber, "get unused failed"));
FATAL(("could not allocate block!\n"));
return NULL;
}
}
}
block->block_number = blockNumber;
block->ref_count = 0;
block->last_accessed = 0;
block->transaction_next = NULL;
block->transaction = block->previous_transaction = NULL;
block->original_data = NULL;
block->parent_data = NULL;
block->busy_reading = false;
block->busy_writing = false;
block->is_writing = false;
block->is_dirty = false;
block->unused = false;
block->discard = false;
block->busy_reading_waiters = false;
block->busy_writing_waiters = false;
#if BLOCK_CACHE_DEBUG_CHANGED
block->compare = NULL;
#endif
return block;
}
void
block_cache::FreeBlockParentData(cached_block* block)
{
ASSERT(block->parent_data != NULL);
if (block->parent_data != block->current_data)
Free(block->parent_data);
block->parent_data = NULL;
}
void
block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
{
TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
for (block_list::Iterator iterator = unused_blocks.GetIterator();
cached_block* block = iterator.Next();) {
if (minSecondsOld >= block->LastAccess()) {
break;
}
if (block->busy_reading || block->busy_writing)
continue;
TB(Flush(this, block));
TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
block->block_number, block->last_accessed));
if (block->is_dirty && !block->discard) {
if (block->busy_writing)
continue;
BlockWriter::WriteBlock(this, block);
}
iterator.Remove();
unused_block_count--;
RemoveBlock(block);
if (--count <= 0)
break;
}
}
void
block_cache::RemoveBlock(cached_block* block)
{
hash.Remove(block);
FreeBlock(block);
}
/*! Discards the block from the cache without writing it back (only allowed
for blocks not part of a transaction).
*/
void
block_cache::DiscardBlock(cached_block* block)
{
ASSERT(block->discard);
ASSERT(block->previous_transaction == NULL);
if (block->parent_data != NULL)
FreeBlockParentData(block);
if (block->original_data != NULL) {
Free(block->original_data);
block->original_data = NULL;
}
RemoveBlock(block);
}
void
block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
{
TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
block_cache* cache = (block_cache*)data;
if (cache->unused_block_count <= 1)
return;
int32 free = 0;
int32 secondsOld = 0;
switch (level) {
case B_NO_LOW_RESOURCE:
return;
case B_LOW_RESOURCE_NOTE:
free = cache->unused_block_count / 4;
secondsOld = 120;
break;
case B_LOW_RESOURCE_WARNING:
free = cache->unused_block_count / 2;
secondsOld = 10;
break;
case B_LOW_RESOURCE_CRITICAL:
free = cache->unused_block_count - 1;
secondsOld = 0;
break;
}
WriteLocker locker(&cache->lock);
if (!locker.IsLocked()) {
return;
}
#ifdef TRACE_BLOCK_CACHE
uint32 oldUnused = cache->unused_block_count;
#endif
cache->RemoveUnusedBlocks(free, secondsOld);
TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
cache, oldUnused, cache->unused_block_count));
}
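/*! Takes the first (least recently used) block from the unused list, writes
it back if it is still dirty, removes it from the hash table, and returns it
for reuse. Returns \c NULL if the unused list is empty.
*/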
cached_block*
block_cache::_GetUnusedBlock()
{
TRACE(("block_cache: get unused block\n"));
for (block_list::Iterator iterator = unused_blocks.GetIterator();
cached_block* block = iterator.Next();) {
TB(Flush(this, block, true));
if (block->is_dirty && !block->busy_writing && !block->discard)
BlockWriter::WriteBlock(this, block);
iterator.Remove();
unused_block_count--;
hash.Remove(block);
ASSERT(block->original_data == NULL && block->parent_data == NULL);
block->unused = false;
#if BLOCK_CACHE_DEBUG_CHANGED
if (block->compare != NULL)
Free(block->compare);
#endif
return block;
}
return NULL;
}
/*! Marks the block as busy being read; the cache must be locked.
*/
static void
mark_block_busy_reading(block_cache* cache, cached_block* block)
{
block->busy_reading = true;
cache->busy_reading_count++;
}
/*! Marks the block as no longer busy being read, and wakes up any threads
waiting for it; the cache must be locked.
*/
static void
mark_block_unbusy_reading(block_cache* cache, cached_block* block)
{
block->busy_reading = false;
cache->busy_reading_count--;
if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
|| block->busy_reading_waiters) {
cache->busy_reading_waiters = false;
block->busy_reading_waiters = false;
cache->busy_reading_condition.NotifyAll();
}
}
/*! Waits until the specified block is no longer busy being read.
The cache lock is temporarily released while waiting.
*/
static void
wait_for_busy_reading_block(block_cache* cache, cached_block* block)
{
while (block->busy_reading) {
ConditionVariableEntry entry;
cache->busy_reading_condition.Add(&entry);
block->busy_reading_waiters = true;
rw_lock_write_unlock(&cache->lock);
entry.Wait();
rw_lock_write_lock(&cache->lock);
}
}
/*! Waits until no blocks of the cache are busy being read.
The cache lock is temporarily released while waiting.
*/
static void
wait_for_busy_reading_blocks(block_cache* cache)
{
while (cache->busy_reading_count != 0) {
ConditionVariableEntry entry;
cache->busy_reading_condition.Add(&entry);
cache->busy_reading_waiters = true;
rw_lock_write_unlock(&cache->lock);
entry.Wait();
rw_lock_write_lock(&cache->lock);
}
}
/*! Waits until the specified block is no longer busy being written.
The cache lock is temporarily released while waiting.
*/
static void
wait_for_busy_writing_block(block_cache* cache, cached_block* block)
{
while (block->busy_writing) {
ConditionVariableEntry entry;
cache->busy_writing_condition.Add(&entry);
block->busy_writing_waiters = true;
rw_lock_write_unlock(&cache->lock);
entry.Wait();
rw_lock_write_lock(&cache->lock);
}
}
/*! Waits until no blocks of the cache are busy being written.
The cache lock is temporarily released while waiting.
*/
static void
wait_for_busy_writing_blocks(block_cache* cache)
{
while (cache->busy_writing_count != 0) {
ConditionVariableEntry entry;
cache->busy_writing_condition.Add(&entry);
cache->busy_writing_waiters = true;
rw_lock_write_unlock(&cache->lock);
entry.Wait();
rw_lock_write_lock(&cache->lock);
}
}
/*! Removes a reference from the specified \a block. If this was the last
reference, the block is moved into the unused list.
In low memory situations, it will also free some blocks from that list,
but not necessarily the \a block it just released.
*/
static void
put_cached_block(block_cache* cache, cached_block* block, WriteLocker* writeLocker = NULL)
{
#if BLOCK_CACHE_DEBUG_CHANGED
if (block->compare != NULL
&& memcmp(block->current_data, block->compare, cache->block_size) != 0) {
if (writeLocker != NULL && !writeLocker->IsLocked()) {
rw_lock_read_unlock(&cache->lock);
writeLocker->Lock();
}
TRACE_ALWAYS("new block:\n");
dump_block((const char*)block->current_data, 256, " ");
TRACE_ALWAYS("unchanged block:\n");
dump_block((const char*)block->compare, 256, " ");
BlockWriter::WriteBlock(cache, block);
panic("block_cache: supposed to be clean block was changed!\n");
cache->Free(block->compare);
block->compare = NULL;
}
#endif
TB(Put(cache, block));
if (writeLocker != NULL && !writeLocker->IsLocked()) {
#ifdef _KERNEL_MODE
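// Fast path: if the block does not belong to any transaction and is not
// being discarded, the reference can be dropped while still holding only
// the read lock; the unused list is protected by its own spinlock.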
if (!block->discard && !block->is_writing
&& block->transaction == NULL
&& block->previous_transaction == NULL) {
if (atomic_add(&block->ref_count, -1) == 1) {
InterruptsSpinLocker unusedLocker(cache->unused_blocks_lock);
if (atomic_get(&block->ref_count) == 0 && !block->unused) {
cache->unused_blocks.Add(block);
cache->unused_block_count++;
block->unused = true;
}
}
return;
}
#endif
rw_lock_read_unlock(&cache->lock);
writeLocker->Lock();
}
if (block->ref_count < 1) {
panic("Invalid ref_count for block %p, cache %p\n", block, cache);
return;
}
if (--block->ref_count == 0
&& block->transaction == NULL
&& block->previous_transaction == NULL) {
block->is_writing = false;
if (block->discard) {
cache->RemoveBlock(block);
} else {
ASSERT(!block->unused);
block->unused = true;
ASSERT(block->original_data == NULL && block->parent_data == NULL);
cache->unused_blocks.Add(block);
cache->unused_block_count++;
}
}
}
static void
put_cached_block(block_cache* cache, off_t blockNumber, WriteLocker* writeLocker = NULL)
{
if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
blockNumber, cache->max_blocks - 1);
}
cached_block* block = cache->hash.Lookup(blockNumber);
if (block != NULL) {
put_cached_block(cache, block, writeLocker);
} else {
TB(Error(cache, blockNumber, "put unknown"));
}
}
/*! Retrieves the block \a blockNumber from the hash table, if it's already
there, or reads it from the disk.
You need to have the cache locked when calling this function.
\param _allocated tells you whether or not a new block has been allocated
to satisfy your request.
\param readBlock if \c false, the block will not be read in case it was
not already in the cache. The block you retrieve may contain random
data. If \c true, the cache will be temporarily unlocked while the
block is read in.
*/
static status_t
get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
bool readBlock, cached_block** _block)
{
ASSERT_LOCKED_MUTEX(&cache->lock);
if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
blockNumber, cache->max_blocks - 1);
return B_BAD_VALUE;
}
retry:
cached_block* block = cache->hash.Lookup(blockNumber);
*_allocated = false;
if (block == NULL) {
block = cache->NewBlock(blockNumber);
if (block == NULL)
return B_NO_MEMORY;
cache->hash.Insert(block);
*_allocated = true;
} else if (block->busy_reading) {
wait_for_busy_reading_block(cache, block);
goto retry;
}
if (block->unused) {
block->unused = false;
cache->unused_blocks.Remove(block);
cache->unused_block_count--;
}
if (*_allocated && readBlock) {
int32 blockSize = cache->block_size;
mark_block_busy_reading(cache, block);
rw_lock_write_unlock(&cache->lock);
ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
block->current_data, blockSize);
rw_lock_write_lock(&cache->lock);
if (bytesRead < blockSize) {
cache->RemoveBlock(block);
TB(Error(cache, blockNumber, "read failed", bytesRead));
status_t error = errno;
TRACE_ALWAYS("could not read block %" B_PRIdOFF ": bytesRead: %zd,"
" error: %s\n", blockNumber, bytesRead, strerror(error));
if (error == B_OK)
return B_IO_ERROR;
return error;
}
TB(Read(cache, block));
mark_block_unbusy_reading(cache, block);
}
block->ref_count++;
block->last_accessed = system_time() / 1000000L;
*_block = block;
return B_OK;
}
/*! Returns the writable block data for the requested \a blockNumber.
If \a cleared is true, the block is not read from disk; an empty block
is returned.
This is the only method to insert a block into a transaction. It makes
sure that the previous block contents are preserved in that case.
*/
static status_t
get_writable_cached_block(block_cache* cache, off_t blockNumber,
int32 transactionID, bool cleared, void** _block)
{
TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
blockNumber, transactionID));
if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
blockNumber, cache->max_blocks - 1);
return B_BAD_VALUE;
}
bool allocated;
cached_block* block;
status_t status = get_cached_block(cache, blockNumber, &allocated,
!cleared, &block);
if (status != B_OK)
return status;
if (block->busy_writing)
wait_for_busy_writing_block(cache, block);
block->discard = false;
if (transactionID == -1) {
if (cleared) {
mark_block_busy_reading(cache, block);
rw_lock_write_unlock(&cache->lock);
memset(block->current_data, 0, cache->block_size);
rw_lock_write_lock(&cache->lock);
mark_block_unbusy_reading(cache, block);
}
block->is_writing = true;
if (!block->is_dirty) {
cache->num_dirty_blocks++;
block->is_dirty = true;
#if BLOCK_CACHE_DEBUG_CHANGED
if (block->compare != NULL) {
cache->Free(block->compare);
block->compare = NULL;
}
#endif
}
TB(Get(cache, block));
*_block = block->current_data;
return B_OK;
}
cache_transaction* transaction = block->transaction;
if (transaction != NULL && transaction->id != transactionID) {
panic("get_writable_cached_block(): asked to get busy writable block "
"(transaction %" B_PRId32 ")\n", block->transaction->id);
put_cached_block(cache, block);
return B_BAD_VALUE;
}
if (transaction == NULL && transactionID != -1) {
transaction = lookup_transaction(cache, transactionID);
if (transaction == NULL) {
panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
transactionID);
put_cached_block(cache, block);
return B_BAD_VALUE;
}
if (!transaction->open) {
panic("get_writable_cached_block(): transaction already done!\n");
put_cached_block(cache, block);
return B_BAD_VALUE;
}
block->transaction = transaction;
block->transaction_next = transaction->first_block;
transaction->first_block = block;
transaction->num_blocks++;
}
if (transaction != NULL)
transaction->last_used = system_time();
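// Preserve the previous contents before handing out a writable buffer:
// original_data keeps the block as it was when the transaction first touched
// it, and parent_data keeps the state at the start of the current
// sub-transaction, so that the changes can later be reverted or separated.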
bool wasUnchanged = block->original_data == NULL
|| block->previous_transaction != NULL;
if (!(allocated && cleared) && block->original_data == NULL) {
block->original_data = cache->Allocate();
if (block->original_data == NULL) {
TB(Error(cache, blockNumber, "allocate original failed"));
FATAL(("could not allocate original_data\n"));
put_cached_block(cache, block);
return B_NO_MEMORY;
}
mark_block_busy_reading(cache, block);
rw_lock_write_unlock(&cache->lock);
memcpy(block->original_data, block->current_data, cache->block_size);
rw_lock_write_lock(&cache->lock);
mark_block_unbusy_reading(cache, block);
}
if (block->parent_data == block->current_data) {
block->parent_data = cache->Allocate();
if (block->parent_data == NULL) {
TB(Error(cache, blockNumber, "allocate parent failed"));
FATAL(("could not allocate parent\n"));
put_cached_block(cache, block);
return B_NO_MEMORY;
}
mark_block_busy_reading(cache, block);
rw_lock_write_unlock(&cache->lock);
memcpy(block->parent_data, block->current_data, cache->block_size);
rw_lock_write_lock(&cache->lock);
mark_block_unbusy_reading(cache, block);
transaction->sub_num_blocks++;
} else if (transaction != NULL && transaction->has_sub_transaction
&& block->parent_data == NULL && wasUnchanged)
transaction->sub_num_blocks++;
if (cleared) {
mark_block_busy_reading(cache, block);
rw_lock_write_unlock(&cache->lock);
memset(block->current_data, 0, cache->block_size);
rw_lock_write_lock(&cache->lock);
mark_block_unbusy_reading(cache, block);
}
block->is_dirty = true;
#if BLOCK_CACHE_DEBUG_CHANGED
if (block->compare != NULL) {
cache->Free(block->compare);
block->compare = NULL;
}
#endif
TB(Get(cache, block));
TB2(BlockData(cache, block, "get writable"));
*_block = block->current_data;
return B_OK;
}
#if DEBUG_BLOCK_CACHE
static void
dump_block(cached_block* block)
{
kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
" %c%c%c%c%c%c %08lx %08lx\n",
(addr_t)block, block->block_number,
(addr_t)block->current_data, (addr_t)block->original_data,
(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
block->unused ? 'U' : '-', block->discard ? 'D' : '-',
(addr_t)block->transaction,
(addr_t)block->previous_transaction);
}
static void
dump_block_long(cached_block* block)
{
kprintf("BLOCK %p\n", block);
kprintf(" current data: %p\n", block->current_data);
kprintf(" original data: %p\n", block->original_data);
kprintf(" parent data: %p\n", block->parent_data);
#if BLOCK_CACHE_DEBUG_CHANGED
kprintf(" compare data: %p\n", block->compare);
#endif
kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count);
kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess());
kprintf(" flags: ");
if (block->busy_reading)
kprintf(" busy_reading");
if (block->busy_writing)
kprintf(" busy_writing");
if (block->is_writing)
kprintf(" is-writing");
if (block->is_dirty)
kprintf(" is-dirty");
if (block->unused)
kprintf(" unused");
if (block->discard)
kprintf(" discard");
kprintf("\n");
if (block->transaction != NULL) {
kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction,
block->transaction->id);
if (block->transaction_next != NULL) {
kprintf(" next in transaction: %" B_PRIdOFF "\n",
block->transaction_next->block_number);
}
}
if (block->previous_transaction != NULL) {
kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
block->previous_transaction,
block->previous_transaction->id);
}
set_debug_variable("_current", (addr_t)block->current_data);
set_debug_variable("_original", (addr_t)block->original_data);
set_debug_variable("_parent", (addr_t)block->parent_data);
}
static int
dump_cached_block(int argc, char** argv)
{
if (argc != 2) {
kprintf("usage: %s <block-address>\n", argv[0]);
return 0;
}
dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
return 0;
}
static int
dump_cache(int argc, char** argv)
{
bool showTransactions = false;
bool showBlocks = false;
int32 i = 1;
while (argv[i] != NULL && argv[i][0] == '-') {
for (char* arg = &argv[i][1]; arg[0]; arg++) {
switch (arg[0]) {
case 'b':
showBlocks = true;
break;
case 't':
showTransactions = true;
break;
default:
print_debugger_command_usage(argv[0]);
return 0;
}
}
i++;
}
if (i >= argc) {
print_debugger_command_usage(argv[0]);
return 0;
}
block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
if (cache == NULL) {
kprintf("invalid cache address\n");
return 0;
}
off_t blockNumber = -1;
if (i + 1 < argc) {
blockNumber = parse_expression(argv[i + 1]);
cached_block* block = cache->hash.Lookup(blockNumber);
if (block != NULL)
dump_block_long(block);
else
kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
return 0;
}
kprintf("BLOCK CACHE: %p\n", cache);
kprintf(" fd: %d\n", cache->fd);
kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks);
kprintf(" block_size: %zu\n", cache->block_size);
kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
kprintf(" buffer_cache: %p\n", cache->buffer_cache);
kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
cache->busy_reading_waiters ? "has" : "no");
kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
cache->busy_writing_waiters ? "has" : "no");
if (!cache->pending_notifications.IsEmpty()) {
kprintf(" pending notifications:\n");
NotificationList::Iterator iterator
= cache->pending_notifications.GetIterator();
while (iterator.HasNext()) {
cache_notification* notification = iterator.Next();
kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification,
notification->events_pending, notification->hook,
notification->data);
}
}
if (showTransactions) {
kprintf(" transactions:\n");
kprintf("address id state blocks main sub\n");
TransactionTable::Iterator iterator(&cache->transaction_hash);
while (iterator.HasNext()) {
cache_transaction* transaction = iterator.Next();
kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
B_PRId32 "\n", transaction, transaction->id,
transaction->open ? "open" : "closed",
transaction->num_blocks, transaction->main_num_blocks,
transaction->sub_num_blocks);
}
}
if (showBlocks) {
kprintf(" blocks:\n");
kprintf("address block no. current original parent refs access "
"flags transact prev. trans\n");
}
uint32 referenced = 0;
uint32 count = 0;
uint32 dirty = 0;
uint32 discarded = 0;
BlockTable::Iterator iterator(&cache->hash);
while (iterator.HasNext()) {
cached_block* block = iterator.Next();
if (showBlocks)
dump_block(block);
if (block->is_dirty)
dirty++;
if (block->discard)
discarded++;
if (block->ref_count)
referenced++;
count++;
}
kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
" discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
" in unused.\n",
count, dirty, discarded, referenced, cache->busy_reading_count,
cache->unused_block_count);
return 0;
}
static int
dump_transaction(int argc, char** argv)
{
bool showBlocks = false;
int i = 1;
if (argc > 1 && !strcmp(argv[1], "-b")) {
showBlocks = true;
i++;
}
if (argc - i < 1 || argc - i > 2) {
print_debugger_command_usage(argv[0]);
return 0;
}
cache_transaction* transaction = NULL;
if (argc - i == 1) {
transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
} else {
block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
int32 id = parse_expression(argv[i + 1]);
transaction = lookup_transaction(cache, id);
if (transaction == NULL) {
kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
return 0;
}
}
kprintf("TRANSACTION %p\n", transaction);
kprintf(" id: %" B_PRId32 "\n", transaction->id);
kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks);
kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks);
kprintf(" has sub: %d\n", transaction->has_sub_transaction);
kprintf(" state: %s\n", transaction->open ? "open" : "closed");
kprintf(" idle: %" B_PRId64 " secs\n",
(system_time() - transaction->last_used) / 1000000);
kprintf(" listeners:\n");
ListenerList::Iterator iterator = transaction->listeners.GetIterator();
while (iterator.HasNext()) {
cache_listener* listener = iterator.Next();
kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
listener->hook, listener->data);
}
if (!showBlocks)
return 0;
kprintf(" blocks:\n");
kprintf("address block no. current original parent refs access "
"flags transact prev. trans\n");
cached_block* block = transaction->first_block;
while (block != NULL) {
dump_block(block);
block = block->transaction_next;
}
kprintf("--\n");
block_list::Iterator blockIterator = transaction->blocks.GetIterator();
while (blockIterator.HasNext()) {
block = blockIterator.Next();
dump_block(block);
}
return 0;
}
static int
dump_caches(int argc, char** argv)
{
kprintf("Block caches:\n");
DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
while (i.HasNext()) {
block_cache* cache = i.Next();
if (cache == (block_cache*)&sMarkCache)
continue;
kprintf(" %p\n", cache);
}
return 0;
}
#if BLOCK_CACHE_BLOCK_TRACING >= 2
static int
dump_block_data(int argc, char** argv)
{
using namespace BlockTracing;
bool printStackTrace = true;
uint32 which = 0;
int32 i = 1;
while (i < argc && argv[i][0] == '-') {
char* arg = &argv[i][1];
while (arg[0]) {
switch (arg[0]) {
case 'c':
which |= BlockData::kCurrent;
break;
case 'p':
which |= BlockData::kParent;
break;
case 'o':
which |= BlockData::kOriginal;
break;
default:
kprintf("invalid block specifier (only o/c/p are "
"allowed).\n");
return 0;
}
arg++;
}
i++;
}
if (which == 0)
which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
if (i == argc) {
print_debugger_command_usage(argv[0]);
return 0;
}
int64 from = parse_expression(argv[i]);
int64 to = from;
if (argc > i + 1)
to = parse_expression(argv[i + 1]);
if (to < from)
to = from;
uint32 offset = 0;
uint32 size = LONG_MAX;
if (argc > i + 2)
offset = parse_expression(argv[i + 2]);
if (argc > i + 3)
size = parse_expression(argv[i + 3]);
TraceEntryIterator iterator;
iterator.MoveTo(from - 1);
static char sBuffer[1024];
LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
while (TraceEntry* entry = iterator.Next()) {
int32 index = iterator.Index();
if (index > to)
break;
Action* action = dynamic_cast<Action*>(entry);
if (action != NULL) {
out.Clear();
out.DumpEntry(action);
continue;
}
BlockData* blockData = dynamic_cast<BlockData*>(entry);
if (blockData == NULL)
continue;
out.Clear();
const char* dump = out.DumpEntry(entry);
int length = strlen(dump);
if (length > 0 && dump[length - 1] == '\n')
length--;
kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
if (printStackTrace) {
out.Clear();
entry->DumpStackTrace(out);
if (out.Size() > 0)
kputs(out.Buffer());
}
blockData->DumpBlocks(which, offset, size);
}
return 0;
}
#endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
#endif	// DEBUG_BLOCK_CACHE
/*!	Goes through the list of caches, and returns them one after the
other. The cache returned is automatically locked when you get it, and
unlocked with the next call to this function. Ignores caches that are in
deletion state.
Returns \c NULL when the end of the list is reached.
*/
static block_cache*
get_next_locked_block_cache(block_cache* last)
{
MutexLocker _(sCachesLock);
block_cache* cache;
if (last != NULL) {
rw_lock_write_unlock(&last->lock);
cache = sCaches.GetNext((block_cache*)&sMarkCache);
sCaches.Remove((block_cache*)&sMarkCache);
} else
cache = sCaches.Head();
if (cache != NULL) {
rw_lock_write_lock(&cache->lock);
sCaches.InsertBefore(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
}
return cache;
}
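/*	A typical (illustrative) use of get_next_locked_block_cache() is the
	iteration pattern of the writer thread below: start with NULL, and pass
	the previously returned cache back in until NULL is returned.

		block_cache* cache = NULL;
		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
			// "cache" is write locked here; it is unlocked again by the
			// next call (or when the end of the list is reached)
		}
*/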
/*!	This is the main loop of the block notifier/writer thread: it waits on the
event semaphore and, when woken up, flushes the pending notifications of
all caches.
Every two seconds, it will also write back up to 64 blocks per cache.
*/
static status_t
block_notifier_and_writer(void* /*data*/)
{
const bigtime_t kDefaultTimeout = 2000000LL;
bigtime_t timeout = kDefaultTimeout;
while (true) {
bigtime_t start = system_time();
status_t status = acquire_sem_etc(sEventSemaphore, 1,
B_RELATIVE_TIMEOUT, timeout);
if (status == B_OK) {
flush_pending_notifications();
timeout -= system_time() - start;
continue;
}
timeout = kDefaultTimeout;
size_t usedMemory;
object_cache_get_usage(sBlockCache, &usedMemory);
block_cache* cache = NULL;
while ((cache = get_next_locked_block_cache(cache)) != NULL) {
const bigtime_t next = cache->last_block_write
+ cache->last_block_write_duration * 2 * 64;
if (cache->busy_writing_count > 16 || system_time() < next) {
if (cache->last_block_write_duration > 0) {
timeout = min_c(timeout,
cache->last_block_write_duration * 2 * 64);
}
continue;
}
BlockWriter writer(cache, 64);
bool hasMoreBlocks = false;
size_t cacheUsedMemory;
object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
usedMemory += cacheUsedMemory;
if (cache->num_dirty_blocks) {
BlockTable::Iterator iterator(&cache->hash);
while (iterator.HasNext()) {
cached_block* block = iterator.Next();
if (block->CanBeWritten() && !writer.Add(block)) {
hasMoreBlocks = true;
break;
}
}
} else {
TransactionTable::Iterator iterator(&cache->transaction_hash);
while (iterator.HasNext()) {
cache_transaction* transaction = iterator.Next();
if (transaction->open) {
if (system_time() > transaction->last_used
+ kTransactionIdleTime) {
notify_transaction_listeners(cache, transaction,
TRANSACTION_IDLE);
}
continue;
}
bool hasLeftOvers;
if (!writer.Add(transaction, hasLeftOvers)) {
hasMoreBlocks = true;
break;
}
}
}
writer.Write();
if (hasMoreBlocks && cache->last_block_write_duration > 0) {
timeout = min_c(timeout,
cache->last_block_write_duration * 2 * 64);
}
if ((block_cache_used_memory() / B_PAGE_SIZE)
> vm_page_num_pages() / 2) {
cache->RemoveUnusedBlocks(1000, 10);
}
}
MutexLocker _(sCachesMemoryUseLock);
sUsedMemory = usedMemory;
}
return B_OK;
}
static void
notify_sync(int32 transactionID, int32 event, void* _cache)
{
block_cache* cache = (block_cache*)_cache;
cache->condition_variable.NotifyOne();
}
static bool
is_valid_cache(block_cache* cache)
{
ASSERT_LOCKED_MUTEX(&sCachesLock);
DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
while (iterator.HasNext()) {
if (cache == iterator.Next())
return true;
}
return false;
}
/*!	Waits until all pending notifications of the \a cache have been delivered.
Safe to be called from the block writer/notifier thread.
You must not hold the \a cache lock when calling this function.
*/
static void
wait_for_notifications(block_cache* cache)
{
MutexLocker locker(sCachesLock);
if (find_thread(NULL) == sNotifierWriterThread) {
if (is_valid_cache(cache))
flush_pending_notifications(cache);
return;
}
cache_notification notification;
set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
cache);
ConditionVariableEntry entry;
cache->condition_variable.Add(&entry);
add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
locker.Unlock();
entry.Wait();
}
status_t
block_cache_init(void)
{
sBlockCache = create_object_cache("cached blocks", sizeof(cached_block),
CACHE_LARGE_SLAB);
if (sBlockCache == NULL)
return B_NO_MEMORY;
sCacheNotificationCache = create_object_cache("cache notifications",
sizeof(cache_listener), 0);
if (sCacheNotificationCache == NULL)
return B_NO_MEMORY;
new (&sCaches) DoublyLinkedList<block_cache>;
sEventSemaphore = create_sem(0, "block cache event");
if (sEventSemaphore < B_OK)
return sEventSemaphore;
sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
"block notifier/writer", B_LOW_PRIORITY, NULL);
if (sNotifierWriterThread >= B_OK)
resume_thread(sNotifierWriterThread);
#if DEBUG_BLOCK_CACHE
add_debugger_command_etc("block_caches", &dump_caches,
"dumps all block caches", "\n", 0);
add_debugger_command_etc("block_cache", &dump_cache,
"dumps a specific block cache",
"[-bt] <cache-address> [block-number]\n"
" -t lists the transactions\n"
" -b lists all blocks\n", 0);
add_debugger_command("cached_block", &dump_cached_block,
"dumps the specified cached block");
add_debugger_command_etc("transaction", &dump_transaction,
"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
" -b lists all blocks that are part of this transaction\n", 0);
# if BLOCK_CACHE_BLOCK_TRACING >= 2
add_debugger_command_etc("block_cache_data", &dump_block_data,
"dumps the data blocks logged for the actions",
"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
"If no data specifier is used, all blocks are shown by default.\n"
" -c the current data is shown, if available.\n"
" -p the parent data is shown, if available.\n"
" -o the original data is shown, if available.\n"
" <from> first index of tracing entries to show.\n"
" <to> if given, the last entry. If not, only <from> is shown.\n"
" <offset> the offset of the block data.\n"
" <from> the size of the block data that is dumped\n", 0);
# endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
#endif	// DEBUG_BLOCK_CACHE
return B_OK;
}
size_t
block_cache_used_memory(void)
{
MutexLocker _(sCachesMemoryUseLock);
return sUsedMemory;
}
int32
cache_start_transaction(void* _cache)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
if (cache->last_transaction && cache->last_transaction->open) {
panic("last transaction (%" B_PRId32 ") still open!\n",
cache->last_transaction->id);
}
cache_transaction* transaction = new(std::nothrow) cache_transaction;
if (transaction == NULL)
return B_NO_MEMORY;
transaction->id = atomic_add(&cache->next_transaction_id, 1);
cache->last_transaction = transaction;
TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
T(Action("start", cache, transaction));
cache->transaction_hash.Insert(transaction);
return transaction->id;
}
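/*	Usage sketch (illustrative): the usual transaction bracket as a file
	system would issue it. "cache" comes from block_cache_create(); block
	number 42 is an arbitrary example.

		int32 id = cache_start_transaction(cache);
		void* data = block_cache_get_writable(cache, 42, id);
		// ... modify the block contents ...
		block_cache_put(cache, 42);
		cache_end_transaction(cache, id, NULL, NULL);
*/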
status_t
cache_sync_transaction(void* _cache, int32 id)
{
block_cache* cache = (block_cache*)_cache;
bool hadBusy;
TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
do {
TransactionLocker locker(cache);
hadBusy = false;
BlockWriter writer(cache);
TransactionTable::Iterator iterator(&cache->transaction_hash);
while (iterator.HasNext()) {
cache_transaction* transaction = iterator.Next();
if (transaction->busy_writing_count != 0) {
hadBusy = true;
continue;
}
if (transaction->id <= id && !transaction->open) {
T(Action("sync", cache, transaction));
bool hasLeftOvers;
writer.Add(transaction, hasLeftOvers);
if (hasLeftOvers) {
hadBusy = true;
}
}
}
status_t status = writer.Write();
if (status != B_OK)
return status;
} while (hadBusy);
wait_for_notifications(cache);
return B_OK;
}
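/*	Illustrative sketch: a journal would typically sync everything up to (and
	including) a given transaction when it runs low on log space.
	"lastTransactionID" is a placeholder for the caller's bookkeeping.

		status_t status = cache_sync_transaction(cache, lastTransactionID);
		if (status != B_OK)
			return status;
*/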
status_t
cache_end_transaction(void* _cache, int32 id,
transaction_notification_hook hook, void* data)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL) {
panic("cache_end_transaction(): invalid transaction ID\n");
return B_BAD_VALUE;
}
status_t status = write_blocks_in_previous_transaction(cache, transaction);
if (status != B_OK)
return status;
notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
if (hook != NULL
&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
hook, data) != B_OK) {
return B_NO_MEMORY;
}
T(Action("end", cache, transaction));
cached_block* next;
for (cached_block* block = transaction->first_block; block != NULL;
block = next) {
next = block->transaction_next;
ASSERT(block->previous_transaction == NULL);
if (block->discard) {
cache->DiscardBlock(block);
transaction->num_blocks--;
continue;
}
if (block->original_data != NULL) {
cache->Free(block->original_data);
block->original_data = NULL;
}
if (block->parent_data != NULL) {
ASSERT(transaction->has_sub_transaction);
cache->FreeBlockParentData(block);
}
transaction->blocks.Add(block);
block->previous_transaction = transaction;
block->transaction_next = NULL;
block->transaction = NULL;
}
transaction->open = false;
return B_OK;
}
status_t
cache_abort_transaction(void* _cache, int32 id)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL) {
panic("cache_abort_transaction(): invalid transaction ID\n");
return B_BAD_VALUE;
}
T(Abort(cache, transaction));
notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
cached_block* block = transaction->first_block;
cached_block* next;
for (; block != NULL; block = next) {
next = block->transaction_next;
if (block->original_data != NULL) {
TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
"block %" B_PRIdOFF "\n", transaction->id, block->block_number));
memcpy(block->current_data, block->original_data,
cache->block_size);
cache->Free(block->original_data);
block->original_data = NULL;
}
if (transaction->has_sub_transaction && block->parent_data != NULL)
cache->FreeBlockParentData(block);
block->transaction_next = NULL;
block->transaction = NULL;
block->discard = false;
if (block->previous_transaction == NULL)
block->is_dirty = false;
}
cache->transaction_hash.Remove(transaction);
delete_transaction(cache, transaction);
return B_OK;
}
/*!	Moves the blocks changed in the sub transaction of \a id into a new
transaction, and detaches the (now closed) parent transaction
from its sub transaction.
The new transaction also gets a new transaction ID.
*/
int32
cache_detach_sub_transaction(void* _cache, int32 id,
transaction_notification_hook hook, void* data)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL) {
panic("cache_detach_sub_transaction(): invalid transaction ID\n");
return B_BAD_VALUE;
}
if (!transaction->has_sub_transaction)
return B_BAD_VALUE;
status_t status = write_blocks_in_previous_transaction(cache, transaction);
if (status != B_OK)
return status;
cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
if (newTransaction == NULL)
return B_NO_MEMORY;
newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
T(Detach(cache, transaction, newTransaction));
notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
data) != B_OK) {
delete newTransaction;
return B_NO_MEMORY;
}
cached_block* last = NULL;
cached_block* next;
for (cached_block* block = transaction->first_block; block != NULL;
block = next) {
next = block->transaction_next;
ASSERT(block->previous_transaction == NULL);
if (block->discard) {
cache->DiscardBlock(block);
transaction->main_num_blocks--;
continue;
}
if (block->parent_data != NULL) {
ASSERT(block->original_data != NULL);
cache->Free(block->original_data);
if (block->parent_data != block->current_data) {
block->original_data = block->parent_data;
} else {
block->original_data = NULL;
}
block->parent_data = NULL;
transaction->blocks.Add(block);
block->previous_transaction = transaction;
}
if (block->original_data != NULL) {
ASSERT(block->parent_data == NULL);
if (last == NULL)
newTransaction->first_block = block;
else
last->transaction_next = block;
block->transaction = newTransaction;
last = block;
} else
block->transaction = NULL;
block->transaction_next = NULL;
}
newTransaction->num_blocks = transaction->sub_num_blocks;
transaction->open = false;
transaction->has_sub_transaction = false;
transaction->num_blocks = transaction->main_num_blocks;
transaction->sub_num_blocks = 0;
cache->transaction_hash.Insert(newTransaction);
cache->last_transaction = newTransaction;
return newTransaction->id;
}
status_t
cache_abort_sub_transaction(void* _cache, int32 id)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL) {
panic("cache_abort_sub_transaction(): invalid transaction ID\n");
return B_BAD_VALUE;
}
if (!transaction->has_sub_transaction)
return B_BAD_VALUE;
T(Abort(cache, transaction));
notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
cached_block* block = transaction->first_block;
cached_block* last = NULL;
cached_block* next;
for (; block != NULL; block = next) {
next = block->transaction_next;
if (block->parent_data == NULL) {
if (block->original_data != NULL) {
memcpy(block->current_data, block->original_data,
cache->block_size);
}
if (last != NULL)
last->transaction_next = next;
else
transaction->first_block = next;
block->transaction_next = NULL;
block->transaction = NULL;
transaction->num_blocks--;
if (block->previous_transaction == NULL) {
cache->Free(block->original_data);
block->original_data = NULL;
block->is_dirty = false;
if (block->ref_count == 0) {
block->unused = true;
cache->unused_blocks.Add(block);
cache->unused_block_count++;
}
}
} else {
if (block->parent_data != block->current_data) {
TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
"restored contents of block %" B_PRIdOFF "\n",
transaction->id, block->block_number));
memcpy(block->current_data, block->parent_data,
cache->block_size);
cache->Free(block->parent_data);
}
block->parent_data = NULL;
last = block;
}
block->discard = false;
}
transaction->has_sub_transaction = false;
transaction->sub_num_blocks = 0;
return B_OK;
}
status_t
cache_start_sub_transaction(void* _cache, int32 id)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL) {
panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
id);
return B_BAD_VALUE;
}
notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
cached_block* block = transaction->first_block;
cached_block* next;
for (; block != NULL; block = next) {
next = block->transaction_next;
if (block->parent_data != NULL) {
ASSERT(transaction->has_sub_transaction);
cache->FreeBlockParentData(block);
}
if (block->discard) {
ASSERT(block->previous_transaction == NULL);
if (block->original_data != NULL) {
memcpy(block->current_data, block->original_data,
cache->block_size);
cache->Free(block->original_data);
block->original_data = NULL;
}
continue;
}
block->parent_data = block->current_data;
}
transaction->has_sub_transaction = true;
transaction->main_num_blocks = transaction->num_blocks;
transaction->sub_num_blocks = 0;
T(Action("start-sub", cache, transaction));
return B_OK;
}
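/*	Sketch (illustrative only) of how sub transactions are typically chained:
	the still open transaction gets a sub transaction, and once that should
	live on by itself, it is detached into a new transaction. "hook" and
	"data" stand for the caller's TRANSACTION_WRITTEN notification hook and
	its argument.

		cache_start_sub_transaction(cache, id);
		// ... more blocks are changed under the same id ...
		int32 newID = cache_detach_sub_transaction(cache, id, hook, data);
		// "id" is now closed; further changes continue under "newID"
*/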
/*!	Adds a transaction listener that gets notified when the transaction
is ended, aborted, written, or idle as specified by \a events.
The listener gets automatically removed when the transaction ends.
*/
status_t
cache_add_transaction_listener(void* _cache, int32 id, int32 events,
transaction_notification_hook hook, void* data)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL)
return B_BAD_VALUE;
return add_transaction_listener(cache, transaction, events, hook, data);
}
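/*	Illustrative listener sketch, assuming the transaction_notification_hook
	signature declared in fs_cache.h (int32 id, int32 event, void* data);
	"my_transaction_written" is a hypothetical callback.

		static void
		my_transaction_written(int32 id, int32 event, void* data)
		{
			// "event" is TRANSACTION_WRITTEN here
		}

		cache_add_transaction_listener(cache, id, TRANSACTION_WRITTEN,
			&my_transaction_written, NULL);
*/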
status_t
cache_remove_transaction_listener(void* _cache, int32 id,
transaction_notification_hook hookFunction, void* data)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL)
return B_BAD_VALUE;
ListenerList::Iterator iterator = transaction->listeners.GetIterator();
while (iterator.HasNext()) {
cache_listener* listener = iterator.Next();
if (listener->data == data && listener->hook == hookFunction) {
iterator.Remove();
if (listener->events_pending != 0) {
MutexLocker _(sNotificationsLock);
if (listener->events_pending != 0)
cache->pending_notifications.Remove(listener);
}
delete listener;
return B_OK;
}
}
return B_ENTRY_NOT_FOUND;
}
status_t
cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
{
cached_block* block = (cached_block*)*_cookie;
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL || !transaction->open)
return B_BAD_VALUE;
if (block == NULL)
block = transaction->first_block;
else
block = block->transaction_next;
if (transaction->has_sub_transaction) {
if (mainOnly) {
while (block != NULL && block->parent_data == NULL)
block = block->transaction_next;
} else {
while (block != NULL && block->discard)
block = block->transaction_next;
}
}
if (block == NULL)
return B_ENTRY_NOT_FOUND;
if (_blockNumber)
*_blockNumber = block->block_number;
if (_data)
*_data = mainOnly ? block->parent_data : block->current_data;
if (_unchangedData)
*_unchangedData = block->original_data;
*_cookie = (addr_t)block;
return B_OK;
}
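/*	Iteration sketch (illustrative): the cookie must start out as 0, and is
	updated by each successful call.

		long cookie = 0;
		off_t blockNumber;
		void* data;
		while (cache_next_block_in_transaction(cache, id, false, &cookie,
				&blockNumber, &data, NULL) == B_OK) {
			// inspect the changed block here
		}
*/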
int32
cache_blocks_in_transaction(void* _cache, int32 id)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL)
return B_BAD_VALUE;
return transaction->num_blocks;
}
/*!	Returns the number of blocks that are part of the main transaction. If this
transaction does not have a sub transaction yet, this is the same value as
cache_blocks_in_transaction() would return.
*/
int32
cache_blocks_in_main_transaction(void* _cache, int32 id)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL)
return B_BAD_VALUE;
if (transaction->has_sub_transaction)
return transaction->main_num_blocks;
return transaction->num_blocks;
}
int32
cache_blocks_in_sub_transaction(void* _cache, int32 id)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
cache_transaction* transaction = lookup_transaction(cache, id);
if (transaction == NULL)
return B_BAD_VALUE;
return transaction->sub_num_blocks;
}
/*!	Returns whether or not the block with the given \a blockNumber is part of
the transaction with the given \a id.
*/
bool
cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
cached_block* block = cache->hash.Lookup(blockNumber);
return (block != NULL && block->transaction != NULL
&& block->transaction->id == id);
}
void
block_cache_delete(void* _cache, bool allowWrites)
{
block_cache* cache = (block_cache*)_cache;
if (allowWrites)
block_cache_sync(cache);
mutex_lock(&sCachesLock);
sCaches.Remove(cache);
mutex_unlock(&sCachesLock);
rw_lock_write_lock(&cache->lock);
wait_for_busy_reading_blocks(cache);
wait_for_busy_writing_blocks(cache);
cached_block* block = cache->hash.Clear(true);
while (block != NULL) {
cached_block* next = block->next;
cache->FreeBlock(block);
block = next;
}
cache_transaction* transaction = cache->transaction_hash.Clear(true);
while (transaction != NULL) {
cache_transaction* next = transaction->next;
delete transaction;
transaction = next;
}
delete cache;
}
void*
block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
{
block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
readOnly);
if (cache == NULL)
return NULL;
if (cache->Init() != B_OK) {
delete cache;
return NULL;
}
MutexLocker _(sCachesLock);
sCaches.Add(cache);
return cache;
}
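/*	Creation sketch (illustrative): "fd" would be the file system's device
	file descriptor, and the block count/size would come from its super block.

		void* cache = block_cache_create(fd, numBlocks, blockSize, false);
		if (cache == NULL)
			return B_NO_MEMORY;
		// ... use the cache ...
		block_cache_delete(cache, true);
			// write back dirty blocks and delete it again on unmount
*/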
status_t
block_cache_sync(void* _cache)
{
block_cache* cache = (block_cache*)_cache;
WriteLocker locker(&cache->lock);
BlockWriter writer(cache);
BlockTable::Iterator iterator(&cache->hash);
while (iterator.HasNext()) {
cached_block* block = iterator.Next();
if (block->CanBeWritten())
writer.Add(block);
}
status_t status = writer.Write();
locker.Unlock();
wait_for_notifications(cache);
return status;
}
status_t
block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
{
block_cache* cache = (block_cache*)_cache;
if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
" (max %" B_PRIdOFF ")",
blockNumber, cache->max_blocks - 1);
return B_BAD_VALUE;
}
WriteLocker locker(&cache->lock);
BlockWriter writer(cache);
for (; numBlocks > 0; numBlocks--, blockNumber++) {
cached_block* block = cache->hash.Lookup(blockNumber);
if (block == NULL)
continue;
if (block->CanBeWritten())
writer.Add(block);
}
status_t status = writer.Write();
locker.Unlock();
wait_for_notifications(cache);
return status;
}
/*!	Discards one or more blocks from the current transaction and the cache.
You have to call this function when you no longer use a block, i.e. when it
might be reclaimed by the file cache in order to make sure they won't
interfere.
*/
void
block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
{
block_cache* cache = (block_cache*)_cache;
TransactionLocker locker(cache);
BlockWriter writer(cache);
for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
cached_block* block = cache->hash.Lookup(blockNumber);
if (block != NULL && block->previous_transaction != NULL)
writer.Add(block);
}
writer.Write();
blockNumber -= numBlocks;
// reset blockNumber to its original value
for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
cached_block* block = cache->hash.Lookup(blockNumber);
if (block == NULL)
continue;
ASSERT(block->previous_transaction == NULL);
if (block->unused) {
cache->unused_blocks.Remove(block);
cache->unused_block_count--;
cache->RemoveBlock(block);
} else {
if (block->transaction != NULL && block->parent_data != NULL
&& block->parent_data != block->current_data) {
panic("Discarded block %" B_PRIdOFF " has already been changed in this "
"transaction!", blockNumber);
}
block->discard = true;
}
}
}
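/*	Illustrative use: when a file system frees a run of on-disk blocks inside
	a transaction, it discards them so that pending changes are not written
	back needlessly ("firstFreedBlock"/"numFreedBlocks" are placeholders).

		block_cache_discard(cache, firstFreedBlock, numFreedBlocks);
*/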
status_t
block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
{
block_cache* cache = (block_cache*)_cache;
WriteLocker locker(&cache->lock);
if (cache->read_only) {
panic("tried to make block writable on a read-only cache!");
return B_ERROR;
}
void* block;
status_t status = get_writable_cached_block(cache, blockNumber,
transaction, false, &block);
if (status == B_OK) {
put_cached_block((block_cache*)_cache, blockNumber);
return B_OK;
}
return status;
}
status_t
block_cache_get_writable_etc(void* _cache, off_t blockNumber,
int32 transaction, void** _block)
{
block_cache* cache = (block_cache*)_cache;
WriteLocker locker(&cache->lock);
TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
blockNumber, transaction));
if (cache->read_only)
panic("tried to get writable block on a read-only cache!");
return get_writable_cached_block(cache, blockNumber,
transaction, false, _block);
}
void*
block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
{
void* block;
if (block_cache_get_writable_etc(_cache, blockNumber,
transaction, &block) == B_OK)
return block;
return NULL;
}
void*
block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
{
block_cache* cache = (block_cache*)_cache;
WriteLocker locker(&cache->lock);
TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
blockNumber, transaction));
if (cache->read_only)
panic("tried to get empty writable block on a read-only cache!");
void* block;
if (get_writable_cached_block((block_cache*)_cache, blockNumber,
transaction, true, &block) == B_OK)
return block;
return NULL;
}
status_t
block_cache_get_etc(void* _cache, off_t blockNumber, const void** _block)
{
block_cache* cache = (block_cache*)_cache;
WriteLocker writeLocker(&cache->lock, false, false);
#ifndef _KERNEL_MODE
cached_block* block;
{
#else
rw_lock_read_lock(&cache->lock);
cached_block* block = cache->hash.Lookup(blockNumber);
if (block != NULL && !block->busy_reading) {
if (atomic_add(&block->ref_count, 1) == 0) {
InterruptsSpinLocker unusedLocker(cache->unused_blocks_lock);
if (block->unused) {
cache->unused_blocks.Remove(block);
cache->unused_block_count--;
block->unused = false;
}
}
atomic_set(&block->last_accessed, system_time() / 1000000L);
rw_lock_read_unlock(&cache->lock);
} else {
rw_lock_read_unlock(&cache->lock);
#endif
writeLocker.Lock();
bool allocated;
status_t status = get_cached_block(cache, blockNumber, &allocated, true,
&block);
if (status != B_OK)
return status;
}
#if BLOCK_CACHE_DEBUG_CHANGED
if (block->compare == NULL) {
if (!writeLocker.IsLocked())
writeLocker.Lock();
if (block->compare == NULL && !block->is_dirty) {
block->compare = cache->Allocate();
if (block->compare != NULL)
memcpy(block->compare, block->current_data, cache->block_size);
}
}
#endif
TB(Get(cache, block));
*_block = block->current_data;
return B_OK;
}
const void*
block_cache_get(void* _cache, off_t blockNumber)
{
const void* block;
if (block_cache_get_etc(_cache, blockNumber, &block) == B_OK)
return block;
return NULL;
}
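/*	Read-only access sketch (illustrative): every successful get must be
	balanced by a put, or the block can never become unused again.

		const void* data = block_cache_get(cache, blockNumber);
		if (data == NULL)
			return B_IO_ERROR;
		// ... read from "data" ...
		block_cache_put(cache, blockNumber);
*/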
/*!	Allows you to reset the dirty state of a block you hold writable; this is
helpful in case you realize you don't need to change that block anymore
for whatever reason.
Note, you must only use this function on blocks that were acquired
writable!
*/
status_t
block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
int32 transaction)
{
block_cache* cache = (block_cache*)_cache;
WriteLocker locker(&cache->lock);
cached_block* block = cache->hash.Lookup(blockNumber);
if (block == NULL)
return B_BAD_VALUE;
if (block->is_dirty == dirty) {
// nothing to do for us
return B_OK;
}
if (dirty)
panic("block_cache_set_dirty(): not yet implemented that way!\n");
return B_OK;
}
void
block_cache_put(void* _cache, off_t blockNumber)
{
block_cache* cache = (block_cache*)_cache;
WriteLocker locker(&cache->lock, false, false);
rw_lock_read_lock(&cache->lock);
put_cached_block(cache, blockNumber, &locker);
if (!locker.IsLocked())
rw_lock_read_unlock(&cache->lock);
}
/*!	Allocates the requested blocks and schedules them to be read from disk,
but does not get references to the blocks.
@param blockNumber The index of the first requested block.
@param _numBlocks As input, the number of blocks requested. As output, the number of
blocks actually scheduled. Prefetching will stop short if the requested range includes a
block that is already cached.
*/
status_t
block_cache_prefetch(void* _cache, off_t blockNumber, size_t* _numBlocks)
{
#ifndef BUILDING_USERLAND_FS_SERVER
TRACE(("block_cache_prefetch: fetching %" B_PRIuSIZE " blocks starting with %" B_PRIdOFF "\n",
*_numBlocks, blockNumber));
block_cache* cache = reinterpret_cast<block_cache*>(_cache);
WriteLocker locker(&cache->lock);
size_t numBlocks = *_numBlocks;
*_numBlocks = 0;
BlockPrefetcher* blockPrefetcher = new BlockPrefetcher(cache, blockNumber, numBlocks);
status_t status = blockPrefetcher->Allocate();
if (status != B_OK || blockPrefetcher->NumAllocated() == 0) {
TRACE(("block_cache_prefetch returning early (%s): allocated %" B_PRIuSIZE "\n",
strerror(status), blockPrefetcher->NumAllocated()));
delete blockPrefetcher;
return status;
}
numBlocks = blockPrefetcher->NumAllocated();
status = blockPrefetcher->ReadAsync(locker);
if (status == B_OK)
*_numBlocks = numBlocks;
return status;
#else
*_numBlocks = 0;
return B_UNSUPPORTED;
#endif
}
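/*	Prefetch sketch (illustrative): request a read-ahead of 16 blocks; on
	return, the in/out parameter reports how many were actually scheduled.

		size_t numBlocks = 16;
		block_cache_prefetch(cache, blockNumber, &numBlocks);
		// numBlocks now holds the number of blocks scheduled for reading
*/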