* Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de.
* This file may be used under the terms of the MIT License.
*/
#include "Debug.h"
#include "Inode.h"
#include "BPlusTree.h"
#include "Index.h"
#if BFS_TRACING && !defined(FS_SHELL) && !defined(_BOOT_MODE)
namespace BFSInodeTracing {
/*!	Trace entry describing the creation of an inode: it stores the new inode
	and its parent (pointer and ID each), a copy of the entry name, and the
	mode, open mode and type arguments that were passed in.
*/
class Create : public AbstractTraceEntry {
public:
	Create(Inode* inode, Inode* parent, const char* name, int32 mode,
			int openMode, uint32 type)
		:
		fInode(inode),
		fID(inode->ID()),
		fParent(parent),
		fParentID(parent != NULL ? parent->ID() : 0),
		fMode(mode),
		fOpenMode(openMode),
		fType(type)
	{
		// A missing name is stored as the empty string; longer names are
		// truncated to the size of fName.
		const char* source = name != NULL ? name : "";
		strlcpy(fName, source, sizeof(fName));

		Initialized();
	}

	/*!	Prints all recorded values to \a out. */
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:Create %lld (%p), parent %lld (%p), \"%s\", "
			"mode %lx, omode %x, type %lx", fID, fInode, fParentID,
			fParent, fName, fMode, fOpenMode, fType);
	}

private:
	Inode*	fInode;
	ino_t	fID;
	Inode*	fParent;
	ino_t	fParentID;
	char	fName[32];
	int32	fMode;
	int		fOpenMode;
	uint32	fType;
};
// Trace entry that records the removal of an inode: the inode pointer, its
// ID, and a copy of the name passed to the constructor.
class Remove : public AbstractTraceEntry {
public:
Remove(Inode* inode, const char* name)
:
fInode(inode),
fID(inode->ID())
{
// NOTE(review): unlike Create, "name" is copied without a NULL check, so
// callers presumably always pass a valid string - confirm.
// Names that do not fit into fName are truncated.
strlcpy(fName, name, sizeof(fName));
Initialized();
}
// Prints the recorded ID, inode pointer and name to "out".
virtual void AddDump(TraceOutput& out)
{
out.Print("bfs:Remove %lld (%p), \"%s\"", fID, fInode, fName);
}
private:
Inode* fInode;
ino_t fID;
char fName[32];
};
// Generic trace entry: records a short action label together with the inode
// pointer and ID it refers to.
class Action : public AbstractTraceEntry {
public:
Action(const char* action, Inode* inode)
:
fInode(inode),
fID(inode->ID())
{
// Labels longer than the 16 byte buffer are truncated.
strlcpy(fAction, action, sizeof(fAction));
Initialized();
}
// Prints "bfs:<action> <id> (<pointer>)" to "out".
// NOTE(review): this is the only entry in this namespace whose format string
// ends with a newline - confirm whether that is intended.
virtual void AddDump(TraceOutput& out)
{
out.Print("bfs:%s %lld (%p)\n", fAction, fID, fInode);
}
private:
Inode* fInode;
ino_t fID;
char fAction[16];
};
// Trace entry for a size change of an inode: records the inode, the old and
// the new size, and whether the operation was a trim.
class Resize : public AbstractTraceEntry {
public:
Resize(Inode* inode, off_t oldSize, off_t newSize, bool trim)
:
fInode(inode),
fID(inode->ID()),
fOldSize(oldSize),
fNewSize(newSize),
fTrim(trim)
{
Initialized();
}
// Prints the entry as "Trim" or "Resize" (depending on fTrim) followed by
// the ID, the inode pointer, and "old size -> new size".
virtual void AddDump(TraceOutput& out)
{
out.Print("bfs:%s %lld (%p), %lld -> %lld", fTrim ? "Trim" : "Resize",
fID, fInode, fOldSize, fNewSize);
}
private:
Inode* fInode;
ino_t fID;
off_t fOldSize;
off_t fNewSize;
bool fTrim;
};
}
// Tracing enabled: T(x) creates a BFSInodeTracing entry via a non-throwing
// new expression; the expansion already ends with a semicolon.
# define T(x) new(std::nothrow) BFSInodeTracing::x;
#else
// Tracing disabled: T(x) expands to an empty statement.
# define T(x) ;
#endif
of an inode creation in progress.
This class will make sure everything is cleaned up properly.
*/
// Tracks the transaction, the block run, and the Inode object of an inode
// that is being created. Unless Keep() succeeded, the destructor undoes the
// creation.
class InodeAllocator {
public:
InodeAllocator(Transaction& transaction);
~InodeAllocator();
// Allocates the block run and the in-memory Inode for a new inode.
status_t New(block_run* parentRun, mode_t mode, uint32 flags,
block_run& run, fs_vnode_ops* vnodeOps,
Inode** _inode);
// Creates the B+tree of the new inode.
status_t CreateTree();
// Writes the new inode back, publishes it if requested, and gives up
// ownership so that the destructor no longer cleans up.
status_t Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags);
private:
// Transaction listener hook that Keep() registers for TRANSACTION_ABORTED.
static void _TransactionListener(int32 id, int32 event,
void* _inode);
// Set to NULL once the destructor must not release anything anymore.
Transaction* fTransaction;
// The block run allocated by New().
block_run fRun;
// The inode under construction; NULL before New() and after Keep().
Inode* fInode;
};
// Remembers the transaction the allocation belongs to; no inode exists yet.
// NOTE(review): fRun is not initialized here, while the destructor passes it
// to Volume::Free() when fInode is NULL and fTransaction is still set -
// presumably New() is always called right after construction; confirm.
InodeAllocator::InodeAllocator(Transaction& transaction)
:
fTransaction(&transaction),
fInode(NULL)
{
}
/*!	Undoes the allocation unless it has been handed over before.
	Keep() as well as a failed block allocation in New() reset fTransaction
	to NULL, in which case only the Inode object (if any) is deleted.
*/
InodeAllocator::~InodeAllocator()
{
	if (fTransaction != NULL) {
		Volume* volume = fTransaction->GetVolume();

		if (fInode == NULL) {
			// There is no Inode object, so only the block run is given back.
			volume->Free(*fTransaction, fRun);
		} else {
			// Mark the on-disk structure as unused and free the inode.
			fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
			fInode->Free(*fTransaction);

			// The transaction must no longer notify the tree or the inode.
			if (fInode->fTree != NULL)
				fTransaction->RemoveListener(fInode->fTree);
			fTransaction->RemoveListener(fInode);

			remove_vnode(volume->FSVolume(), fInode->ID());
		}
	}

	delete fInode;
}
/*!	Allocates the blocks for a new inode and creates its Inode object.

	\param parentRun Passed on to Volume::AllocateForInode().
	\param mode Mode of the new inode.
	\param publishFlags If BFS_DO_NOT_PUBLISH_VNODE is set, no vnode is
		created for the inode.
	\param run Set to the block run that was allocated.
	\param vnodeOps Operations for the new vnode; gBFSVnodeOps if NULL.
	\param _inode Set to the new, write locked inode on success.
	\return B_OK on success, B_NO_MEMORY if the Inode object could not be
		allocated, or the error of the allocation respectively new_vnode().
*/
status_t
InodeAllocator::New(block_run* parentRun, mode_t mode, uint32 publishFlags,
	block_run& run, fs_vnode_ops* vnodeOps, Inode** _inode)
{
	Volume* volume = fTransaction->GetVolume();

	status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode,
		fRun);
	if (status < B_OK) {
		// Nothing was allocated; clearing the transaction keeps the
		// destructor from releasing anything.
		fTransaction = NULL;
		RETURN_ERROR(status);
	}

	run = fRun;

	fInode = new(std::nothrow) Inode(volume, *fTransaction,
		volume->ToVnode(run), mode, run);
	if (fInode == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	const bool createVnode = !volume->IsInitializing()
		&& (publishFlags & BFS_DO_NOT_PUBLISH_VNODE) == 0;
	if (createVnode) {
		if (vnodeOps == NULL)
			vnodeOps = &gBFSVnodeOps;

		status = new_vnode(volume->FSVolume(), fInode->ID(), fInode,
			vnodeOps);
		if (status < B_OK) {
			// With fInode reset, the destructor frees the block run only.
			delete fInode;
			fInode = NULL;
			RETURN_ERROR(status);
		}
	}

	fInode->WriteLockInTransaction(*fTransaction);

	*_inode = fInode;
	return B_OK;
}
/*!	Creates the B+tree for the new inode and attaches it to the inode.
	If the inode's mode has none of the S_INDEX_TYPES bits set, S_STR_INDEX
	is added to it. For regular nodes, the "." and ".." entries are inserted.

	\return B_OK on success, B_ERROR if the tree could not be allocated or an
		entry could not be inserted, or the tree's InitCheck() error.
*/
status_t
InodeAllocator::CreateTree()
{
	Volume* volume = fTransaction->GetVolume();

	if ((fInode->Mode() & S_INDEX_TYPES) == 0)
		fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX);

	BPlusTree* tree = new(std::nothrow) BPlusTree(*fTransaction, fInode);
	if (tree == NULL)
		return B_ERROR;

	const status_t status = tree->InitCheck();
	if (status != B_OK) {
		delete tree;
		return status;
	}

	// From here on the inode owns the tree.
	fInode->fTree = tree;

	if (!fInode->IsRegularNode())
		return B_OK;

	if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK)
		return B_ERROR;

	if (tree->Insert(*fTransaction, "..",
			volume->ToVnode(fInode->Parent())) < B_OK)
		return B_ERROR;

	return B_OK;
}
/*!	Writes the new inode back and publishes its vnode, unless it is a
	symlink, the volume is initializing, or BFS_DO_NOT_PUBLISH_VNODE is set.
	Afterwards the allocator gives up the inode and the transaction, so that
	the destructor does not undo anything. If writing back the inode fails,
	the allocator keeps both, and the destructor cleans up.

	\param vnodeOps Operations used for publishing; gBFSVnodeOps if NULL.
	\param publishFlags Flags passed on to publish_vnode().
	\return The error of WriteBack(), or the result of publish_vnode() if it
		was called.
*/
status_t
InodeAllocator::Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags)
{
	ASSERT(fInode != NULL && fTransaction != NULL);

	Volume* volume = fTransaction->GetVolume();

	status_t status = fInode->WriteBack(*fTransaction);
	if (status < B_OK) {
		FATAL(("writing new inode %" B_PRIdINO " failed!\n", fInode->ID()));
		return status;
	}

	const bool publish = !fInode->IsSymLink() && !volume->IsInitializing()
		&& (publishFlags & BFS_DO_NOT_PUBLISH_VNODE) == 0;
	if (publish) {
		if (vnodeOps == NULL)
			vnodeOps = &gBFSVnodeOps;

		status = publish_vnode(volume->FSVolume(), fInode->ID(), fInode,
			vnodeOps, fInode->Mode(), publishFlags);
	}

	if (status == B_OK) {
		// Get notified in case the transaction is aborted later on.
		cache_add_transaction_listener(volume->BlockCache(), fTransaction->ID(),
			TRANSACTION_ABORTED, &_TransactionListener, fInode);
	}

	fTransaction = NULL;
	fInode = NULL;

	return status;
}
/*!	Transaction hook registered by Keep(): panics when the transaction with
	the given \a id is aborted; all other events are ignored.
*/
void
InodeAllocator::_TransactionListener(int32 id, int32 event, void* _inode)
{
	if (event != TRANSACTION_ABORTED)
		return;

	Inode* inode = static_cast<Inode*>(_inode);
	panic("transaction %d aborted, inode %p still around!\n", (int)id, inode);
}
/*!	Checks the on-disk inode structure for plausibility against \a volume.

	\return B_BAD_DATA if the magic, the in-use flag, the inode size, or the
		inode/parent/attributes block runs are not acceptable,
		B_NOT_ALLOWED if the inode is marked as deleted, B_OK otherwise.
*/
status_t
bfs_inode::InitCheck(Volume* volume) const
{
	// The checks are evaluated in the same order as a single short-circuit
	// expression would, and stop at the first failure.
	bool valid = Magic1() == INODE_MAGIC1
		&& (Flags() & INODE_IN_USE) != 0
		&& inode_num.Length() == 1
		&& (uint32)InodeSize() == volume->InodeSize();

	if (valid) {
		// the parent directory's block run
		valid = parent.AllocationGroup() <= int32(volume->AllocationGroups())
			&& parent.AllocationGroup() >= 0
			&& parent.Start() <= (1L << volume->AllocationGroupShift())
			&& parent.Length() == 1;
	}

	if (valid) {
		// the attribute directory's block run
		valid = attributes.AllocationGroup()
				<= int32(volume->AllocationGroups())
			&& attributes.AllocationGroup() >= 0
			&& attributes.Start() <= (1L << volume->AllocationGroupShift());
	}

	if (!valid)
		RETURN_ERROR(B_BAD_DATA);

	if (Flags() & INODE_DELETED)
		return B_NOT_ALLOWED;

	return B_OK;
}
// Creates the in-memory object for the existing inode "id" of "volume" and
// reads its contents from disk. Errors are not reported here: on failure the
// tree, file cache and file map simply stay NULL.
Inode::Inode(Volume* volume, ino_t id)
:
fVolume(volume),
fID(id),
fTree(NULL),
fAttributes(NULL),
fCache(NULL),
fMap(NULL)
{
PRINT(("Inode::Inode(volume = %p, id = %" B_PRIdINO ") @ %p\n",
volume, id, this));
rw_lock_init(&fLock, "bfs inode");
recursive_lock_init(&fSmallDataLock, "bfs inode small data");
// NOTE(review): when reading fails, fOldSize and fOldLastModified are not
// assigned - confirm that nothing reads them in that case.
if (UpdateNodeFromDisk() != B_OK) {
return;
}
// Remember the size and modification time the inode was loaded with.
fOldSize = Size();
fOldLastModified = LastModified();
// Containers get a B+tree object; a failed allocation leaves fTree NULL.
if (IsContainer())
fTree = new(std::nothrow) BPlusTree(this);
if (NeedsFileCache()) {
SetFileCache(file_cache_create(fVolume->ID(), ID(), Size()));
SetMap(file_map_create(volume->ID(), ID(), Size()));
}
}
// Creates the in-memory object for a new inode with the given "id", "mode"
// and block "run", and initializes the in-memory bfs_inode structure. No
// tree, file cache or file map is created here.
Inode::Inode(Volume* volume, Transaction& transaction, ino_t id, mode_t mode,
block_run& run)
:
fVolume(volume),
fID(id),
fTree(NULL),
fAttributes(NULL),
fCache(NULL),
fMap(NULL)
{
PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %" B_PRIdINO
") @ %p\n", volume, &transaction, id, this));
rw_lock_init(&fLock, "bfs inode");
recursive_lock_init(&fSmallDataLock, "bfs inode small data");
// Get the inode's block writable within the transaction.
NodeGetter node(volume);
status_t status = node.SetToWritable(transaction, this, true);
if (status != B_OK) {
// NOTE(review): fNode, fOldSize and fOldLastModified stay unset on this
// path - confirm that callers detect the failure before using them.
FATAL(("Could not read inode block %" B_PRId64 ": %s!\n", BlockNumber(),
strerror(status)));
return;
}
// Start with an all-zero structure and fill in the basic fields; values are
// stored in BFS (on-disk) byte order.
memset(&fNode, 0, sizeof(bfs_inode));
Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1);
Node().inode_num = run;
Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode);
Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
// All three time stamps are set to the current time.
Node().create_time = Node().last_modified_time = Node().status_change_time
= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize());
fOldSize = Size();
fOldLastModified = LastModified();
}
// Releases the file cache, the file map, the B+tree object, and the two
// locks that the constructors set up.
Inode::~Inode()
{
PRINT(("Inode::~Inode() @ %p\n", this));
file_cache_delete(FileCache());
file_map_delete(Map());
delete fTree;
rw_lock_destroy(&fLock);
recursive_lock_destroy(&fSmallDataLock);
}
/*!	Checks whether the inode object is usable.

	\param checkNode If \c true, the bfs_inode structure is validated, too.
	\return B_OK if everything is fine, B_BUSY if the node check reported
		that, B_BAD_DATA if the node or the tree is corrupt, B_NO_MEMORY if
		a required tree, file cache, or file map is missing.
*/
status_t
Inode::InitCheck(bool checkNode) const
{
	if (checkNode) {
		const status_t nodeStatus = Node().InitCheck(fVolume);
		if (nodeStatus == B_BUSY)
			return B_BUSY;

		if (nodeStatus != B_OK) {
			FATAL(("inode at block %" B_PRIdOFF " corrupt!\n", BlockNumber()));
			RETURN_ERROR(B_BAD_DATA);
		}
	}

	if (IsContainer()) {
		// containers must have a valid B+tree
		if (fTree == NULL)
			RETURN_ERROR(B_NO_MEMORY);

		if (fTree->InitCheck() != B_OK) {
			FATAL(("inode tree at block %" B_PRIdOFF " corrupt!\n",
				BlockNumber()));
			RETURN_ERROR(B_BAD_DATA);
		}
	}

	if (!NeedsFileCache())
		return B_OK;

	// both, the file cache and the file map are needed
	return fCache != NULL && fMap != NULL ? B_OK : B_NO_MEMORY;
}
be write locked until the transaction ended.
To ensure that the inode will stay valid until that point, an extra reference
is acquired to it as long as this transaction stays active.
*/
// Write locks the inode and registers it as a listener of "transaction".
// Does nothing if the inode is already flagged as being part of a
// transaction.
void
Inode::WriteLockInTransaction(Transaction& transaction)
{
// Already part of a transaction, nothing to do.
if ((Flags() & INODE_IN_TRANSACTION) != 0)
return;
// A deleted inode is taken off the volume's list of removed inodes.
if ((Flags() & INODE_DELETED) != 0)
fVolume->RemovedInodes().Remove(this);
// Acquire a vnode reference, except while the volume is initializing, and
// except for the indices node.
if (!fVolume->IsInitializing() && this != fVolume->IndicesNode())
acquire_vnode(fVolume->FSVolume(), ID());
rw_lock_write_lock(&Lock());
Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
transaction.AddListener(this);
}
/*!	Copies the in-memory bfs_inode structure into the inode's block, which
	is made writable as part of \a transaction.

	\return B_OK on success, or the error of getting the writable block.
*/
status_t
Inode::WriteBack(Transaction& transaction)
{
	NodeGetter node(fVolume);

	const status_t status = node.SetToWritable(transaction, this);
	if (status == B_OK)
		memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode));

	return status;
}
/*!	Replaces the in-memory bfs_inode structure with the contents of the
	inode's block, keeping only the INODE_PERMANENT_FLAGS of its flags.

	\return B_OK on success, or the error of reading the block.
*/
status_t
Inode::UpdateNodeFromDisk()
{
	NodeGetter node(fVolume);

	const status_t status = node.SetTo(this);
	if (status == B_OK) {
		memcpy(&fNode, node.Node(), sizeof(bfs_inode));
		fNode.flags &= HOST_ENDIAN_TO_BFS_INT32(INODE_PERMANENT_FLAGS);
		return B_OK;
	}

	FATAL(("Failed to read block %" B_PRId64 " from disk: %s!\n",
		BlockNumber(), strerror(status)));
	return status;
}
/*!	Checks whether the requested \a accessMode is permitted on this inode.

	\return B_READ_ONLY_DEVICE if write access is requested on a read-only
		volume, otherwise the result of check_access_permissions() for the
		inode's mode, group, and user.
*/
status_t
Inode::CheckPermissions(int accessMode) const
{
	const bool wantsWrite = (accessMode & W_OK) != 0;
	if (wantsWrite && fVolume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	const gid_t group = (gid_t)fNode.GroupID();
	const uid_t user = (uid_t)fNode.UserID();

	return check_access_permissions(accessMode, Mode(), group, user);
}
// Adds "iterator" to the inode's list of attribute iterators; the list is
// accessed with fSmallDataLock held.
void
Inode::_AddIterator(AttributeIterator* iterator)
{
RecursiveLocker _(fSmallDataLock);
fIterators.Add(iterator);
}
// Removes "iterator" from the inode's list of attribute iterators; the list
// is accessed with fSmallDataLock held.
void
Inode::_RemoveIterator(AttributeIterator* iterator)
{
RecursiveLocker _(fSmallDataLock);
fIterators.Remove(iterator);
}
attributes to real files. Used for system attributes like the name.
You need to hold the fSmallDataLock when you call this method
*/
/*!	Frees at least \a bytes in the small_data section of \a node by moving
	items out into real attribute files. The file name item and the item
	called \a name are never moved.
	You need to hold the fSmallDataLock when you call this method.

	\param transaction The transaction all changes are made in.
	\param node The writable inode structure to make space in.
	\param name Name of the attribute that must stay in the section.
	\param bytes The number of bytes that need to be freed.
	\return B_OK if enough space was freed, B_ERROR if no suitable item was
		found, or the error of creating or writing the attribute file.
*/
status_t
Inode::_MakeSpaceForSmallData(Transaction& transaction, bfs_inode* node,
	const char* name, int32 bytes)
{
	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);

	while (bytes > 0) {
		small_data* item = node->SmallDataStart();
		small_data* max = NULL;
		int32 index = 0, maxIndex = 0;
		for (; !item->IsLast(node); item = item->Next(), index++) {
			// these items must not be removed
			if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name()))
				continue;

			if (max == NULL || max->Size() < item->Size()) {
				maxIndex = index;
				max = item;
			}

			// stop at the first item that is large enough to free the
			// needed amount of bytes
			if (bytes < (int32)item->Size())
				break;
		}

		if (item->IsLast(node) || (int32)item->Size() < bytes || max == NULL)
			return B_ERROR;

		bytes -= max->Size();

		// move the item's contents to a real attribute file

		Inode* attribute;
		status_t status = CreateAttribute(transaction, item->Name(),
			item->Type(), &attribute);
		if (status != B_OK)
			RETURN_ERROR(status);

		size_t length = item->DataSize();
		status = attribute->WriteAt(transaction, 0, item->Data(), &length);

		ReleaseAttribute(attribute);

		if (status != B_OK) {
			// Roll back: remove the attribute file that was just created. It
			// was created under the item's name - "name" denotes the
			// attribute the caller wants to keep, and must not be removed.
			Vnode vnode(fVolume, Attributes());
			Inode* attributes;
			if (vnode.Get(&attributes) < B_OK
				|| attributes->Remove(transaction, item->Name()) < B_OK) {
				FATAL(("Could not remove newly created attribute!\n"));
			}

			RETURN_ERROR(status);
		}

		_RemoveSmallData(node, max, maxIndex);
	}
	return B_OK;
}
section.
You need to hold the fSmallDataLock when you call this method
*/
// Removes "item" from the small_data section of "node" by moving all
// following items over it and clearing the space that becomes free.
// "index" is the position of the item in the section; it is used to update
// the attribute iterators of this inode.
// Returns B_BAD_DATA if the computed size of the following items is negative
// or reaches beyond the block, B_OK otherwise.
status_t
Inode::_RemoveSmallData(bfs_inode* node, small_data* item, int32 index)
{
ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
small_data* next = item->Next();
if (!next->IsLast(node)) {
// Find the end of the list to know how many bytes have to be moved.
small_data* last = next;
while (!last->IsLast(node))
last = last->Next();
int32 size = (uint8*)last - (uint8*)next;
// Refuse to move anything if the size is implausible.
if (size < 0
|| size > (uint8*)node + fVolume->BlockSize() - (uint8*)next)
return B_BAD_DATA;
memmove(item, next, size);
// Compute the new end of the list, and clear everything from there to the
// end of the block.
last = (small_data*)((uint8*)last - ((uint8*)next - (uint8*)item));
memset(last, 0, (uint8*)node + fVolume->BlockSize() - (uint8*)last);
} else
memset(item, 0, item->Size());
// Tell all attribute iterators that the item at "index" is gone.
SinglyLinkedList<AttributeIterator>::ConstIterator iterator
= fIterators.GetIterator();
while (iterator.HasNext()) {
iterator.Next()->Update(index, -1);
}
return B_OK;
}
/*!	Removes the item called \a name from the small_data section of the
	inode, updates the status change time, and writes the inode back.

	\return B_BAD_VALUE if \a name is NULL, B_ENTRY_NOT_FOUND if there is no
		such item, or the error of making the node writable, removing the
		item, or writing back the inode.
*/
status_t
Inode::_RemoveSmallData(Transaction& transaction, NodeGetter& nodeGetter,
	const char* name)
{
	if (name == NULL)
		return B_BAD_VALUE;

	bfs_inode* node = nodeGetter.WritableNode();
	RecursiveLocker locker(fSmallDataLock);

	// look for the item, and keep track of its position in the list
	small_data* item = node->SmallDataStart();
	int32 index = 0;
	for (; !item->IsLast(node); item = item->Next(), index++) {
		if (strcmp(item->Name(), name) == 0)
			break;
	}

	if (item->IsLast(node))
		return B_ENTRY_NOT_FOUND;

	status_t status = nodeGetter.MakeWritable(transaction);
	if (status != B_OK)
		return status;

	status = _RemoveSmallData(node, item, index);
	if (status != B_OK)
		return status;

	Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
		bfs_inode::ToInode(real_time_clock_usecs()));

	return WriteBack(transaction);
}
new attribute is too big to fit in that section, it returns B_DEVICE_FULL.
In that case, the attribute should be written to a real attribute file;
it's the caller's responsibility to remove any existing attributes in the
small data section if that's the case.
Note that you need to write back the inode yourself after having called that
method - it's a bad API decision that it needs a transaction but enforces
you to write back the inode all by yourself, but it's just more efficient
in most cases...
*/
// Writes "length" bytes from "data" at offset "pos" into the small_data item
// called "name", creating the item if it does not exist yet.
// Returns B_BAD_VALUE for missing arguments, B_DEVICE_FULL if the data does
// not fit (and "force" is not set), B_ERROR if no space could be made,
// B_BAD_ADDRESS if copying the data failed, or the error of making the node
// writable. With "force" set, other items are moved out of the section via
// _MakeSpaceForSmallData() to make room.
status_t
Inode::_AddSmallData(Transaction& transaction, NodeGetter& nodeGetter,
const char* name, uint32 type, off_t pos, const uint8* data, size_t length,
bool force)
{
bfs_inode* node = nodeGetter.WritableNode();
if (node == NULL || name == NULL || data == NULL)
return B_BAD_VALUE;
// Space needed for the complete item: header, name, data up to
// "pos + length", plus 4 extra bytes (presumably padding and terminators -
// confirm against the small_data layout).
uint32 nameLength = strlen(name);
uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + pos + length + 1;
// The item could never fit into the inode, not even into an empty section.
if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode))
return B_DEVICE_FULL;
status_t status = nodeGetter.MakeWritable(transaction);
if (status != B_OK)
return status;
RecursiveLocker locker(fSmallDataLock);
// Look for an existing item with that name.
small_data* item = node->SmallDataStart();
int32 index = 0;
while (!item->IsLast(node) && strcmp(item->Name(), name)) {
item = item->Next();
index++;
}
if (!item->IsLast(node)) {
// The item exists already and is updated in place; "last" is the end of
// the item list.
small_data* last = item;
while (!last->IsLast(node))
last = last->Next();
// Proceed if the item shrinks, if space is to be forced, or if the grown
// item still fits into the inode.
// NOTE(review): this compares the raw data_size field, while all other
// places go through DataSize() - confirm the byte order handling.
if (item->data_size > pos + length
|| force
|| ((uint8*)last + pos + length - item->DataSize())
<= ((uint8*)node + fVolume->InodeSize())) {
if (force && ((uint8*)last + pos + length - item->DataSize())
> ((uint8*)node + fVolume->InodeSize())) {
// Not enough room: move other items out until the missing number of
// bytes is available.
uint32 needed = pos + length - item->DataSize() -
(uint32)((uint8*)node + fVolume->InodeSize()
- (uint8*)last);
if (_MakeSpaceForSmallData(transaction, node, name, needed)
!= B_OK)
return B_ERROR;
// Items may have moved, so the item and the end of the list have to be
// looked up again.
item = node->SmallDataStart();
index = 0;
while (!item->IsLast(node) && strcmp(item->Name(), name)) {
item = item->Next();
index++;
}
last = item;
while (!last->IsLast(node))
last = last->Next();
}
size_t oldDataSize = item->DataSize();
if (pos + length != item->DataSize()) {
// The size of the item changes: move the following items to their new
// position, and clear the space behind the new end of the list.
small_data* next = item->Next();
if (!next->IsLast(node)) {
memmove((uint8*)item + spaceNeeded, next,
(uint8*)last - (uint8*)next);
}
last = (small_data*)((uint8*)last
- ((uint8*)next - ((uint8*)item + spaceNeeded)));
if ((uint8*)last < (uint8*)node + fVolume->BlockSize()) {
memset(last, 0, (uint8*)node + fVolume->BlockSize()
- (uint8*)last);
}
item->data_size = HOST_ENDIAN_TO_BFS_INT16(pos + length);
}
item->type = HOST_ENDIAN_TO_BFS_INT32(type);
// Fill the gap between the old end of the data and "pos" with zeros.
if ((uint64)oldDataSize < (uint64)pos) {
memset(item->Data() + oldDataSize, 0, pos - oldDataSize);
}
if (user_memcpy(item->Data() + pos, data, length) < B_OK)
return B_BAD_ADDRESS;
// The data is always followed by a null byte.
item->Data()[pos + length] = '\0';
return B_OK;
}
return B_DEVICE_FULL;
}
// The item does not exist yet; "item" points to the end of the list, where
// the new item is appended.
if ((uint8*)item + spaceNeeded > (uint8*)node + fVolume->InodeSize()) {
if (!force)
return B_DEVICE_FULL;
if (_MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK)
return B_ERROR;
// Find the (new) end of the list again.
item = node->SmallDataStart();
index = 0;
while (!item->IsLast(node)) {
item = item->Next();
index++;
}
}
memset(item, 0, spaceNeeded);
item->type = HOST_ENDIAN_TO_BFS_INT32(type);
item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength);
// NOTE(review): data_size is set to "length" although the data is copied to
// offset "pos" below - looks like "pos + length" would be expected for
// pos > 0; confirm.
item->data_size = HOST_ENDIAN_TO_BFS_INT16(length);
strcpy(item->Name(), name);
if (user_memcpy(item->Data() + pos, data, length) < B_OK)
return B_BAD_ADDRESS;
// Clear the rest of the inode behind the new item, unless the list already
// ends there.
item = item->Next();
if (!item->IsLast(node))
memset(item, 0, (uint8*)node + fVolume->InodeSize() - (uint8*)item);
// Tell all attribute iterators that an item was added at "index".
SinglyLinkedList<AttributeIterator>::ConstIterator iterator
= fIterators.GetIterator();
while (iterator.HasNext()) {
iterator.Next()->Update(index, 1);
}
return B_OK;
}
To start at the beginning of this section, you let smallData
point to NULL, like:
small_data* data = NULL;
while (inode->GetNextSmallData(&data)) { ... }
This function is reentrant and doesn't allocate any memory;
you can safely stop calling it at any point (you don't need
to iterate through the whole list).
You need to hold the fSmallDataLock when you call this method
*/
/*!	Advances \a _smallData to the next item of the small_data section of
	\a node; if it points to NULL, the first item is returned.
	You need to hold the fSmallDataLock when you call this method.

	\return B_BAD_VALUE if \a node is NULL, B_ENTRY_NOT_FOUND at the end of
		the list (\a _smallData is left untouched then), B_OK otherwise.
*/
status_t
Inode::_GetNextSmallData(bfs_inode* node, small_data** _smallData) const
{
	if (node == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);

	small_data* previous = *_smallData;
	small_data* next = previous != NULL
		? previous->Next() : node->SmallDataStart();

	if (next->IsLast(node))
		return B_ENTRY_NOT_FOUND;

	*_smallData = next;
	return B_OK;
}
returns a pointer to it (or NULL if it doesn't exist).
You need to hold the fSmallDataLock when you call this method
*/
/*!	Searches the small_data section of \a node for the item called \a name.
	You need to hold the fSmallDataLock when you call this method.

	\return The matching item, or NULL if there is none.
*/
small_data*
Inode::FindSmallData(const bfs_inode* node, const char* name) const
{
	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);

	bfs_inode* mutableNode = const_cast<bfs_inode*>(node);

	for (small_data* current = NULL;
			_GetNextSmallData(mutableNode, &current) == B_OK;) {
		if (strcmp(current->Name(), name) == 0)
			return current;
	}

	return NULL;
}
section, NULL otherwise.
You need to hold the fSmallDataLock when you call this method
*/
const char*
Inode::Name(const bfs_inode* node) const
{
ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
small_data* smallData = NULL;
while (_GetNextSmallData((bfs_inode*)node, &smallData) == B_OK) {
if (*smallData->Name() == FILE_NAME_NAME
&& smallData->NameSize() == FILE_NAME_NAME_LENGTH)
return (const char*)smallData->Data();
}
return NULL;
}
The buffer should be B_FILE_NAME_LENGTH bytes large.
*/
/*!	Copies the name stored in the inode's small_data section into
	\a buffer, truncating it to \a size bytes if necessary.

	\return B_OK on success, B_ENTRY_NOT_FOUND if the inode has no name
		item, or the error of reading the inode's block.
*/
status_t
Inode::GetName(char* buffer, size_t size) const
{
	NodeGetter node(fVolume);

	status_t status = node.SetTo(this);
	if (status == B_OK) {
		RecursiveLocker locker(fSmallDataLock);

		const char* stored = Name(node.Node());
		if (stored != NULL)
			strlcpy(buffer, stored, size);
		else
			status = B_ENTRY_NOT_FOUND;
	}

	return status;
}
doesn't change it in the parent directory's b+tree.
Note that you need to write back the inode yourself after having called
that method. It suffers from the same API decision as AddSmallData() does
(and for the same reason).
*/
/*!	Stores \a name as the file name item in the inode's small_data section,
	forcing space to be made for it if necessary. The inode is not written
	back by this method.

	\return B_BAD_VALUE if \a name is NULL or empty, otherwise the error of
		getting the writable node, or the result of _AddSmallData().
*/
status_t
Inode::SetName(Transaction& transaction, const char* name)
{
	if (name == NULL || name[0] == '\0')
		return B_BAD_VALUE;

	NodeGetter node(fVolume);
	const status_t status = node.SetToWritable(transaction, this);
	if (status != B_OK)
		return status;

	// the name of the small_data item that holds the file name
	const char nameTag[2] = {FILE_NAME_NAME, 0};
	const size_t nameLength = strlen(name);

	return _AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 0,
		(uint8*)name, nameLength, true);
}
// Removes the attribute file called "name" from the inode's attribute
// directory. If "index" is given, and there is an index or a live query for
// the attribute, the index is updated with the old attribute data first.
// If the attribute directory is empty afterwards, it is removed as well.
// Returns the error of getting the attribute directory, or the result of
// removing the attribute from it.
status_t
Inode::_RemoveAttribute(Transaction& transaction, const char* name,
bool hasIndex, Index* index)
{
Vnode vnode(fVolume, Attributes());
Inode* attributes;
status_t status = vnode.Get(&attributes);
if (status < B_OK)
return status;
if (index != NULL) {
Inode* attribute;
if ((hasIndex || fVolume->CheckForLiveQuery(name))
&& GetAttribute(name, &attribute) == B_OK) {
// Read at most MAX_INDEX_KEY_LENGTH bytes of the old data, and pass them
// to the index as the old value (with no new value).
uint8 data[MAX_INDEX_KEY_LENGTH];
size_t length = MAX_INDEX_KEY_LENGTH;
if (attribute->ReadAt(0, data, &length) == B_OK) {
index->Update(transaction, name, attribute->Type(), data,
length, NULL, 0, this);
}
ReleaseAttribute(attribute);
}
}
if ((status = attributes->Remove(transaction, name)) < B_OK)
return status;
if (attributes->IsEmpty()) {
// The attribute directory is no longer needed: remove its vnode, mark it
// deleted, and clear the reference to it in this inode.
attributes->WriteLockInTransaction(transaction);
if (remove_vnode(fVolume->FSVolume(), attributes->ID()) == B_OK) {
attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
if (attributes->WriteBack(transaction) == B_OK) {
Attributes().SetTo(0, 0, 0);
// NOTE(review): the result of this WriteBack() is not checked.
WriteBack(transaction);
} else {
// Writing back failed, so the removal of the directory is undone.
unremove_vnode(fVolume->FSVolume(), attributes->ID());
attributes->Node().flags
&= ~HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
}
}
}
return status;
}
This is a high-level attribute function that understands attributes
in the small_data section as well as real attribute files.
*/
/*!	Reads data from the attribute called \a name, which may either be
	stored in the small_data section or in a real attribute file.

	\param name The name of the attribute.
	\param type Not used by this method.
	\param pos The offset to start reading from; negative values are
		treated as 0.
	\param buffer Where the data is copied to.
	\param _length On input the size of \a buffer, on output the number of
		bytes that were read.
	\return B_OK on success, or the error of reading the inode block,
		copying the data, finding the attribute, or reading from it.
*/
status_t
Inode::ReadAttribute(const char* name, int32 type, off_t pos, uint8* buffer,
	size_t* _length)
{
	if (pos < 0)
		pos = 0;

	// search in the small_data section (which has to be locked first)
	{
		NodeGetter node(fVolume);
		status_t status = node.SetTo(this);
		if (status != B_OK)
			return status;

		RecursiveLocker locker(fSmallDataLock);

		small_data* smallData = FindSmallData(node.Node(), name);
		if (smallData != NULL) {
			// Always use the accessor: the data_size field itself is
			// stored in BFS byte order.
			const off_t dataSize = smallData->DataSize();
			if (pos >= dataSize) {
				*_length = 0;
				return B_OK;
			}

			// Clamp the length to what is left after "pos". Comparing
			// against the remaining size cannot overflow, as adding "pos"
			// to a huge length could.
			const size_t remaining = (size_t)(dataSize - pos);
			size_t length = *_length;
			if (length > remaining)
				length = remaining;

			status_t error = user_memcpy(buffer, smallData->Data() + pos,
				length);
			*_length = length;
			return error;
		}
	}

	// search in the attribute directory
	Inode* attribute;
	status_t status = GetAttribute(name, &attribute);
	if (status == B_OK) {
		status = attribute->ReadAt(pos, (uint8*)buffer, _length);

		ReleaseAttribute(attribute);
	}

	RETURN_ERROR(status);
}
This is a high-level attribute function that understands attributes
in the small_data section as well as real attribute files.
*/
// Writes "*_length" bytes from "buffer" at offset "pos" to the attribute
// called "name". The data is put into the small_data section if it fits, and
// into a real attribute file otherwise; an existing attribute file is removed
// again if its data fits into the small_data section. If "pos" is 0, the
// index of the attribute (if any) is updated as well.
// "_created" (if not NULL) is set to whether the attribute has been created.
// Returns B_BAD_VALUE for a negative "pos", or the error of the failed step.
status_t
Inode::WriteAttribute(Transaction& transaction, const char* name, int32 type,
off_t pos, const uint8* buffer, size_t* _length, bool* _created)
{
if (pos < 0)
return B_BAD_VALUE;
// The old attribute data (at most MAX_INDEX_KEY_LENGTH bytes) is saved for
// the index update at the end.
uint8 oldBuffer[MAX_INDEX_KEY_LENGTH];
uint8* oldData = NULL;
size_t oldLength = 0;
bool created = false;
Index index(fVolume);
bool hasIndex = index.SetTo(name) == B_OK;
Inode* attribute = NULL;
status_t status = B_OK;
if (GetAttribute(name, &attribute) != B_OK) {
// There is no attribute file (yet).
NodeGetter node(fVolume);
status = node.SetToWritable(transaction, this);
if (status != B_OK)
return status;
// Save the old data of the small_data item, if there is one.
recursive_lock_lock(&fSmallDataLock);
small_data* smallData = FindSmallData(node.Node(), name);
if (smallData != NULL) {
oldLength = smallData->DataSize();
if (oldLength > 0) {
if (oldLength > MAX_INDEX_KEY_LENGTH)
oldLength = MAX_INDEX_KEY_LENGTH;
memcpy(oldData = oldBuffer, smallData->Data(), oldLength);
}
} else
created = true;
recursive_lock_unlock(&fSmallDataLock);
// Try the small_data section first; only if the data does not fit there, a
// real attribute file is created.
status = _AddSmallData(transaction, node, name, type, pos, buffer,
*_length);
if (status == B_DEVICE_FULL) {
if (smallData != NULL) {
// The old item has to make way for the attribute file.
status = _RemoveSmallData(transaction, node, name);
} else
status = B_OK;
if (status == B_OK)
status = CreateAttribute(transaction, name, type, &attribute);
if (status != B_OK)
RETURN_ERROR(status);
created = true;
} else if (status == B_OK) {
// The data was stored in the small_data section: update the status change
// time, and write the inode back.
Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
bfs_inode::ToInode(real_time_clock_usecs()));
status = WriteBack(transaction);
}
}
if (attribute != NULL) {
// An attribute file exists, or has just been created.
WriteLocker writeLocker(attribute->fLock);
if (hasIndex || fVolume->CheckForLiveQuery(name)) {
// Save the old attribute data. It is read without holding the lock, and
// read again if the attribute was modified in the meantime.
while (attribute->Size() > 0) {
bigtime_t oldModified = attribute->LastModified();
writeLocker.Unlock();
oldLength = MAX_INDEX_KEY_LENGTH;
if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK)
oldData = oldBuffer;
writeLocker.Lock();
if (oldModified == attribute->LastModified())
break;
oldLength = 0;
}
}
// Check whether the data fits into the small_data section (again).
NodeGetter node(fVolume);
status = node.SetToWritable(transaction, this);
// NOTE(review): this early return does not call ReleaseAttribute(), unlike
// the regular path below - the attribute reference presumably leaks here;
// confirm.
if (status != B_OK)
return status;
status = _AddSmallData(transaction, node, name, type, pos, buffer,
*_length);
if (status == B_OK) {
// It fits, so the attribute file is not needed anymore.
writeLocker.Unlock();
status = _RemoveAttribute(transaction, name, false, NULL);
} else {
// It does not fit: update the type of the attribute file, and write the
// data to it.
attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type);
status = attribute->WriteBack(transaction);
writeLocker.Unlock();
if (status == B_OK) {
status = attribute->WriteAt(transaction, pos, buffer,
_length);
}
}
if (status == B_OK) {
// Update the status change time of this inode, and write it back.
Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
bfs_inode::ToInode(real_time_clock_usecs()));
status = WriteBack(transaction);
}
attribute->WriteLockInTransaction(transaction);
ReleaseAttribute(attribute);
}
if (status == B_OK && pos == 0) {
// Update the index with the first MAX_INDEX_KEY_LENGTH bytes (at most) of
// the new data.
uint16 length = *_length;
if (length > MAX_INDEX_KEY_LENGTH)
length = MAX_INDEX_KEY_LENGTH;
uint8 indexBuffer[MAX_INDEX_KEY_LENGTH];
// NOTE(review): the result of user_memcpy() is not checked here.
user_memcpy(indexBuffer, buffer, length);
if (pos < length || (uint64)pos < (uint64)oldLength) {
index.Update(transaction, name, type, oldData, oldLength,
indexBuffer, length, this);
}
}
if (_created != NULL)
*_created = created;
return status;
}
This is a high-level attribute function that understands attributes
in the small_data section as well as real attribute files.
*/
// Removes the attribute called "name", no matter whether it is stored in the
// small_data section or in a real attribute file, and updates its index.
// Returns the error of reading the inode block, or the result of removing
// the small_data item respectively the attribute file.
status_t
Inode::RemoveAttribute(Transaction& transaction, const char* name)
{
Index index(fVolume);
bool hasIndex = index.SetTo(name) == B_OK;
NodeGetter node(fVolume);
status_t status = node.SetTo(this);
if (status != B_OK)
return status;
{
// If the attribute is in the small_data section, its old data (at most
// MAX_INDEX_KEY_LENGTH bytes) is passed to the index as the value to
// remove.
RecursiveLocker _(fSmallDataLock);
small_data* smallData = FindSmallData(node.Node(), name);
if (smallData != NULL) {
uint32 length = smallData->DataSize();
if (length > MAX_INDEX_KEY_LENGTH)
length = MAX_INDEX_KEY_LENGTH;
index.Update(transaction, name, smallData->Type(),
smallData->Data(), length, NULL, 0, this);
}
}
status = _RemoveSmallData(transaction, node, name);
// Only if there was no such small_data item, and there is an attribute
// directory, the attribute file is removed.
if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) {
status = _RemoveAttribute(transaction, name, hasIndex, &index);
if (status == B_OK) {
Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
bfs_inode::ToInode(real_time_clock_usecs()));
// NOTE(review): the result of this WriteBack() is not checked.
WriteBack(transaction);
}
}
return status;
}
This method can only return real attribute files; the attributes in the
small data section are ignored.
*/
/*!	Looks up the attribute file called \a name in the attribute directory
	of this inode. On success, the vnode reference of the attribute is kept.

	\param name The name of the attribute.
	\param _attribute Set to the attribute's inode on success.
	\return B_OK on success, B_ENTRY_NOT_FOUND if the inode has no attribute
		directory, B_BAD_VALUE if that directory has no tree, B_ERROR if a
		vnode could not be retrieved or is not an attribute, or the error of
		the tree lookup.
*/
status_t
Inode::GetAttribute(const char* name, Inode** _attribute)
{
	if (Attributes().IsZero())
		return B_ENTRY_NOT_FOUND;

	Vnode directoryVnode(fVolume, Attributes());
	Inode* directory;
	if (directoryVnode.Get(&directory) < B_OK) {
		FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n",
			name));
		return B_ERROR;
	}

	BPlusTree* tree = directory->Tree();
	if (tree == NULL)
		return B_BAD_VALUE;

	InodeReadLocker locker(directory);

	ino_t id;
	const status_t status = tree->Find((uint8*)name, (uint16)strlen(name),
		&id);
	if (status != B_OK)
		return status;

	Vnode attributeVnode(fVolume, id);
	Inode* inode;
	if (attributeVnode.Get(&inode) != B_OK || !inode->IsAttribute())
		return B_ERROR;

	*_attribute = inode;
	attributeVnode.Keep();
	return B_OK;
}
/*!	Puts the vnode reference of \a attribute; a NULL pointer is ignored. */
void
Inode::ReleaseAttribute(Inode* attribute)
{
	if (attribute != NULL)
		put_vnode(fVolume->FSVolume(), attribute->ID());
}
// Creates an attribute file called "name" with the given "type" in the
// attribute directory of this inode; the directory is created first if the
// inode does not have one yet. The new inode is returned in "attribute".
// Returns the error of creating the directory, B_ERROR if the directory's
// vnode could not be retrieved, or the result of creating the attribute.
status_t
Inode::CreateAttribute(Transaction& transaction, const char* name, uint32 type,
Inode** attribute)
{
// Create the attribute directory on demand.
if (Attributes().IsZero()) {
status_t status = Inode::Create(transaction, this, NULL,
S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL);
if (status < B_OK)
RETURN_ERROR(status);
}
Vnode vnode(fVolume, Attributes());
Inode* attributes;
if (vnode.Get(&attributes) < B_OK)
return B_ERROR;
return Inode::Create(transaction, attributes, name,
S_ATTR | S_FILE | 0666, 0, type, NULL, NULL, attribute);
}
bool
Inode::IsEmpty()
{
TreeIterator iterator(fTree);
uint32 count = 0;
char name[MAX_INDEX_KEY_LENGTH + 1];
uint16 length;
ino_t id;
while (iterator.GetNextEntry(name, &length, MAX_INDEX_KEY_LENGTH + 1,
&id) == B_OK) {
if ((Mode() & (S_ATTR_DIR | S_INDEX_DIR)) != 0)
return false;
if (++count > 2 || (strcmp(".", name) != 0 && strcmp("..", name) != 0))
return false;
}
return true;
}
/*!	Sets the status change time and the last modified time of the inode to
	the current time, and writes the inode back. Must not be called for
	inodes that are in the last modified index.
*/
status_t
Inode::ContainerContentsChanged(Transaction& transaction)
{
	ASSERT(!InLastModifiedIndex());

	Node().status_change_time
		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
	Node().last_modified_time = Node().status_change_time;

	return WriteBack(transaction);
}
This includes the file meta blocks used for maintaining its data stream.
TODO: However, the attributes in extra files are not really accounted for;
depending on the speed penalty, this should be changed, though (the value
could be cached in the inode structure or Inode object, though).
*/
/*!	Computes the number of bytes allocated for this inode: one block plus
	the data stream including the blocks needed to maintain it, and two more
	blocks if the inode has an attribute directory.
	Symlinks without the INODE_LONG_SYMLINK flag only report the inode size.
*/
off_t
Inode::AllocatedSize() const
{
	if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0) {
		// no data stream is taken into account for this kind of symlink
		return Node().InodeSize();
	}

	const data_stream& data = Node().data;
	uint32 blockSize = fVolume->BlockSize();
	off_t size = blockSize;
		// presumably the block of the inode itself

	if (data.MaxDoubleIndirectRange() != 0) {
		off_t doubleIndirectSize = data.MaxDoubleIndirectRange()
			- data.MaxIndirectRange();
		int32 indirectSize = double_indirect_max_indirect_size(
			data.double_indirect.Length(), blockSize);

		size += (2 * data.double_indirect.Length()
				+ doubleIndirectSize / indirectSize)
			* blockSize + data.MaxDoubleIndirectRange();
	} else if (data.MaxIndirectRange() != 0) {
		// The length of the indirect run is a number of blocks, and has to
		// be converted to bytes before it is added to the byte range.
		size += (off_t)data.indirect.Length() * blockSize
			+ data.MaxIndirectRange();
	} else
		size += data.MaxDirectRange();

	if (!Node().attributes.IsZero()) {
		// two more blocks are accounted for the attribute directory
		size += 2 * blockSize;
	}

	return size;
}
the inode.
If successful, "offset" will then be set to the file offset
of the block_run returned; so "pos - offset" is for the block_run
what "pos" is for the whole stream.
The caller has to make sure that "pos" is inside the stream.
*/
// Finds the block_run of the data stream that contains the file position
// "pos". On success, "run" is set to that block run, and "offset" to the file
// position its first block corresponds to.
// Depending on "pos", the run is looked up in the direct, the indirect, or
// the double indirect range of the data stream.
// Returns B_BAD_DATA if the double indirect range is inconsistent,
// B_ENTRY_NOT_FOUND if "pos" is beyond the direct runs, B_ERROR if it is
// beyond the indirect runs, the error of reading a block, or the result of
// validating the block run.
status_t
Inode::FindBlockRun(off_t pos, block_run& run, off_t& offset)
{
data_stream* data = &Node().data;
if (data->MaxIndirectRange() > 0 && pos >= data->MaxDirectRange()) {
if (data->MaxDoubleIndirectRange() > 0
&& pos >= data->MaxIndirectRange()) {
// The position is in the double indirect range, where the position of the
// run can be computed directly.
CachedBlock cached(fVolume);
int32 runsPerBlock;
int32 directSize;
int32 indirectSize;
get_double_indirect_sizes(data->double_indirect.Length(),
fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
// Both sizes are used as divisors below.
if (directSize <= 0 || indirectSize <= 0)
RETURN_ERROR(B_BAD_DATA);
// "index" selects the entry in the double indirect array.
// NOTE(review): "index / runsPerBlock" is not checked against the length
// of the double indirect run - presumably guaranteed by the caller keeping
// "pos" inside the stream; confirm.
off_t start = pos - data->MaxIndirectRange();
int32 index = start / indirectSize;
status_t status = cached.SetTo(fVolume->ToBlock(
data->double_indirect) + index / runsPerBlock);
if (status != B_OK)
RETURN_ERROR(status);
// "current" selects the entry in the indirect array that the double
// indirect entry points to.
block_run* indirect = (block_run*)cached.Block();
int32 current = (start % indirectSize) / directSize;
status = cached.SetTo(fVolume->ToBlock(indirect[
index % runsPerBlock]) + current / runsPerBlock);
if (status != B_OK)
RETURN_ERROR(status);
indirect = (block_run*)cached.Block();
run = indirect[current % runsPerBlock];
// A run with a different length than the double indirect run itself is
// treated as corruption.
if (run.Length() != data->double_indirect.Length())
RETURN_ERROR(B_BAD_DATA);
offset = data->MaxIndirectRange() + (index * indirectSize)
+ (current * directSize);
} else {
// The position is in the indirect range: walk through all runs of the
// indirect blocks, adding up their sizes, until "pos" is reached.
int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
off_t runBlockEnd = data->MaxDirectRange();
CachedBlock cached(fVolume);
off_t block = fVolume->ToBlock(data->indirect);
for (int32 i = 0; i < data->indirect.Length(); i++) {
status_t status = cached.SetTo(block + i);
if (status != B_OK)
RETURN_ERROR(status);
block_run* indirect = (block_run*)cached.Block();
int32 current = -1;
while (++current < runsPerBlock) {
// A zero run ends the list of runs in this block.
if (indirect[current].IsZero())
break;
runBlockEnd += (uint32)indirect[current].Length()
<< cached.BlockShift();
if (runBlockEnd > pos) {
run = indirect[current];
offset = runBlockEnd - ((uint32)run.Length()
<< cached.BlockShift());
return fVolume->ValidateBlockRun(run);
}
}
}
RETURN_ERROR(B_ERROR);
}
} else {
// The position is in the direct range: walk through the direct runs,
// adding up their sizes, until "pos" is reached.
off_t runBlockEnd = 0LL;
int32 current = -1;
while (++current < NUM_DIRECT_BLOCKS) {
// A zero run ends the list of direct runs.
if (data->direct[current].IsZero())
break;
runBlockEnd += (uint32)data->direct[current].Length()
<< fVolume->BlockShift();
if (runBlockEnd > pos) {
run = data->direct[current];
offset = runBlockEnd
- ((uint32)run.Length() << fVolume->BlockShift());
return fVolume->ValidateBlockRun(run);
}
}
return B_ENTRY_NOT_FOUND;
}
// Only the double indirect path gets here.
return fVolume->ValidateBlockRun(run);
}
// Reads "*_length" bytes at offset "pos" into "buffer" by passing the request
// on to the inode's file cache; returns the result of file_cache_read().
status_t
Inode::ReadAt(off_t pos, uint8* buffer, size_t* _length)
{
return file_cache_read(FileCache(), NULL, pos, buffer, _length);
}
// Writes "*_length" bytes from "buffer" at offset "pos" through the file
// cache, growing the file first if the write reaches beyond its current
// size. The transaction is only started if the file size has to be changed.
// Returns B_BAD_VALUE for a negative "pos", the error of resizing or writing
// back the inode, or the result of file_cache_write().
status_t
Inode::WriteAt(Transaction& transaction, off_t pos, const uint8* buffer,
size_t* _length)
{
InodeReadLocker locker(this);
// Update the modification and status change times in memory.
// NOTE(review): this happens with only the read lock held, and before "pos"
// is validated below - confirm that this is intended.
Node().last_modified_time = Node().status_change_time
= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
size_t length = *_length;
bool changeSize = (uint64)pos + (uint64)length > (uint64)Size();
if (pos < 0)
return B_BAD_VALUE;
locker.Unlock();
// A transaction is needed only if the file has to grow.
if (changeSize && !transaction.IsStarted())
transaction.Start(fVolume, BlockNumber());
WriteLocker writeLocker(fLock);
// The size is checked again under the write lock, as it may have changed
// while no lock was held; the transaction is started without the lock held.
if (!transaction.IsStarted()
&& (uint64)pos + (uint64)length > (uint64)Size()) {
writeLocker.Unlock();
transaction.Start(fVolume, BlockNumber());
writeLocker.Lock();
}
off_t oldSize = Size();
if ((uint64)pos + (uint64)length > (uint64)oldSize) {
// Grow the file; on failure, the inode is still attached to the
// transaction before returning.
status_t status = SetFileSize(transaction, pos + length);
if (status != B_OK) {
*_length = 0;
WriteLockInTransaction(transaction);
RETURN_ERROR(status);
}
status = WriteBack(transaction);
if (status != B_OK) {
WriteLockInTransaction(transaction);
return status;
}
}
writeLocker.Unlock();
// If the write starts beyond the old end of the file, the gap is filled with
// zeros.
// NOTE(review): the result of FillGapWithZeros() is not checked.
if (oldSize < pos)
FillGapWithZeros(oldSize, pos);
// Nothing to write; note that the inode is not attached to the transaction
// on this path.
if (length == 0)
return B_OK;
status_t status = file_cache_write(FileCache(), NULL, pos, buffer, _length);
if (transaction.IsStarted())
WriteLockInTransaction(transaction);
return status;
}
/*!	Fills the gap between the old file size and the new file size
	with zeros.
	It's more or less a copy of Inode::WriteAt() but it can handle
	length differences of more than just 4 GB, and it never uses
	the log, even if the INODE_LOGGED flag is set.
*/
// Writes zeros into the byte range [pos, newSize) of the file cache.
// The range is processed in chunks of at most 1 GB; a NULL buffer is passed
// to file_cache_write() for each chunk. Returns the first error reported by
// file_cache_write(), or B_OK once the whole range has been processed.
status_t
Inode::FillGapWithZeros(off_t pos, off_t newSize)
{
	while (pos < newSize) {
		// file_cache_write() takes the chunk size by pointer; we advance by
		// whatever it leaves in there
		size_t chunk = newSize - pos;
		if (newSize > pos + 1024 * 1024 * 1024)
			chunk = 1024 * 1024 * 1024;

		const status_t result
			= file_cache_write(FileCache(), NULL, pos, NULL, &chunk);
		if (result < B_OK)
			return result;

		pos += chunk;
	}

	return B_OK;
}
/*!	Allocates a block array of \a length blocks. Growing
	the indirect and double indirect range uses this method.
	The allocated block_run is saved in "run"
*/
// Allocates the blocks for a block_run array and stores the result in \a run.
// \a run must be zero on entry, otherwise B_BAD_VALUE is returned.
// With \a variableSize set, the allocation may be as short as a single block;
// otherwise all \a length blocks are required. Every allocated block is then
// opened writable within \a transaction.
status_t
Inode::_AllocateBlockArray(Transaction& transaction, block_run& run,
	size_t length, bool variableSize)
{
	// refuse to overwrite a run that is already in use
	if (!run.IsZero())
		return B_BAD_VALUE;

	status_t result = fVolume->Allocate(transaction, this, length, run,
		variableSize ? 1 : length);
	if (result != B_OK)
		return result;

	// touch every block of the new run through the block cache
	CachedBlock cached(fVolume);
	const off_t firstBlock = fVolume->ToBlock(run);

	for (int32 blockIndex = 0; blockIndex < run.Length(); blockIndex++) {
		result = cached.SetToWritable(transaction, firstBlock + blockIndex,
			true);
		if (result != B_OK)
			return result;
	}

	return B_OK;
}
/*!	Grows the stream to \a size, and fills the direct/indirect/double indirect
	ranges with the runs.
	This method will also determine the size of the preallocation, if any.
*/
// Grows the data stream of this inode so that it can hold \a size bytes.
// Blocks are obtained from Volume::Allocate() and each returned run is
// recorded in the first range that can still take it: the direct array in
// the inode, then the indirect array block(s), then the double indirect
// arrays.
// Returns B_OK on success, B_DEVICE_FULL if more blocks are needed than the
// volume reports as free, B_BAD_DATA if the double indirect sizes come out
// non-positive, EFBIG if no range can take the run, or the error of a failed
// allocation/block cache call.
status_t
Inode::_GrowStream(Transaction& transaction, off_t size)
{
data_stream* data = &Node().data;
// If the new size is still below the end of one of the ranges, no blocks
// are allocated and only the size field is updated.
if (size < data->MaxDirectRange()
|| size < data->MaxIndirectRange()
|| size < data->MaxDoubleIndirectRange()) {
data->size = HOST_ENDIAN_TO_BFS_INT64(size);
return B_OK;
}
// "minimum" is the minimum run length handed to Volume::Allocate(); it is 1
// unless the double indirect range is in use.
uint16 minimum = 1;
off_t bytes;
// Determine how many bytes lie beyond the end of the range the current size
// falls into.
if (data->Size() < data->MaxDoubleIndirectRange()) {
bytes = size - data->MaxDoubleIndirectRange();
minimum = data->double_indirect.Length();
} else if (data->Size() < data->MaxIndirectRange())
bytes = size - data->MaxIndirectRange();
else if (data->Size() < data->MaxDirectRange())
bytes = size - data->MaxDirectRange();
else {
bytes = size - data->Size();
if (data->MaxDoubleIndirectRange() > 0)
minimum = data->double_indirect.Length();
}
// convert the missing bytes into a block count, rounding up
off_t blocksNeeded = (bytes + fVolume->BlockSize() - 1)
>> fVolume->BlockShift();
if (blocksNeeded > fVolume->FreeBlocks())
return B_DEVICE_FULL;
off_t blocksRequested = blocksNeeded;
// Preallocation: request more blocks than strictly needed, unless this is an
// attribute, attribute directory, or symlink, or the volume has 128 or fewer
// free blocks.
if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink()
&& fVolume->FreeBlocks() > 128) {
// "roundTo" is the preallocation granularity in blocks
off_t roundTo = 0;
if (IsFile()) {
// files: 64 KB for small files/small growth, 512 KB for medium ones,
// otherwise 1/16th of the new size
if (size < 1 * 1024 * 1024 && bytes < 512 * 1024) {
roundTo = 65536 >> fVolume->BlockShift();
} else if (size < 32 * 1024 * 1024 && bytes <= 1 * 1024 * 1024) {
roundTo = (512 * 1024) >> fVolume->BlockShift();
} else {
roundTo = size >> (fVolume->BlockShift() + 4);
}
} else if (IsIndex()) {
// indices: 64 KB
roundTo = 65536 >> fVolume->BlockShift();
} else {
// everything else: 4 KB
roundTo = 4096 >> fVolume->BlockShift();
}
if (roundTo > 1) {
// Note: this always moves to the next multiple, even if blocksNeeded
// already is a multiple of roundTo.
blocksRequested = ((blocksNeeded + roundTo) / roundTo) * roundTo;
}
}
// Allocate runs until the needed blocks are covered; each iteration records
// one run in the first range that has room for it.
while (blocksNeeded > 0) {
if (minimum > 1) {
// keep the request a multiple of the double indirect run length
blocksRequested = round_up(blocksRequested, minimum);
}
block_run run;
status_t status = fVolume->Allocate(transaction, this, blocksRequested,
run, minimum);
if (status != B_OK)
return status;
// The allocator may return fewer blocks than requested; preallocation is
// only attempted for the first request.
blocksNeeded -= run.Length();
blocksRequested = blocksNeeded;
// 1) Try to put the run into the direct array of the inode.
if (data->Size() <= data->MaxDirectRange()) {
// find the first unused slot
int32 free = 0;
for (; free < NUM_DIRECT_BLOCKS; free++) {
if (data->direct[free].IsZero())
break;
}
if (free < NUM_DIRECT_BLOCKS) {
// extend the previous run if the new one is mergeable with it,
// otherwise occupy the free slot
int32 last = free - 1;
if (free > 0 && data->direct[last].MergeableWith(run)) {
data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16(
data->direct[last].Length() + run.Length());
} else
data->direct[free] = run;
data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64(
data->MaxDirectRange()
+ run.Length() * fVolume->BlockSize());
// NOTE(review): the raw field max_direct_range (already converted two
// statements above) is passed through HOST_ENDIAN_TO_BFS_INT64 again
// here, while the indirect branch below uses the MaxIndirectRange()
// accessor - looks inconsistent where the conversion is not a no-op;
// confirm.
data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
? data->max_direct_range : size);
continue;
}
}
// 2) Try to put the run into the indirect array.
if (data->Size() <= data->MaxIndirectRange()
|| !data->MaxIndirectRange()) {
CachedBlock cached(fVolume);
block_run* runs = NULL;
uint32 free = 0;
off_t block;
if (data->indirect.IsZero()) {
// the indirect array does not exist yet: allocate it; the indirect
// range starts where the direct range ends
status = _AllocateBlockArray(transaction, data->indirect,
NUM_ARRAY_BLOCKS, true);
if (status != B_OK)
return status;
data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
data->MaxDirectRange());
status = cached.SetTo(data->indirect);
if (status != B_OK)
return status;
runs = (block_run*)cached.Block();
} else {
// search the array blocks for the first unused slot
uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run);
block = fVolume->ToBlock(data->indirect);
int32 i = 0;
for (; i < data->indirect.Length(); i++) {
status = cached.SetTo(block + i);
if (status != B_OK)
return status;
runs = (block_run*)cached.Block();
for (free = 0; free < numberOfRuns; free++)
if (runs[free].IsZero())
break;
if (free < numberOfRuns)
break;
}
// no free slot in any array block
if (i == data->indirect.Length())
runs = NULL;
}
if (runs != NULL) {
cached.MakeWritable(transaction);
// merge with the previous run in this block if possible
int32 last = free - 1;
if (free > 0 && runs[last].MergeableWith(run)) {
runs[last].length = HOST_ENDIAN_TO_BFS_INT16(
runs[last].Length() + run.Length());
} else
runs[free] = run;
data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
data->MaxIndirectRange()
+ ((uint32)run.Length() << fVolume->BlockShift()));
data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
? data->MaxIndirectRange() : size);
continue;
}
}
// 3) Put the run into the double indirect arrays; all runs stored there
// have the same length ("minimum").
if (data->Size() <= data->MaxDoubleIndirectRange()
|| !data->max_double_indirect_range) {
if (!data->max_double_indirect_range)
minimum = _DoubleIndirectBlockLength();
else
minimum = data->double_indirect.Length();
if ((run.Length() % minimum) != 0) {
// The run is not a multiple of the fixed run length: cut off the rest,
// give it back to the volume, and account for it again in the number
// of blocks still needed.
int32 rest = run.Length() % minimum;
run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest);
status = fVolume->Free(transaction,
block_run::Run(run.AllocationGroup(),
run.Start() + run.Length(), rest));
if (status != B_OK)
return status;
blocksNeeded += rest;
blocksRequested = round_up(blocksNeeded, minimum);
// nothing left of the run; allocate again
if (run.length == 0)
continue;
}
if (data->double_indirect.IsZero()) {
// the top-level array does not exist yet: allocate it; the double
// indirect range starts where the indirect range ends
status = _AllocateBlockArray(transaction,
data->double_indirect, _DoubleIndirectBlockLength());
if (status != B_OK)
return status;
data->max_double_indirect_range = data->max_indirect_range;
}
int32 runsPerBlock;
int32 directSize;
int32 indirectSize;
get_double_indirect_sizes(data->double_indirect.Length(),
fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
if (directSize <= 0 || indirectSize <= 0)
return B_BAD_DATA;
// "start" is the number of bytes already stored in the double indirect
// range; from it, compute the position of the next free entry:
// indirectIndex selects the entry in the top-level array, index the
// entry in the array that entry refers to.
off_t start = data->MaxDoubleIndirectRange()
- data->MaxIndirectRange();
int32 indirectIndex = start / indirectSize;
int32 index = (start % indirectSize) / directSize;
int32 runsPerArray = runsPerBlock * minimum;
CachedBlock cached(fVolume);
CachedBlock cachedDirect(fVolume);
block_run* array = NULL;
// remember the full length; "run" is consumed piece by piece below
uint32 runLength = run.Length();
while (run.length != 0) {
if (array == NULL) {
// get the block of the top-level array that holds indirectIndex
uint32 block = indirectIndex / runsPerBlock;
if (block >= minimum)
return EFBIG;
status = cached.SetTo(fVolume->ToBlock(
data->double_indirect) + block);
if (status != B_OK)
return status;
array = (block_run*)cached.Block();
}
do {
// allocate the second-level array on first use
if (array[indirectIndex % runsPerBlock].IsZero()) {
cached.MakeWritable(transaction);
status = _AllocateBlockArray(transaction,
array[indirectIndex % runsPerBlock],
data->double_indirect.Length());
if (status != B_OK)
return status;
}
status = cachedDirect.SetToWritable(transaction,
fVolume->ToBlock(array[indirectIndex
% runsPerBlock]) + index / runsPerBlock);
if (status != B_OK)
return status;
block_run* runs = (block_run*)cachedDirect.Block();
// split the allocated run into pieces of "minimum" blocks and store
// one piece per entry
do {
runs[index % runsPerBlock] = run;
runs[index % runsPerBlock].length
= HOST_ENDIAN_TO_BFS_INT16(minimum);
run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start()
+ minimum);
run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
- minimum);
} while ((++index % runsPerBlock) != 0 && run.length);
} while ((index % runsPerArray) != 0 && run.length);
// the second-level array is full, continue with the next one
if (index == runsPerArray)
index = 0;
// crossing into the next block of the top-level array forces a reload
if (++indirectIndex % runsPerBlock == 0) {
array = NULL;
index = 0;
}
}
data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
data->MaxDoubleIndirectRange()
+ (runLength << fVolume->BlockShift()));
// NOTE(review): unlike the other branches, "size" is stored here without
// HOST_ENDIAN_TO_BFS_INT64, and the already converted raw field is
// converted once more - confirm (the final assignment after the loop
// overwrites the size once blocksNeeded reaches zero).
data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64(
data->max_double_indirect_range) : size;
continue;
}
// no range could take the run
RETURN_ERROR(EFBIG);
}
data->size = HOST_ENDIAN_TO_BFS_INT64(size);
return B_OK;
}
/*!	Returns the length in blocks used for the arrays and runs of the double
	indirect range: DOUBLE_INDIRECT_ARRAY_SIZE divided by the block size of
	the volume, but never less than a single block.
*/
size_t
Inode::_DoubleIndirectBlockLength() const
{
	if (fVolume->BlockSize() <= DOUBLE_INDIRECT_ARRAY_SIZE)
		return DOUBLE_INDIRECT_ARRAY_SIZE / fVolume->BlockSize();

	// a single block is already larger than the array size
	return 1;
}
/*!	Frees the statically sized array of the double indirect part of a data
	stream.
*/
// Walks the block_run array stored in the blocks of \a run and releases
// everything that lies at or beyond \a size.
// At \a level 0 each array entry is itself an array, which is processed by a
// recursive call with level 1; at level 1 the entries are freed via
// Volume::Free().
// \a offset is the stream position the first entry of \a run corresponds to.
// \a max is set (in file system byte order) to the end of the last level 1
// entry that is kept.
// Returns B_BAD_DATA if the per-entry size comes out non-positive, the error
// of a failed block cache/free call, or B_OK.
status_t
Inode::_FreeStaticStreamArray(Transaction& transaction, int32 level,
block_run run, off_t size, off_t offset, off_t& max)
{
// number of bytes covered by a single entry of the array at this level
int32 indirectSize;
if (level == 0) {
indirectSize = double_indirect_max_indirect_size(run.Length(),
fVolume->BlockSize());
} else {
indirectSize = double_indirect_max_direct_size(run.Length(),
fVolume->BlockSize());
}
if (indirectSize <= 0)
return B_BAD_DATA;
// "start" is the part of \a size that lies within this array (0 if the
// array starts at or after \a size)
off_t start;
if (size > offset)
start = size - offset;
else
start = 0;
// first entry that may have to be touched; all entries before it are kept
int32 index = start / indirectSize;
int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
CachedBlock cached(fVolume);
off_t blockNumber = fVolume->ToBlock(run);
// advance the offset to the stream position of that first entry
offset += (off_t)index * indirectSize;
// iterate over the array blocks, starting with the one containing "index"
for (int32 i = index / runsPerBlock; i < run.Length(); i++) {
status_t status = cached.SetToWritable(transaction, blockNumber + i);
if (status != B_OK)
RETURN_ERROR(status);
block_run* array = (block_run*)cached.WritableBlock();
for (index = index % runsPerBlock; index < runsPerBlock; index++) {
if (array[index].IsZero()) {
// an unused entry ends the array: leave both loops
i = run.Length();
break;
}
status_t status = B_OK;
if (level == 0) {
// descend into the array this entry refers to
status = _FreeStaticStreamArray(transaction, 1, array[index],
size, offset, max);
} else if (offset >= size)
status = fVolume->Free(transaction, array[index]);
else
// this entry is kept; remember where it ends
max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize);
if (status < B_OK)
RETURN_ERROR(status);
// entries that start at or after the new size are cleared
if (offset >= size)
array[index].SetTo(0, 0, 0);
offset += indirectSize;
}
// subsequent blocks are processed from their first entry
index = 0;
}
return B_OK;
}
/*!	Frees all block_runs in the array which come after the specified size.
	It also trims the last block_run that contains the size.
	"offset" and "max" are maintained until the last block_run that doesn't
	have to be freed - after this, the values won't be correct anymore, but
	will still assure correct function for all subsequent calls.
	"max" is considered to be in file system byte order.
*/
// Frees the parts of the runs in \a array (with \a arrayLength entries) that
// lie beyond \a size.
// \a offset is the stream position of the first entry on entry, and is
// advanced while iterating. \a max is updated in file system byte order.
// Returns B_IO_ERROR if Volume::Free() fails, B_OK otherwise.
status_t
Inode::_FreeStreamArray(Transaction& transaction, block_run* array,
uint32 arrayLength, off_t size, off_t& offset, off_t& max)
{
PRINT(("FreeStreamArray: arrayLength %" B_PRId32 ", size %" B_PRIdOFF
", offset %" B_PRIdOFF ", max %" B_PRIdOFF "\n", arrayLength, size,
offset, max));
// "offset" is the start of the current run, "newOffset" its end; the loop
// increment moves "offset" on to the end of the run just processed
off_t newOffset = offset;
uint32 i = 0;
for (; i < arrayLength; i++, offset = newOffset) {
// the first unused entry ends the array
if (array[i].IsZero())
break;
newOffset += (off_t)array[i].Length() << fVolume->BlockShift();
// the run ends at or before the new size: keep it completely
if (newOffset <= size)
continue;
block_run run = array[i];
if (newOffset > size && offset < size) {
// The new size lies within this run: split it. "run" becomes the
// part to free, starting at the first block after \a size (rounded
// up to a whole block); array[i] keeps the blocks before that.
run.start = HOST_ENDIAN_TO_BFS_INT16(array[i].Start()
+ ((size + fVolume->BlockSize() - 1 - offset)
>> fVolume->BlockShift()));
array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start()
- array[i].Start());
run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
- array[i].Length());
// nothing to cut off
if (run.length == 0)
continue;
// the range now ends with the shortened run
max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length()
<< fVolume->BlockShift()));
} else {
// the run starts at or after the new size: free it completely
array[i].SetTo(0, 0, 0);
if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset)
max = HOST_ENDIAN_TO_BFS_INT64(offset);
}
if (fVolume->Free(transaction, run) < B_OK)
return B_IO_ERROR;
}
return B_OK;
}
/*!	Shrinks the data stream to \a size by releasing the runs beyond it.
	The ranges are processed from the outermost to the innermost one: double
	indirect, indirect, and finally direct. The arrays of the double indirect
	and indirect ranges are freed as well once they are no longer needed.
	On success, the size of the stream is set to \a size.
*/
status_t
Inode::_ShrinkStream(Transaction& transaction, off_t size)
{
	data_stream* stream = &Node().data;
	status_t result;

	// double indirect range
	if (stream->MaxDoubleIndirectRange() > size) {
		// the range field is passed on via a pointer, as in the rest of
		// this method
		off_t* doubleIndirectMax = &stream->max_double_indirect_range;

		result = _FreeStaticStreamArray(transaction, 0,
			stream->double_indirect, size, stream->MaxIndirectRange(),
			*doubleIndirectMax);
		if (result != B_OK)
			return result;

		// the whole range is gone, release its array, too
		if (size <= stream->MaxIndirectRange()) {
			fVolume->Free(transaction, stream->double_indirect);
			stream->double_indirect.SetTo(0, 0, 0);
			stream->max_double_indirect_range = 0;
		}
	}

	// indirect range
	if (stream->MaxIndirectRange() > size) {
		CachedBlock cached(fVolume);
		const off_t firstBlock = fVolume->ToBlock(stream->indirect);
		off_t offset = stream->MaxDirectRange();

		for (int32 blockIndex = 0; blockIndex < stream->indirect.Length();
				blockIndex++) {
			result = cached.SetToWritable(transaction,
				firstBlock + blockIndex);
			if (result != B_OK)
				return result;

			block_run* runs = (block_run*)cached.WritableBlock();
			off_t* indirectMax = &stream->max_indirect_range;

			result = _FreeStreamArray(transaction, runs,
				fVolume->BlockSize() / sizeof(block_run), size, offset,
				*indirectMax);
			if (result != B_OK)
				return B_IO_ERROR;
		}

		// nothing left in the indirect range, release its array, too
		if (stream->max_direct_range == stream->max_indirect_range) {
			fVolume->Free(transaction, stream->indirect);
			stream->indirect.SetTo(0, 0, 0);
			stream->max_indirect_range = 0;
		}
	}

	// direct range
	if (stream->MaxDirectRange() > size) {
		off_t offset = 0;
		off_t* directMax = &stream->max_direct_range;

		result = _FreeStreamArray(transaction, stream->direct,
			NUM_DIRECT_BLOCKS, size, offset, *directMax);
		if (result != B_OK)
			return result;
	}

	stream->size = HOST_ENDIAN_TO_BFS_INT64(size);
	return B_OK;
}
/*!	Changes the size of the data stream to \a size, growing or shrinking it
	as needed, and writes the inode back within \a transaction.
	The file cache and file map are told about the new size as well.
	Returns B_BAD_VALUE for a negative \a size, and B_OK right away if the
	size does not change at all.
*/
status_t
Inode::SetFileSize(Transaction& transaction, off_t size)
{
	if (size < 0)
		return B_BAD_VALUE;

	const off_t previousSize = Size();
	if (previousSize == size)
		return B_OK;

	T(Resize(this, previousSize, size, false));

	status_t result;
	if (size <= previousSize) {
		result = _ShrinkStream(transaction, size);
	} else {
		result = _GrowStream(transaction, size);
		if (result < B_OK) {
			// growing failed: give back whatever has been allocated so far;
			// the original error is what gets reported
			_ShrinkStream(transaction, previousSize);
		}
	}

	if (result < B_OK)
		return result;

	file_cache_set_size(FileCache(), size);
	file_map_set_size(Map(), size);

	return WriteBack(transaction);
}
/*!	Enlarges the data stream by \a bytes, relative to its current size.
	This is a convenience wrapper around SetFileSize(), and returns its
	status.
*/
status_t
Inode::Append(Transaction& transaction, off_t bytes)
{
	const off_t newSize = Size() + bytes;
	return SetFileSize(transaction, newSize);
}
/*!	Checks whether or not this inode's data stream needs to be trimmed
	because of an earlier preallocation.
	Returns true if there are any blocks to be trimmed.
*/
bool
Inode::NeedsTrimming() const
{
if (IsIndex() || IsDeleted()
|| (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0))
return false;
off_t roundedSize = round_up(Size(), fVolume->BlockSize());
return Node().data.MaxDirectRange() > roundedSize
|| Node().data.MaxIndirectRange() > roundedSize
|| Node().data.MaxDoubleIndirectRange() > roundedSize;
}
/*!	Releases all blocks of the data stream that lie beyond its current size,
	and writes the inode back within \a transaction.
	Returns the error of _ShrinkStream() if that fails, otherwise the status
	of WriteBack().
*/
status_t
Inode::TrimPreallocation(Transaction& transaction)
{
	T(Resize(this, max_c(Node().data.MaxDirectRange(),
		Node().data.MaxIndirectRange()), Size(), true));

	const status_t result = _ShrinkStream(transaction, Size());
	if (result >= B_OK)
		return WriteBack(transaction);

	return result;
}
/*!	Frees everything this inode occupies on disk: its data stream (unless it
	is a symlink without the INODE_LONG_SYMLINK flag), all of its attributes,
	and eventually the block run of the inode itself.
	Returns the error of SetFileSize() if truncating fails, B_IO_ERROR if the
	inode could not be written back, otherwise the status of Volume::Free().
*/
status_t
Inode::Free(Transaction& transaction)
{
	FUNCTION();

	// only inodes with a data stream need to be truncated
	const bool hasDataStream
		= !IsSymLink() || (Flags() & INODE_LONG_SYMLINK) != 0;
	if (hasDataStream) {
		const status_t result = SetFileSize(transaction, 0);
		if (result < B_OK)
			return result;
	}

	// remove all attributes; the iterator lives in its own scope so that it
	// is gone before the inode is written back
	{
		AttributeIterator iterator(this);

		char attributeName[B_FILE_NAME_LENGTH];
		uint32 attributeType;
		size_t nameLength;
		ino_t attributeID;
		while (iterator.GetNext(attributeName, &nameLength, &attributeType,
				&attributeID) == B_OK) {
			RemoveAttribute(transaction, attributeName);
		}
	}

	if (WriteBack(transaction) < B_OK)
		return B_IO_ERROR;

	return fVolume->Free(transaction, BlockRun());
}
// Synchronizes the contents of this inode.
// If the inode has a file cache, that cache is synced and its status is
// returned. Otherwise every block run of the data stream (direct, indirect,
// and double indirect) is handed to block_cache_sync_etc(); the first unused
// run ends the walk with B_OK, the first error is returned right away.
status_t
Inode::Sync()
{
if (FileCache())
return file_cache_sync(FileCache());
// symlinks without the INODE_LONG_SYMLINK flag have no data stream to sync
if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0)
return B_OK;
InodeReadLocker locker(this);
data_stream* data = &Node().data;
status_t status = B_OK;
// direct range
for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
if (data->direct[i].IsZero())
return B_OK;
status = block_cache_sync_etc(fVolume->BlockCache(),
fVolume->ToBlock(data->direct[i]), data->direct[i].Length());
if (status != B_OK)
return status;
}
// indirect range
if (data->max_indirect_range == 0)
return B_OK;
CachedBlock cached(fVolume);
off_t block = fVolume->ToBlock(data->indirect);
// number of block_runs that fit into one block
int32 count = fVolume->BlockSize() / sizeof(block_run);
for (int32 j = 0; j < data->indirect.Length(); j++) {
status = cached.SetTo(block + j);
if (status != B_OK)
return status;
block_run* runs = (block_run*)cached.Block();
for (int32 i = 0; i < count; i++) {
if (runs[i].IsZero())
return B_OK;
status = block_cache_sync_etc(fVolume->BlockCache(),
fVolume->ToBlock(runs[i]), runs[i].Length());
if (status != B_OK)
return status;
}
}
// double indirect range
if (data->max_double_indirect_range == 0)
return B_OK;
off_t indirectBlock = fVolume->ToBlock(data->double_indirect);
// l: blocks of the top-level array, k: entries therein,
// j: blocks of the array an entry refers to, i: runs in such a block
for (int32 l = 0; l < data->double_indirect.Length(); l++) {
status = cached.SetTo(indirectBlock + l);
if (status != B_OK)
return status;
block_run* indirectRuns = (block_run*)cached.Block();
CachedBlock directCached(fVolume);
for (int32 k = 0; k < count; k++) {
if (indirectRuns[k].IsZero())
return B_OK;
block = fVolume->ToBlock(indirectRuns[k]);
for (int32 j = 0; j < indirectRuns[k].Length(); j++) {
status = directCached.SetTo(block + j);
if (status != B_OK)
return status;
block_run* runs = (block_run*)directCached.Block();
for (int32 i = 0; i < count; i++) {
if (runs[i].IsZero())
return B_OK;
status = block_cache_sync_etc(fVolume->BlockCache(),
fVolume->ToBlock(runs[i]), runs[i].Length());
if (status != B_OK)
return status;
}
}
}
}
return B_OK;
}
/*!	Called with the outcome of a transaction this inode was part of.
	If the transaction was not successful, the in-memory node is updated
	from disk again; nothing happens on success.
*/
void
Inode::TransactionDone(bool success)
{
	if (success)
		return;

	UpdateNodeFromDisk();
}
// Called when this inode is removed from a transaction.
// Clears the INODE_IN_TRANSACTION flag, registers the inode with the
// volume's list of removed inodes if it is marked INODE_DELETED, releases
// the write lock of the inode, and finally puts the vnode (except while the
// volume is initializing, and for the indices node).
void
Inode::RemovedFromTransaction()
{
Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
if ((Flags() & INODE_DELETED) != 0)
fVolume->RemovedInodes().Add(this);
rw_lock_write_unlock(&Lock());
// the lock is released before the vnode reference is put
if (!fVolume->IsInitializing() && this != fVolume->IndicesNode())
put_vnode(fVolume->FSVolume(), ID());
}
// Removes the entry \a name from this directory, and marks the inode it
// refers to as deleted.
// \param transaction The transaction both inodes are locked in.
// \param name Name of the entry to remove.
// \param _id If not NULL, receives the ID found for \a name.
// \param isDirectory Whether the entry is expected to be a container;
//	checked unless \a force is set or the inode is an index.
// \param force Skips the type/emptiness checks, and ignores a failure to
//	remove the entry from the tree.
// \return B_BAD_VALUE if this inode has no tree, B_ENTRY_NOT_FOUND if there
//	is no such entry, B_NOT_A_DIRECTORY/B_IS_A_DIRECTORY/
//	B_DIRECTORY_NOT_EMPTY if the checks fail, or the status of the
//	underlying calls.
status_t
Inode::Remove(Transaction& transaction, const char* name, ino_t* _id,
bool isDirectory, bool force)
{
if (fTree == NULL)
RETURN_ERROR(B_BAD_VALUE);
WriteLockInTransaction(transaction);
// look up the entry in the directory tree
off_t id;
if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) < B_OK)
return B_ENTRY_NOT_FOUND;
if (_id)
*_id = id;
Vnode vnode(fVolume, id);
Inode* inode;
status_t status = vnode.Get(&inode);
if (status < B_OK) {
// the inode could not be retrieved: report it, and just remove the
// entry from the tree
REPORT_ERROR(status);
return fTree->Remove(transaction, name, id);
}
T(Remove(inode, name));
inode->WriteLockInTransaction(transaction);
// the type and emptiness checks are skipped for indices and forced removal
if (!inode->IsIndex() && !force) {
if (inode->IsContainer() != isDirectory)
return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY;
if (isDirectory && !inode->IsEmpty())
return B_DIRECTORY_NOT_EMPTY;
}
status = remove_vnode(fVolume->FSVolume(), id);
if (status != B_OK)
return status;
// if the entry cannot be removed from the tree, the vnode removal is undone
if (fTree->Remove(transaction, name, id) != B_OK && !force) {
unremove_vnode(fVolume->FSVolume(), id);
RETURN_ERROR(B_ERROR);
}
#ifdef DEBUG
if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) == B_OK) {
DIE(("deleted entry still there"));
}
#endif
ContainerContentsChanged(transaction);
// mark the inode as deleted and no longer in use
inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
// remove the inode from the indices it is part of; the results of these
// calls are not checked
Index index(fVolume);
if (inode->InNameIndex()) {
index.RemoveName(transaction, name, inode);
}
if (inode->InSizeIndex())
index.RemoveSize(transaction, inode);
if (inode->InLastModifiedIndex())
index.RemoveLastModified(transaction, inode);
return inode->WriteBack(transaction);
}
/*!	Creates the inode with the specified \a parent directory, and automatically
	adds the created inode to that parent directory. If an attribute directory
	is created, it will also automatically be added to the \a parent inode as
	such. However, the indices root node, and the regular root node won't be
	added to the superblock.
	It will also create the initial B+tree for the inode if it's a directory
	of any kind.
	\a name may be \c NULL, but only if no \a parent is given.
	If the "_id" or "_inode" variable is given and non-NULL to store the
	inode's ID, the inode stays locked - you have to call put_vnode() if you
	don't use it anymore.
	If the node already exists, this method will fail if \c O_EXCL is set, or
	it's a directory or a symlink. Otherwise, it will just be returned.
	If \c O_TRUNC has been specified, the file will also be truncated.
*/
// Creates a new inode, or returns the existing one for \a name.
// \param transaction The transaction the operation is part of.
// \param parent The parent inode, may be NULL.
// \param name The name of the new entry.
// \param mode, openMode, type Mode bits, open flags, and type for the inode.
// \param _created If not NULL, set to whether a new inode was created.
// \param _id, _inode If not NULL, receive the ID/pointer of the inode; the
//	vnode reference is kept in this case.
// \param vnodeOps, publishFlags Passed on to the InodeAllocator.
// \return B_OK on success, or an error code (see the individual checks).
status_t
Inode::Create(Transaction& transaction, Inode* parent, const char* name,
int32 mode, int openMode, uint32 type, bool* _created, ino_t* _id,
Inode** _inode, fs_vnode_ops* vnodeOps, uint32 publishFlags)
{
FUNCTION_START(("name = %s, mode = %" B_PRId32 "\n", name, mode));
block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0);
Volume* volume = transaction.GetVolume();
// The parent's tree is only used if the parent is a container, and no
// attribute directory is being created.
BPlusTree* tree = NULL;
if (parent != NULL && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) {
tree = parent->Tree();
}
if (parent != NULL) {
parent->WriteLockInTransaction(transaction);
}
// do not create anything in a container that has been removed already
if (parent != NULL && !volume->IsInitializing() && parent->IsContainer()) {
bool removed;
if (get_vnode_removed(volume->FSVolume(), parent->ID(), &removed)
== B_OK && removed) {
RETURN_ERROR(B_ENTRY_NOT_FOUND);
}
}
if (tree != NULL) {
// check whether an entry with that name exists already
off_t offset;
if (tree->Find((uint8*)name, (uint16)strlen(name), &offset) == B_OK) {
// directories and symlinks cannot be reused, and O_EXCL demands a
// new file
if (S_ISDIR(mode) || S_ISLNK(mode) || (openMode & O_EXCL) != 0)
return B_FILE_EXISTS;
Vnode vnode(volume, offset);
Inode* inode;
status_t status = vnode.Get(&inode);
if (status != B_OK) {
REPORT_ERROR(status);
return B_ENTRY_NOT_FOUND;
}
// the open mode must fit the existing inode
if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
return B_IS_A_DIRECTORY;
if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
return B_NOT_A_DIRECTORY;
if (inode->CheckPermissions(open_mode_to_access(openMode)) != B_OK)
return B_NOT_ALLOWED;
if ((openMode & O_TRUNC) != 0) {
// truncate the existing file
// (this "status" shadows the one declared above)
inode->WriteLockInTransaction(transaction);
status_t status = inode->SetFileSize(transaction, 0);
if (status == B_OK)
status = inode->WriteBack(transaction);
if (status != B_OK)
return status;
}
if (_created)
*_created = false;
if (_id)
*_id = inode->ID();
if (_inode)
*_inode = inode;
// only keep the vnode reference if the caller gets hold of the inode
if (_id != NULL || _inode != NULL)
vnode.Keep();
return B_OK;
}
} else if (parent != NULL && (mode & S_ATTR_DIR) == 0) {
// there is a parent, but no tree to add the entry to
return B_BAD_VALUE;
} else if ((openMode & O_DIRECTORY) != 0) {
return B_ENTRY_NOT_FOUND;
}
// the entry does not exist yet; creating it needs write access to the parent
status_t status;
if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK)
RETURN_ERROR(status);
// allocate the new inode
InodeAllocator allocator(transaction);
block_run run;
Inode* inode;
status = allocator.New(&parentRun, mode, publishFlags, run, vnodeOps,
&inode);
if (status < B_OK)
return status;
T(Create(inode, parent, name, mode, openMode, type));
// initialize the on-disk structure of the new inode
bfs_inode* node = &inode->Node();
if (parent == NULL) {
// without a parent, the inode becomes its own parent
node->parent = run;
} else
node->parent = parentRun;
// the group is inherited from the parent, if there is one
node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid());
node->gid = HOST_ENDIAN_TO_BFS_INT32(parent
? parent->Node().GroupID() : getegid());
node->type = HOST_ENDIAN_TO_BFS_INT32(type);
// NOTE(review): the result of this WriteBack() is not checked - confirm
// this is intended.
inode->WriteBack(transaction);
if (tree && inode->IsRegularNode()
&& inode->SetName(transaction, name) != B_OK)
return B_ERROR;
if (inode->IsContainer()) {
status = allocator.CreateTree();
if (status != B_OK)
return status;
}
// add the new inode to its parent: either as entry in the tree, or as the
// attribute directory of the parent
if (tree != NULL) {
status = tree->Insert(transaction, name, inode->ID());
} else if (parent != NULL && (mode & S_ATTR_DIR) != 0) {
parent->Attributes() = run;
status = parent->WriteBack(transaction);
}
if (status != B_OK)
RETURN_ERROR(status);
// update the name index; B_BAD_INDEX is tolerated, any other failure undoes
// the link to the parent made above
Index index(volume);
if (inode->InNameIndex() && name != NULL) {
status = index.InsertName(transaction, name, inode);
if (status != B_OK && status != B_BAD_INDEX) {
if (tree)
tree->Remove(transaction, name, inode->ID());
else if (parent != NULL && (mode & S_ATTR_DIR) != 0)
parent->Node().attributes.SetTo(0, 0, 0);
RETURN_ERROR(status);
}
}
if (parent != NULL && parent->IsContainer())
parent->ContainerContentsChanged(transaction);
inode->UpdateOldLastModified();
// the results of the following index updates are not checked
if (inode->InSizeIndex())
index.InsertSize(transaction, inode);
if (inode->InLastModifiedIndex())
index.InsertLastModified(transaction, inode);
if (inode->NeedsFileCache()) {
inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(),
inode->Size()));
inode->SetMap(file_map_create(volume->ID(), inode->ID(),
inode->Size()));
if (inode->FileCache() == NULL || inode->Map() == NULL)
return B_NO_MEMORY;
}
// everything went fine: the allocator shall keep the inode
allocator.Keep(vnodeOps, publishFlags);
if (_created)
*_created = true;
if (_id != NULL)
*_id = inode->ID();
if (_inode != NULL)
*_inode = inode;
// nobody wants the inode; release the vnode reference again
if (_id == NULL && _inode == NULL)
put_vnode(volume->FSVolume(), inode->ID());
return B_OK;
}
/*!	Creates an iterator over the attributes of \a inode.
	The iteration starts at the first small_data item; the attribute
	directory and its tree iterator are only set up by GetNext() when needed.
	The new iterator registers itself with the inode.
*/
AttributeIterator::AttributeIterator(Inode* inode)
	:
	fCurrentSmallData(0),
	fInode(inode),
	fAttributes(NULL),
	fIterator(NULL),
	fBuffer(NULL)
{
	// fInode has been initialized from \a inode above
	fInode->_AddIterator(this);
}
/*!	Releases the vnode of the attribute directory (if GetNext() acquired
	it), deletes the tree iterator, and unregisters this iterator from its
	inode.
*/
AttributeIterator::~AttributeIterator()
{
	if (fAttributes != NULL) {
		Volume* volume = fAttributes->GetVolume();
		put_vnode(volume->FSVolume(), fAttributes->ID());
	}

	delete fIterator;
	fInode->_RemoveIterator(this);
}
/*!	Restarts the iteration: the small_data position is reset to the first
	item, and the tree iterator is rewound if it exists already.
	Always returns B_OK.
*/
status_t
AttributeIterator::Rewind()
{
	fCurrentSmallData = 0;

	if (fIterator == NULL)
		return B_OK;

	fIterator->Rewind();
	return B_OK;
}
/*!	Retrieves the next attribute of the inode.
	The attributes stored in the small_data section of the inode are returned
	first (skipping the item that holds the file name), followed by the
	entries of the attribute directory, if there is one.

	\param name Buffer of at least B_FILE_NAME_LENGTH bytes that receives the
		null-terminated attribute name.
	\param _length Receives the length of the name.
	\param _type Receives the type of the attribute.
	\param _id Receives the small_data index for small_data attributes, or
		the inode ID for attributes from the attribute directory.
	\return B_OK if an attribute was returned, B_ENTRY_NOT_FOUND if there is
		no attribute directory or it could not be opened, otherwise the error
		of the failing call (including the tree iterator's status once its
		entries are exhausted).
*/
status_t
AttributeIterator::GetNext(char* name, size_t* _length, uint32* _type,
	ino_t* _id)
{
	// read attributes out of the small data section
	// (fCurrentSmallData is -1 once that section is exhausted)

	if (fCurrentSmallData >= 0) {
		NodeGetter nodeGetter(fInode->GetVolume());
		status_t status = nodeGetter.SetTo(fInode);
		if (status != B_OK)
			return status;

		const bfs_inode* node = nodeGetter.Node();
		const small_data* item = ((bfs_inode*)node)->SmallDataStart();

		RecursiveLocker _(&fInode->SmallDataLock());

		// skip to the item at the current position, ignoring the file name
		int32 index = 0;
		for (; !item->IsLast(node); item = item->Next(), index++) {
			if (item->NameSize() == FILE_NAME_NAME_LENGTH
				&& *item->Name() == FILE_NAME_NAME)
				continue;

			if (index >= fCurrentSmallData)
				break;
		}

		if (!item->IsLast(node)) {
			// strlcpy() always terminates the name, even if the stored name
			// does not fit into the buffer
			strlcpy(name, item->Name(), B_FILE_NAME_LENGTH);
			*_type = item->Type();
			*_length = item->NameSize();
			*_id = (ino_t)index;

			fCurrentSmallData = index + 1;
			return B_OK;
		}

		// stop traversing the small_data section
		fCurrentSmallData = -1;
	}

	// read attributes out of the attribute directory

	if (fInode->Attributes().IsZero())
		return B_ENTRY_NOT_FOUND;

	Volume* volume = fInode->GetVolume();

	// NOTE(review): both FATAL() messages below print "name", which is the
	// caller's output buffer and may not hold a valid string yet - confirm.

	// get the attribute directory on first use; the reference is released
	// in the destructor
	if (fAttributes == NULL) {
		if (get_vnode(volume->FSVolume(), volume->ToVnode(fInode->Attributes()),
				(void**)&fAttributes) != B_OK) {
			FATAL(("get_vnode() failed in AttributeIterator::GetNext(ino_t"
				" = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
			return B_ENTRY_NOT_FOUND;
		}
	}

	// The iterator is checked separately from fAttributes: if creating it
	// failed in an earlier call, fAttributes is set already, and we must not
	// fall through to using a NULL iterator below.
	if (fIterator == NULL) {
		BPlusTree* tree = fAttributes->Tree();
		if (tree == NULL
			|| (fIterator = new(std::nothrow) TreeIterator(tree)) == NULL) {
			FATAL(("could not get tree in AttributeIterator::GetNext(ino_t"
				" = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
			return B_ENTRY_NOT_FOUND;
		}
	}

	uint16 length;
	ino_t id;
	status_t status = fIterator->GetNextEntry(name, &length,
		B_FILE_NAME_LENGTH, &id);
	if (status != B_OK)
		return status;

	// the type is stored in the attribute's inode
	Vnode vnode(volume, id);
	Inode* attribute;
	if ((status = vnode.Get(&attribute)) == B_OK) {
		*_type = attribute->Type();
		*_length = length;
		*_id = id;
	}

	return status;
}
/*!	Adjusts the small_data position of this iterator by \a change if the
	item at \a index lies before the current position. Positions at or before
	\a index are left untouched.
*/
void
AttributeIterator::Update(uint16 index, int8 change)
{
	if (fCurrentSmallData > index)
		fCurrentSmallData = fCurrentSmallData + change;
}