⛏️ index : haiku.git

/*
 * Copyright 2001-2015, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */
#ifndef B_PLUS_TREE_H
#define B_PLUS_TREE_H


#include "bfs.h"

#include "Utility.h"

#if !_BOOT_MODE
#include "Journal.h"
class Inode;
#else
#define Inode BFS::Stream

namespace BFS {

class Stream;
class Transaction;
class TransactionListener {};
#endif // _BOOT_MODE

//	#pragma mark - on-disk structures

struct bplustree_node;

#define BPLUSTREE_NULL			-1LL
#define BPLUSTREE_FREE			-2LL

struct bplustree_header {
	uint32		magic;
	uint32		node_size;
	uint32		max_number_of_levels;
	uint32		data_type;
	int64		root_node_pointer;
	int64		free_node_pointer;
	int64		maximum_size;

	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
	off_t RootNode() const
		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
	off_t FreeNode() const
		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
	uint32 MaxNumberOfLevels() const
		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }

	inline bool CheckNode(const bplustree_node* node) const;
	inline bool IsValidLink(off_t link) const;
	bool IsValid() const;
} _PACKED;

#define BPLUSTREE_MAGIC 			0x69f6c2e8
#define BPLUSTREE_NODE_SIZE 		1024
#define BPLUSTREE_MAX_KEY_LENGTH	256
#define BPLUSTREE_MIN_KEY_LENGTH	1

enum bplustree_types {
	BPLUSTREE_STRING_TYPE	= 0,
	BPLUSTREE_INT32_TYPE	= 1,
	BPLUSTREE_UINT32_TYPE	= 2,
	BPLUSTREE_INT64_TYPE	= 3,
	BPLUSTREE_UINT64_TYPE	= 4,
	BPLUSTREE_FLOAT_TYPE	= 5,
	BPLUSTREE_DOUBLE_TYPE	= 6
};


struct duplicate_array;


template <typename T>
struct  __attribute__((packed)) Unaligned {
	T value;

	Unaligned<T>& operator=(const T& newValue)
	{
		value = newValue; return *this;
	}
	operator T() const { return value; }
};


struct bplustree_node {
			int64				left_link;
			int64				right_link;
			int64				overflow_link;
			uint16				all_key_count;
			uint16				all_key_length;

			off_t				LeftLink() const
									{ return BFS_ENDIAN_TO_HOST_INT64(
										left_link); }
			off_t				RightLink() const
									{ return BFS_ENDIAN_TO_HOST_INT64(
										right_link); }
			off_t				OverflowLink() const
									{ return BFS_ENDIAN_TO_HOST_INT64(
										overflow_link); }
			uint16				NumKeys() const
									{ return BFS_ENDIAN_TO_HOST_INT16(
										all_key_count); }
			uint16				AllKeyLength() const
									{ return BFS_ENDIAN_TO_HOST_INT16(
										all_key_length); }

	inline	Unaligned<uint16>*	KeyLengths() const;
	inline	Unaligned<off_t>*   Values() const;
	inline	uint8*				Keys() const;
	inline	int32				Used() const;
			uint8*				KeyAt(int32 index, uint16* keyLength) const;

	inline	bool				IsLeaf() const;

			void				Initialize();
			uint8				CountDuplicates(off_t offset,
									bool isFragment) const;
			off_t				DuplicateAt(off_t offset, bool isFragment,
									int8 index) const;
			uint32				FragmentsUsed(uint32 nodeSize) const;
	inline	duplicate_array*	FragmentAt(int8 index) const;
	inline	duplicate_array*	DuplicateArray() const;

	static inline uint8			LinkType(off_t link);
	static inline off_t			MakeLink(uint8 type, off_t link,
									uint32 fragmentIndex = 0);
	static inline bool			IsDuplicate(off_t link);
	static inline off_t			FragmentOffset(off_t link);
	static inline uint32		FragmentIndex(off_t link);
	static inline uint32		MaxFragments(uint32 nodeSize);

			status_t			CheckIntegrity(uint32 nodeSize) const;
} _PACKED;

//#define BPLUSTREE_NODE 0
#define BPLUSTREE_DUPLICATE_NODE 2
#define BPLUSTREE_DUPLICATE_FRAGMENT 3

#define NUM_FRAGMENT_VALUES 7
#define NUM_DUPLICATE_VALUES 125

//**************************************

enum bplustree_traversing {
	BPLUSTREE_FORWARD = 1,
	BPLUSTREE_BACKWARD = -1,

	BPLUSTREE_BEGIN = 0,
	BPLUSTREE_END = 1
};


//	#pragma mark - in-memory structures


class BPlusTree;
struct TreeCheck;
class TreeIterator;


#if !_BOOT_MODE
// needed for searching (utilizing a stack)
struct node_and_key {
	off_t	nodeOffset;
	uint16	keyIndex;
};
#endif // !_BOOT_MODE


class CachedNode {
public:
	CachedNode(BPlusTree* tree)
		:
		fTree(tree),
		fNode(NULL),
		fOffset(0),
		fBlockNumber(0),
		fBlock(NULL),
		fWritable(false)
	{
	}

	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
		:
		fTree(tree),
		fNode(NULL),
		fOffset(0),
		fBlockNumber(0),
		fBlock(NULL),
		fWritable(false)
	{
		SetTo(offset, check);
	}

	~CachedNode()
	{
		Unset();
#if _BOOT_MODE
		free(fBlock);
#endif
	}

			const bplustree_node* SetTo(off_t offset, bool check = true);
			status_t			SetTo(off_t offset,
									const bplustree_node** _node,
									bool check = true);

			const bplustree_header* SetToHeader();
			void				Unset();

#if !_BOOT_MODE
			bplustree_node*		SetToWritable(Transaction& transaction,
									off_t offset, bool check = true);
			bplustree_node*		MakeWritable(Transaction& transaction);
			bplustree_header*	SetToWritableHeader(Transaction& transaction);

			void				UnsetUnchanged(Transaction& transaction);

			status_t			Free(Transaction& transaction, off_t offset);
			status_t			Allocate(Transaction& transaction,
									bplustree_node** _node, off_t* _offset);
#endif // !_BOOT_MODE

			bool				IsWritable() const { return fWritable; }
			bplustree_node*		Node() const { return fNode; }

protected:
			bplustree_node*		InternalSetTo(Transaction* transaction,
									off_t offset);

			BPlusTree*			fTree;
			bplustree_node*		fNode;
			off_t				fOffset;
			off_t				fBlockNumber;
			uint8*				fBlock;
			bool				fWritable;
};


class BPlusTree : public TransactionListener {
public:
#if !_BOOT_MODE
								BPlusTree(Transaction& transaction,
									Inode* stream,
									int32 nodeSize = BPLUSTREE_NODE_SIZE);
#endif
								BPlusTree(Inode* stream);
								BPlusTree();
								~BPlusTree();

#if !_BOOT_MODE
			status_t			SetTo(Transaction& transaction, Inode* stream,
									int32 nodeSize = BPLUSTREE_NODE_SIZE);
#endif
			status_t			SetTo(Inode* stream);
			status_t			SetStream(Inode* stream);

			status_t			InitCheck();

			size_t				NodeSize() const { return fNodeSize; }
			Inode*				Stream() const { return fStream; }

#if !_BOOT_MODE
			status_t			Validate(bool repair, bool& _errorsFound);
			status_t			MakeEmpty();

			status_t			Remove(Transaction& transaction,
									const uint8* key, uint16 keyLength,
									off_t value);
			status_t			Insert(Transaction& transaction,
									const uint8* key, uint16 keyLength,
									off_t value);

			status_t			Remove(Transaction& transaction, const char* key,
									off_t value);
			status_t			Insert(Transaction& transaction, const char* key,
									off_t value);
			status_t			Insert(Transaction& transaction, int32 key,
									off_t value);
			status_t			Insert(Transaction& transaction, uint32 key,
									off_t value);
			status_t			Insert(Transaction& transaction, int64 key,
									off_t value);
			status_t			Insert(Transaction& transaction, uint64 key,
									off_t value);
			status_t			Insert(Transaction& transaction, float key,
									off_t value);
			status_t			Insert(Transaction& transaction, double key,
									off_t value);

			status_t			Replace(Transaction& transaction,
									const uint8* key, uint16 keyLength,
									off_t value);
#endif // !_BOOT_MODE

			status_t			Find(const uint8* key, uint16 keyLength,
									off_t* value);

#if !_BOOT_MODE
	static	int32				TypeCodeToKeyType(type_code code);
	static	int32				ModeToKeyType(mode_t mode);

protected:
	virtual void				TransactionDone(bool success);
	virtual void				RemovedFromTransaction();
#endif // !_BOOT_MODE

private:
								BPlusTree(const BPlusTree& other);
								BPlusTree& operator=(const BPlusTree& other);
									// no implementation

			int32				_CompareKeys(const void* key1, int keylength1,
									const void* key2, int keylength2);
			status_t			_FindKey(const bplustree_node* node,
									const uint8* key, uint16 keyLength,
									uint16* index = NULL, off_t* next = NULL);
#if !_BOOT_MODE
			status_t			_SeekDown(Stack<node_and_key>& stack,
									const uint8* key, uint16 keyLength);

			status_t			_FindFreeDuplicateFragment(
									Transaction& transaction,
									const bplustree_node* node,
									CachedNode& cached, off_t* _offset,
									bplustree_node** _fragment, uint32* _index);
			status_t			_InsertDuplicate(Transaction& transaction,
									CachedNode& cached,
									const bplustree_node* node, uint16 index,
									off_t value);
			void				_InsertKey(bplustree_node* node, uint16 index,
									uint8* key, uint16 keyLength, off_t value);
			status_t			_SplitNode(bplustree_node* node,
									off_t nodeOffset, bplustree_node* other,
									off_t otherOffset, uint16* _keyIndex,
									uint8* key, uint16* _keyLength,
									off_t* _value);

			status_t			_RemoveDuplicate(Transaction& transaction,
									const bplustree_node* node,
									CachedNode& cached, uint16 keyIndex,
									off_t value);
			void				_RemoveKey(bplustree_node* node, uint16 index);

			void				_UpdateIterators(off_t offset, off_t nextOffset,
									uint16 keyIndex, uint16 splitAt,
									int8 change);
			void				_AddIterator(TreeIterator* iterator);
			void				_RemoveIterator(TreeIterator* iterator);

			status_t			_ValidateChildren(TreeCheck& check,
									uint32 level, off_t offset,
									const uint8* largestKey, uint16 keyLength,
									const bplustree_node* parent);
			status_t			_ValidateChild(TreeCheck& check,
									CachedNode& cached, uint32 level,
									off_t offset, off_t lastOffset,
									off_t nextOffset, const uint8* key,
									uint16 keyLength);
#endif // !_BOOT_MODE

private:
			friend class TreeIterator;
			friend class CachedNode;
			friend struct TreeCheck;

			Inode*				fStream;
			bplustree_header	fHeader;
			int32				fNodeSize;
			bool				fAllowDuplicates;
			bool				fInTransaction;
			status_t			fStatus;

#if !_BOOT_MODE
			mutex				fIteratorLock;
			SinglyLinkedList<TreeIterator> fIterators;
#endif
};


//	#pragma mark - helper classes/functions


class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
public:
								TreeIterator(BPlusTree* tree);
								~TreeIterator();

			status_t			Goto(int8 to);
			status_t			Traverse(int8 direction, void* key,
									uint16* keyLength, uint16 maxLength,
									off_t* value, uint16* duplicate = NULL);
			status_t			Find(const uint8* key, uint16 keyLength);

			status_t			Rewind();
			status_t			GetNextEntry(void* key, uint16* keyLength,
									uint16 maxLength, off_t* value,
									uint16* duplicate = NULL);
			status_t			GetPreviousEntry(void* key, uint16* keyLength,
									uint16 maxLength, off_t* value,
									uint16* duplicate = NULL);
			void				SkipDuplicates();

			BPlusTree*			Tree() const { return fTree; }

#ifdef DEBUG
			void				Dump();
#endif

private:
			friend class BPlusTree;

			// called by BPlusTree
			void				Update(off_t offset, off_t nextOffset,
									uint16 keyIndex, uint16 splitAt,
									int8 change);
			void				Stop();

private:
			BPlusTree*			fTree;
			off_t				fCurrentNodeOffset;
									// traverse position
			int32				fCurrentKey;
			off_t				fDuplicateNode;
			uint16				fDuplicate;
			uint16				fNumDuplicates;
			bool				fIsFragment;
};


//	#pragma mark - BPlusTree's inline functions
//	(most of them may not be needed)


#if !_BOOT_MODE
inline status_t
BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
		return B_BAD_TYPE;
	return Remove(transaction, (uint8*)key, strlen(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
		return B_BAD_TYPE;
	return Insert(transaction, (uint8*)key, strlen(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
		return B_BAD_TYPE;
	return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
		return B_BAD_TYPE;
	return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
		return B_BAD_TYPE;
	return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
		return B_BAD_TYPE;
	return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, float key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
		return B_BAD_TYPE;
	return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, double key, off_t value)
{
	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
		return B_BAD_TYPE;
	return Insert(transaction, (uint8*)&key, sizeof(key), value);
}
#endif // !_BOOT_MODE


//	#pragma mark - TreeIterator inline functions


inline status_t
TreeIterator::Rewind()
{
	return Goto(BPLUSTREE_BEGIN);
}


inline status_t
TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
	off_t* value, uint16* duplicate)
{
	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
		duplicate);
}


inline status_t
TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
	off_t* value, uint16* duplicate)
{
	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
		duplicate);
}


//	#pragma mark - bplustree_header inline functions


inline bool
bplustree_header::CheckNode(const bplustree_node* node) const
{
	// sanity checks (links, all_key_count)
	return IsValidLink(node->LeftLink())
		&& IsValidLink(node->RightLink())
		&& IsValidLink(node->OverflowLink())
		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
				<= (int8*)node + NodeSize();
}


inline bool
bplustree_header::IsValidLink(off_t link) const
{
	return link == BPLUSTREE_NULL
		|| (link > 0 && link <= MaximumSize() - NodeSize());
}


//	#pragma mark - bplustree_node inline functions


inline Unaligned<uint16>*
bplustree_node::KeyLengths() const
{
	return (Unaligned<uint16>*)(((char*)this) + key_align(sizeof(bplustree_node)
		+ AllKeyLength()));
}


inline Unaligned<off_t>*
bplustree_node::Values() const
{
	return (Unaligned<off_t>*)(
		(char*)KeyLengths() + NumKeys() * sizeof(uint16));
}


inline uint8*
bplustree_node::Keys() const
{
	return (uint8*)this + sizeof(bplustree_node);
}


inline int32
bplustree_node::Used() const
{
	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
		* (sizeof(uint16) + sizeof(off_t));
}


inline bool
bplustree_node::IsLeaf() const
{
	return OverflowLink() == BPLUSTREE_NULL;
}


inline duplicate_array*
bplustree_node::FragmentAt(int8 index) const
{
	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
}


inline duplicate_array*
bplustree_node::DuplicateArray() const
{
	return (duplicate_array*)&overflow_link;
}


inline uint8
bplustree_node::LinkType(off_t link)
{
	return *(uint64*)&link >> 62;
}


inline off_t
bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
{
	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
		| (fragmentIndex & 0x3ff);
}


inline bool
bplustree_node::IsDuplicate(off_t link)
{
	return (LinkType(link)
		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
}


inline off_t
bplustree_node::FragmentOffset(off_t link)
{
	return link & 0x3ffffffffffffc00LL;
}


inline uint32
bplustree_node::FragmentIndex(off_t link)
{
	return (uint32)(link & 0x3ff);
}


inline uint32
bplustree_node::MaxFragments(uint32 nodeSize)
{
	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
}


#if _BOOT_MODE
} // namespace BFS

#undef Inode
#endif

#endif	// B_PLUS_TREE_H