⛏️ index : haiku.git

/*-
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
 *
 *	This module implements a general bitmap allocator/deallocator.  The
 *	allocator eats around 2 bits per 'block'.  The module does not
 *	try to interpret the meaning of a 'block' other then to return
 *	SWAPBLK_NONE on an allocation failure.
 *
 *	A radix tree is used to maintain the bitmap.  Two radix constants are
 *	involved:  One for the bitmaps contained in the leaf nodes (typically
 *	32), and one for the meta nodes (typically 16).  Both meta and leaf
 *	nodes have a hint field.  This field gives us a hint as to the largest
 *	free contiguous range of blocks under the node.  It may contain a
 *	value that is too high, but will never contain a value that is too
 *	low.  When the radix tree is searched, allocation failures in subtrees
 *	update the hint.
 *
 *	The radix tree also implements two collapsed states for meta nodes:
 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 *	in either of these two states, all information contained underneath
 *	the node is considered stale.  These states are used to optimize
 *	allocation and freeing operations.
 *
 * 	The hinting greatly increases code efficiency for allocations while
 *	the general radix structure optimizes both allocations and frees.  The
 *	radix tree should be able to operate well no matter how much
 *	fragmentation there is and no matter how large a bitmap is used.
 *
 *	Unlike the rlist code, the blist code wires all necessary memory at
 *	creation time.  Neither allocations nor frees require interaction with
 *	the memory subsystem.  In contrast, the rlist code may allocate memory
 *	on an rlist_free() call.  The non-blocking features of the blist code
 *	are used to great advantage in the swap code (vm/nswap_pager.c).  The
 *	rlist code uses a little less overall memory then the blist code (but
 *	due to swap interleaving not all that much less), but the blist code
 *	scales much, much better.
 *
 *	LAYOUT: The radix tree is layed out recursively using a
 *	linear array.  Each meta node is immediately followed (layed out
 *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
 *	is a recursive structure but one that can be easily scanned through
 *	a very simple 'skip' calculation.  In order to support large radixes,
 *	portions of the tree may reside outside our memory allocation.  We
 *	handle this with an early-termination optimization (when bighint is
 *	set to -1) on the scan.  The memory allocation is only large enough
 *	to cover the number of blocks requested at creation time even if it
 *	must be encompassed in larger root-node radix.
 *
 *	NOTE: the allocator cannot currently allocate more then
 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 *	large' if you try.  This is an area that could use improvement.  The
 *	radix is large enough that this restriction does not effect the swap
 *	system, though.  Currently only the allocation code is effected by
 *	this algorithmic unfeature.  The freeing code can handle arbitrary
 *	ranges.
 *
 *	This code can be compiled stand-alone for debugging.
 */

/*
 *  NOTE: a few modifications is made in the orginal FreeBSD code:
 *  1. change the name of some variables/constants
 *  2. discard the code handling ALL_FREE and ALL_ALLOCATED state
 *  3. allocate more than 32 slots won't lead to kernel panic
 *  4. remove all the code for debugging
 *
 *                                                      Zhao Shuai
 *                                                   upczhsh@163.com
 */


#include <stdlib.h>
#include <util/RadixBitmap.h>

#define TERMINATOR -1


static uint32
radix_bitmap_init(radix_node *node, uint32 radix, uint32 skip, uint32 slots)
{
	uint32 index = 0;
	int32 big_hint = radix < slots ? radix : slots;

	// leaf node
	if (radix == BITMAP_RADIX) {
		if (node) {
			node->big_hint = big_hint;
			if (big_hint == BITMAP_RADIX)
				node->u.bitmap = 0;
			else
				node->u.bitmap = (bitmap_t)-1 << big_hint;
		}
		return index;
	}

	// not leaf node
	if (node) {
		node->big_hint = big_hint;
		node->u.available = big_hint;
	}

	radix /= NODE_RADIX;
	uint32 next_skip = skip / NODE_RADIX;

	uint32 i;
	for (i = 1; i <= skip; i += next_skip) {
		if (slots >= radix) {
			index = i + radix_bitmap_init(node ? &node[i] : NULL,
					radix, next_skip - 1, radix);
			slots -= radix;
		} else if (slots > 0) {
			index = i + radix_bitmap_init(node ? &node[i] : NULL,
					radix, next_skip - 1, slots);
			slots = 0;
		} else { // add a terminator
			if (node)
				node[i].big_hint = TERMINATOR;
			break;
		}
	}

	if (index < i)
		index = i;

	return index;
}


radix_bitmap *
radix_bitmap_create(uint32 slots)
{
	uint32 radix = BITMAP_RADIX;
	uint32 skip = 0;

	while (radix < slots) {
		radix *= NODE_RADIX;
		skip = (skip + 1) * NODE_RADIX;
	}

	radix_bitmap *bmp = (radix_bitmap *)malloc(sizeof(radix_bitmap));
	if (bmp == NULL)
		return NULL;

	bmp->radix = radix;
	bmp->skip = skip;
	bmp->free_slots = slots;
	bmp->root_size = 1 + radix_bitmap_init(NULL, radix, skip, slots);

	bmp->root = (radix_node *)malloc(bmp->root_size * sizeof(radix_node));
	if (bmp->root == NULL) {
		free(bmp);
		return NULL;
	}

	radix_bitmap_init(bmp->root, radix, skip, slots);

	return bmp;
}


void
radix_bitmap_destroy(radix_bitmap *bmp)
{
	free(bmp->root);
	free(bmp);
}


static radix_slot_t
radix_leaf_alloc(radix_node *leaf, radix_slot_t slotIndex, int32 count)
{
	if (count <= (int32)BITMAP_RADIX) {
		bitmap_t bitmap = ~leaf->u.bitmap;
		uint32 n = BITMAP_RADIX - count;
		bitmap_t mask = (bitmap_t)-1 >> n;
		for (uint32 j = 0; j <= n; j++) {
			if ((bitmap & mask) == mask) {
				leaf->u.bitmap |= mask;
				return (slotIndex + j);
			}
			mask <<= 1;
		}
	}

	// we could not allocate count here, update big_hint
	if (leaf->big_hint >= count)
		leaf->big_hint = count - 1;
	return RADIX_SLOT_NONE;
}


static radix_slot_t
radix_node_alloc(radix_node *node, radix_slot_t slotIndex, int32 count,
		uint32 radix, uint32 skip)
{
	uint32 next_skip = skip / NODE_RADIX;
	radix /= NODE_RADIX;

	for (uint32 i = 1; i <= skip; i += next_skip) {
		if (node[i].big_hint == TERMINATOR)  // TERMINATOR
			break;

		if (count <= node[i].big_hint) {
			radix_slot_t addr = RADIX_SLOT_NONE;
			if (next_skip == 1)
				addr = radix_leaf_alloc(&node[i], slotIndex, count);
			else
				addr = radix_node_alloc(&node[i], slotIndex, count, radix,
						next_skip - 1);
			if (addr != RADIX_SLOT_NONE) {
				node->u.available -= count;
				if (node->big_hint > node->u.available)
					node->big_hint = node->u.available;

				return addr;
			}
		}
		slotIndex += radix;
	}

	// we could not allocate count in the subtree, update big_hint
	if (node->big_hint >= count)
		node->big_hint = count - 1;
	return RADIX_SLOT_NONE;
}


radix_slot_t
radix_bitmap_alloc(radix_bitmap *bmp, uint32 count)
{
	radix_slot_t addr = RADIX_SLOT_NONE;

	if (bmp->radix == BITMAP_RADIX)
		addr = radix_leaf_alloc(bmp->root, 0, count);
	else
		addr = radix_node_alloc(bmp->root, 0, count, bmp->radix, bmp->skip);

	if (addr != RADIX_SLOT_NONE)
		bmp->free_slots -= count;

	return addr;
}


static void
radix_leaf_dealloc(radix_node *leaf, radix_slot_t slotIndex, uint32 count)
{
	uint32 n = slotIndex & (BITMAP_RADIX - 1);
	bitmap_t mask = ((bitmap_t)-1 >> (BITMAP_RADIX - count - n))
		& ((bitmap_t)-1 << n);
	leaf->u.bitmap &= ~mask;

	leaf->big_hint = BITMAP_RADIX;
}


static void
radix_node_dealloc(radix_node *node, radix_slot_t slotIndex, uint32 count,
		uint32 radix, uint32 skip, radix_slot_t index)
{
	node->u.available += count;

	uint32 next_skip = skip / NODE_RADIX;
	radix /= NODE_RADIX;

	uint32 i = (slotIndex - index) / radix;
	index += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && index < slotIndex + count) {
		uint32 v = index + radix - slotIndex;
		if (v > count)
			v = count;

		if (next_skip == 1)
			radix_leaf_dealloc(&node[i], slotIndex, v);
		else
			radix_node_dealloc(&node[i], slotIndex, v, radix,
					next_skip - 1, index);

		if (node->big_hint < node[i].big_hint)
			node->big_hint = node[i].big_hint;

		count -= v;
		slotIndex += v;
		index += radix;
		i += next_skip;
	}
}


void
radix_bitmap_dealloc(radix_bitmap *bmp, radix_slot_t slotIndex, uint32 count)
{
	if (bmp->radix == BITMAP_RADIX)
		radix_leaf_dealloc(bmp->root, slotIndex, count);
	else
		radix_node_dealloc(bmp->root, slotIndex, count, bmp->radix,
				bmp->skip, 0);

	bmp->free_slots += count;
}