⛏️ index : haiku.git

/*
 * Copyright 2006-2009, Haiku, Inc. All Rights Reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Axel Dörfler, axeld@pinc-software.de
 *
 * The phase coefficient computation was taken from the X driver written by
 * Alan Hourihane and David Dawes.
 */


#include "accelerant.h"
#include "accelerant_protos.h"
#include "commands.h"

#include <Debug.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <AGP.h>


// Debug output helpers.
// TRACE() only produces output when TRACE_OVERLAY is defined (uncomment the
// line below to enable it); otherwise it expands to nothing.
#undef TRACE
//#define TRACE_OVERLAY
#ifdef TRACE_OVERLAY
#	define TRACE(x...) _sPrintf("intel_extreme: " x)
#else
#	define TRACE(x...)
#endif

// ERROR() always prints via _sPrintf(), prefixed with the driver name.
#define ERROR(x...) _sPrintf("intel_extreme: " x)
// CALLED() reports the name of the enclosing function through TRACE(), so it
// is silent as well unless TRACE_OVERLAY is defined.
#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)


// Filter geometry used when computing the overlay scaler coefficients.
// Number of taps of the horizontal filter for the Y/RGB data.
#define NUM_HORIZONTAL_TAPS		5
// Number of taps of the vertical filter for the Y/RGB data.
#define NUM_VERTICAL_TAPS		3
// Number of taps of the horizontal filter for the UV data.
#define NUM_HORIZONTAL_UV_TAPS	3
// Number of taps of the vertical filter for the UV data.
#define NUM_VERTICAL_UV_TAPS	3
// Number of filter phases; one set of tap coefficients is computed per phase.
#define NUM_PHASES				17
// Largest tap count of any filter; used to size the temporary arrays in
// update_coefficients().
#define MAX_TAPS				5

/*!	A filter coefficient split into the components that are written to the
	overlay coefficient registers (as sign << 15 | exponent << 12 | mantissa).
*/
struct phase_coefficient {
	// 1 if the coefficient is negative, 0 otherwise
	uint8	sign;
	// 0 to 3, as chosen by split_coefficient(); larger values are used for
	// smaller magnitudes
	uint8	exponent;
	// quantized magnitude, already shifted left by (12 - mantissa size)
	uint16	mantissa;
};


/*!	Splits the coefficient floating point value into the 3 components
	sign, mantissa, and exponent.
*/
static bool
split_coefficient(double &coefficient, int32 mantissaSize,
	phase_coefficient &splitCoefficient)
{
	double absCoefficient = fabs(coefficient);

	int sign;
	if (coefficient < 0.0)
		sign = 1;
	else
		sign = 0;

	int32 intCoefficient, res;
	int32 maxValue = 1 << mantissaSize;
	res = 12 - mantissaSize;

	if ((intCoefficient = (int)(absCoefficient * 4 * maxValue + 0.5))
			< maxValue) {
		splitCoefficient.exponent = 3;
		splitCoefficient.mantissa = intCoefficient << res;
		coefficient = (double)intCoefficient / (double)(4 * maxValue);
	} else if ((intCoefficient = (int)(absCoefficient * 2 * maxValue + 0.5))
			< maxValue) {
		splitCoefficient.exponent = 2;
		splitCoefficient.mantissa = intCoefficient << res;
		coefficient = (double)intCoefficient / (double)(2 * maxValue);
	} else if ((intCoefficient = (int)(absCoefficient * maxValue + 0.5))
			< maxValue) {
		splitCoefficient.exponent = 1;
		splitCoefficient.mantissa = intCoefficient << res;
		coefficient = (double)intCoefficient / (double)maxValue;
	} else if ((intCoefficient = (int)(absCoefficient * maxValue * 0.5 + 0.5))
			< maxValue) {
		splitCoefficient.exponent = 0;
		splitCoefficient.mantissa = intCoefficient << res;
		coefficient = (double)intCoefficient / (double)(maxValue / 2);
	} else {
		// coefficient out of range
		return false;
	}

	splitCoefficient.sign = sign;
	if (sign)
		coefficient = -coefficient;

	return true;
}


/*!	Computes the scaler filter coefficients for all NUM_PHASES phases of a
	filter with \a taps taps, and stores them split into
	sign/exponent/mantissa in \a splitCoefficients (indexed as
	phase * taps + tap, so the array must hold at least NUM_PHASES * taps
	entries).

	\param taps number of filter taps (at most MAX_TAPS).
	\param filterCutOff cut-off of the filter; clamped to the range [1, 3].
	\param horizontal \c true for a horizontal filter (7 bit mantissa),
		\c false for a vertical one (6 bit mantissa).
	\param isY \c true for the Y/RGB filter, \c false for the UV filter.
	\param splitCoefficients receives the split coefficients.
*/
static void
update_coefficients(int32 taps, double filterCutOff, bool horizontal, bool isY,
	phase_coefficient* splitCoefficients)
{
	if (filterCutOff < 1)
		filterCutOff = 1;
	if (filterCutOff > 3)
		filterCutOff = 3;

	const bool isVerticalUV = !horizontal && !isY;
	const int32 baseMantissaSize = horizontal ? 7 : 6;
	const int32 centerTap = (taps - 1) / 2;

	// The order in which taps are corrected for rounding errors: the center
	// tap first, then alternating outwards (left neighbour, right neighbour).
	// This is the same for every phase.
	int32 tapAdjust[MAX_TAPS];
	tapAdjust[0] = centerTap;
	for (int32 distance = 1, k = 1; distance <= centerTap;
			distance++, k += 2) {
		tapAdjust[k] = centerTap - distance;
		tapAdjust[k + 1] = centerTap + distance;
	}

	double rawCoefficients[MAX_TAPS * 32];
	double coefficients[NUM_PHASES][MAX_TAPS];

	// Sample a windowed sinc at 32 positions per tap
	const int32 num = taps * 16;
	for (int32 i = 0; i < num * 2; i++) {
		double value = (1.0 / filterCutOff) * taps * M_PI * (i - num)
			/ (2 * num);
		double sinc = value == 0.0 ? 1.0 : sin(value) / value;

		// raised cosine window
		double window = (0.5 - 0.5 * cos(i * M_PI / num));
		rawCoefficients[i] = sinc * window;
	}

	for (int32 phase = 0; phase < NUM_PHASES; phase++) {
		double* row = coefficients[phase];
		phase_coefficient* splitRow = splitCoefficients + phase * taps;

		// Normalise the taps of this phase so that they add up to one
		double sum = 0.0;
		for (int32 tap = 0; tap < taps; tap++)
			sum += rawCoefficients[phase + tap * 32];
		for (int32 tap = 0; tap < taps; tap++)
			row[tap] = rawCoefficients[phase + tap * 32] / sum;

		// Quantize them; the center tap gets two extra mantissa bits, unless
		// this is the vertical UV filter. split_coefficient() replaces each
		// value with its quantized counterpart.
		for (int32 tap = 0; tap < taps; tap++) {
			split_coefficient(row[tap], baseMantissaSize
				+ ((tap == centerTap && !isVerticalUV) ? 2 : 0),
				splitRow[tap]);
		}

		// See what the quantized taps add up to now
		sum = 0.0;
		for (int32 tap = 0; tap < taps; tap++)
			sum += row[tap];

		if (sum == 1.0)
			continue;

		// Push the rounding error into one tap after the other until the
		// quantized taps add up to exactly one (or we ran out of taps)
		for (int32 k = 0; k < taps; k++) {
			const int32 tap = tapAdjust[k];

			row[tap] += 1.0 - sum;
			split_coefficient(row[tap], baseMantissaSize
				+ ((tap == centerTap && !isVerticalUV) ? 2 : 0),
				splitRow[tap]);

			sum = 0.0;
			for (int32 j = 0; j < taps; j++)
				sum += row[j];

			if (sum == 1.0)
				break;
		}
	}
}


/*!	Programs the color key of the overlay registers and enables it.
	The key values are stored as given, while the mask registers receive the
	bitwise complement of the masks passed in.
*/
static void
set_color_key(uint8 red, uint8 green, uint8 blue, uint8 redMask,
	uint8 greenMask, uint8 blueMask)
{
	overlay_registers& target = *gInfo->overlay_registers;

	// key value
	target.color_key_red = red;
	target.color_key_green = green;
	target.color_key_blue = blue;

	// key mask (stored inverted)
	target.color_key_mask_red = ~redMask;
	target.color_key_mask_green = ~greenMask;
	target.color_key_mask_blue = ~blueMask;

	target.color_key_enabled = true;
}


/*!	Programs the color key from the values and masks in \a window, scaled to
	8 bits per channel according to the color space of the current mode.
*/
static void
set_color_key(const overlay_window* window)
{
	// how far each channel has to be shifted left to fill 8 bits
	int redShift = 0;
	int greenShift = 0;
	int blueShift = 0;

	switch (gInfo->shared_info->current_mode.space) {
		case B_CMAP8:
			// only the blue channel carries the key here, and all of its
			// bits are significant
			set_color_key(0, 0, window->blue.value, 0x0, 0x0, 0xff);
			return;

		case B_RGB15:
			redShift = greenShift = blueShift = 3;
			break;

		case B_RGB16:
			redShift = blueShift = 3;
			greenShift = 2;
			break;

		default:
			// channels are used as is
			break;
	}

	set_color_key(window->red.value << redShift,
		window->green.value << greenShift, window->blue.value << blueShift,
		window->red.mask << redShift, window->green.mask << greenShift,
		window->blue.mask << blueShift);
}


static void
update_overlay(bool updateCoefficients)
{
	if (!gInfo->shared_info->overlay_active
		|| gInfo->shared_info->device_type.IsModel(INTEL_MODEL_965))
		return;

	QueueCommands queue(gInfo->shared_info->primary_ring_buffer);
	queue.PutFlush();
	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);
	queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, updateCoefficients);

	// make sure the flip is done now
	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);
	queue.PutFlush();

	TRACE("%s: UP: %lx, TST: %lx, ST: %lx, CMD: %lx (%lx), ERR: %lx\n",
		__func__, read32(INTEL_OVERLAY_UPDATE),
		read32(INTEL_OVERLAY_TEST), read32(INTEL_OVERLAY_STATUS),
		*(((uint32*)gInfo->overlay_registers) + 0x68/4), read32(0x30168),
		read32(0x2024));
}


static void
show_overlay(void)
{
	if (gInfo->shared_info->overlay_active
		|| gInfo->shared_info->device_type.IsModel(INTEL_MODEL_965))
		return;

	gInfo->shared_info->overlay_active = true;
	gInfo->overlay_registers->overlay_enabled = true;

	QueueCommands queue(gInfo->shared_info->primary_ring_buffer);
	queue.PutOverlayFlip(COMMAND_OVERLAY_ON, true);
	queue.PutFlush();

	TRACE("%s: UP: %lx, TST: %lx, ST: %lx, CMD: %lx (%lx), ERR: %lx\n",
		__func__, read32(INTEL_OVERLAY_UPDATE),
		read32(INTEL_OVERLAY_TEST), read32(INTEL_OVERLAY_STATUS),
		*(((uint32*)gInfo->overlay_registers) + 0x68/4),
		read32(0x30168), read32(0x2024));
}


static void
hide_overlay(void)
{
	if (!gInfo->shared_info->overlay_active
		|| gInfo->shared_info->device_type.IsModel(INTEL_MODEL_965))
		return;

	overlay_registers* registers = gInfo->overlay_registers;

	gInfo->shared_info->overlay_active = false;
	registers->overlay_enabled = false;

	QueueCommands queue(gInfo->shared_info->primary_ring_buffer);

	// flush pending commands
	queue.PutFlush();
	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);

	// clear overlay enabled bit
	queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, false);
	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);

	// turn off overlay engine
	queue.PutOverlayFlip(COMMAND_OVERLAY_OFF, false);
	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);

	gInfo->current_overlay = NULL;
}


//	#pragma mark -


/*!	Returns the number of overlays available in \a mode; currently this is
	always one, independent of the mode.
*/
uint32
intel_overlay_count(const display_mode* mode)
{
	// TODO: make this depending on the amount of RAM and the screen mode
	// (and we could even have more than one when using 3D as well)
	const uint32 kOverlayCount = 1;
	return kOverlayCount;
}


/*!	Returns the zero terminated list of color spaces an overlay buffer may
	have. Devices in the INTEL_GROUP_96x group only get B_YCbCr422; \a mode
	is not taken into account.
*/
const uint32*
intel_overlay_supported_spaces(const display_mode* mode)
{
	static const uint32 kSupportedSpaces[] = {B_RGB15, B_RGB16, B_RGB32,
		B_YCbCr422, 0};
	static const uint32 kSupportedi965Spaces[] = {B_YCbCr422, 0};

	const bool is96x
		= gInfo->shared_info->device_type.InGroup(INTEL_GROUP_96x);

	return is96x ? kSupportedi965Spaces : kSupportedSpaces;
}


/*!	Returns the overlay feature flags; they are the same for every
	\a colorSpace.
*/
uint32
intel_overlay_supported_features(uint32 colorSpace)
{
	uint32 features = B_OVERLAY_COLOR_KEY;
	features |= B_OVERLAY_HORIZONTAL_FILTERING;
	features |= B_OVERLAY_VERTICAL_FILTERING;
	features |= B_OVERLAY_HORIZONTAL_MIRRORING;

	return features;
}


/*!	Allocates an overlay buffer of the given dimensions and color space in
	graphics memory, together with the driver's bookkeeping structure for it.
	On INTEL_MODEL_965, an additional overlay state area is allocated, too.

	\return the new buffer, or \c NULL if the color space is not supported,
		or any of the allocations failed. The buffer has to be released with
		intel_release_overlay_buffer().
*/
const overlay_buffer*
intel_allocate_overlay_buffer(color_space colorSpace, uint16 width,
	uint16 height)
{
	TRACE("%s(width %u, height %u, colorSpace %lu)\n", __func__, width,
		height, colorSpace);

	uint32 bytesPerPixel;
	switch (colorSpace) {
		case B_RGB15:
		case B_RGB16:
		case B_YCbCr422:
			bytesPerPixel = 2;
			break;
		case B_RGB32:
			bytesPerPixel = 4;
			break;
		default:
			// not a color space we can display
			return NULL;
	}

	intel_shared_info &sharedInfo = *gInfo->shared_info;
	const bool isI965 = sharedInfo.device_type.IsModel(INTEL_MODEL_965);

	struct overlay* overlay = (struct overlay*)malloc(sizeof(struct overlay));
	if (overlay == NULL)
		return NULL;

	// TODO: locking!

	// Fill in the buffer description; rows are padded to 64 bytes (256 bytes
	// on the i965)
	int32 alignment = isI965 ? 0xff : 0x3f;

	overlay_buffer* buffer = &overlay->buffer;
	buffer->space = colorSpace;
	buffer->width = width;
	buffer->height = height;
	buffer->bytes_per_row = (width * bytesPerPixel + alignment) & ~alignment;

	// Allocate the graphics memory backing the buffer
	status_t status = intel_allocate_memory(buffer->bytes_per_row * height,
		0, overlay->buffer_base);
	if (status < B_OK) {
		free(overlay);
		return NULL;
	}

	if (isI965) {
		// this model needs a separate state area as well
		status = intel_allocate_memory(INTEL_i965_OVERLAY_STATE_SIZE,
			B_APERTURE_NON_RESERVED, overlay->state_base);
		if (status < B_OK) {
			intel_free_memory(overlay->buffer_base);
			free(overlay);
			return NULL;
		}

		overlay->state_offset = overlay->state_base
			- (addr_t)sharedInfo.graphics_memory;
	}

	// The offset is relative to the start of the graphics memory; it is used
	// to derive the physical address of the buffer
	overlay->buffer_offset = overlay->buffer_base
		- (addr_t)sharedInfo.graphics_memory;

	buffer->buffer = (uint8*)overlay->buffer_base;
	buffer->buffer_dma = (uint8*)sharedInfo.physical_graphics_memory
		+ overlay->buffer_offset;

	TRACE("%s: base=%x, offset=%x, address=%x, physical address=%x\n",
		__func__, overlay->buffer_base, overlay->buffer_offset,
		buffer->buffer, buffer->buffer_dma);

	return buffer;
}


/*!	Releases a buffer that was allocated by intel_allocate_overlay_buffer():
	hides the overlay if it currently shows this buffer, and frees the
	graphics memory as well as the bookkeeping structure.

	\param buffer the buffer to release.
	\return \c B_OK on success, \c B_BAD_VALUE if \a buffer is \c NULL.
*/
status_t
intel_release_overlay_buffer(const overlay_buffer* buffer)
{
	CALLED();

	// There is nothing we could release, and we must not dereference it
	if (buffer == NULL)
		return B_BAD_VALUE;

	// The buffer is handed out as a pointer into its struct overlay by
	// intel_allocate_overlay_buffer(); the cast below relies on it being
	// located at the very start of that structure.
	struct overlay* overlay = (struct overlay*)buffer;

	// TODO: locking!

	// Make sure the hardware no longer scans out of this buffer
	if (gInfo->current_overlay == overlay)
		hide_overlay();

	intel_free_memory(overlay->buffer_base);
	// the state area only exists on this model
	if (gInfo->shared_info->device_type.IsModel(INTEL_MODEL_965))
		intel_free_memory(overlay->state_base);
	free(overlay);

	return B_OK;
}


/*!	Fills in \a constraints with the restrictions that apply to showing
	\a buffer as an overlay in \a mode.

	\return \c B_OK on success, \c B_BAD_VALUE if the color space of
		\a buffer is not known; in that case \a constraints is only partially
		filled in.
*/
status_t
intel_get_overlay_constraints(const display_mode* mode,
	const overlay_buffer* buffer, overlay_constraints* constraints)
{
	CALLED();

	// taken from the Radeon driver...
	// TODO: check all these values; most of them are probably too restrictive

	// Scaler input (the part of the buffer that is shown)

	constraints->view.h_alignment = 0;
	constraints->view.v_alignment = 0;

	switch (buffer->space) {
		case B_RGB15:
		case B_RGB16:
		case B_YCbCr422:
		case B_YUV12:
			constraints->view.width_alignment = 7;
			break;
		case B_RGB32:
			constraints->view.width_alignment = 3;
			break;
		default:
			return B_BAD_VALUE;
	}
	constraints->view.height_alignment = 0;

	// at least 4 pixels to make the 4-tap filter happy, at most the buffer
	constraints->view.width.min = 4;
	constraints->view.height.min = 4;
	constraints->view.width.max = buffer->width;
	constraints->view.height.max = buffer->height;

	// Scaler output (the window on screen)

	constraints->window.h_alignment = 0;
	constraints->window.v_alignment = 0;
	constraints->window.width_alignment = 0;
	constraints->window.height_alignment = 0;

	constraints->window.width.min = 2;
	constraints->window.height.min = 2;
	constraints->window.width.max = mode->virtual_width;
	constraints->window.height.max = mode->virtual_height;

	// Scaling factors
	// TODO: the minimum values are not tested
	constraints->h_scale.min = 1.0f / (1 << 4);
	constraints->v_scale.min = 1.0f / (1 << 4);
	constraints->h_scale.max = buffer->width * 7;
	constraints->v_scale.max = buffer->height * 7;

	return B_OK;
}


/*!	Reserves the overlay channel for the caller.

	\return a new token identifying the reservation, or \c NULL if the
		channel is already in use.
*/
overlay_token
intel_allocate_overlay(void)
{
	CALLED();

	intel_shared_info &sharedInfo = *gInfo->shared_info;

	// There is only a single overlay channel: atomically mark it as used,
	// and bail out if somebody else did so before us
	const bool alreadyInUse
		= atomic_or(&sharedInfo.overlay_channel_used, 1) != 0;
	if (alreadyInUse)
		return NULL;

	// every reservation gets a fresh token value
	return (overlay_token)++sharedInfo.overlay_token;
}


/*!	Gives up the overlay channel that was reserved with
	intel_allocate_overlay().

	\return \c B_OK on success, \c B_BAD_VALUE if \a overlayToken is not the
		token that is currently valid.
*/
status_t
intel_release_overlay(overlay_token overlayToken)
{
	CALLED();

	intel_shared_info &sharedInfo = *gInfo->shared_info;

	// As there is only a single token at a time, a simple comparison is all
	// that is needed to validate it
	const bool isCurrentToken
		= overlayToken == (overlay_token)sharedInfo.overlay_token;
	if (!isCurrentToken)
		return B_BAD_VALUE;

	// mark the channel as available again
	atomic_and(&sharedInfo.overlay_channel_used, 0);

	return B_OK;
}


/*!	Shows, updates, or hides the overlay.

	If \a window or \a view is \c NULL, the overlay is hidden. Otherwise, the
	overlay registers are programmed to show the part of \a buffer described
	by \a view in the screen area described by \a window (clipped to the
	visible area of the current mode), and the overlay is either shown for
	the first time, or updated.

	Window position, scaling factors, and filter coefficients are only
	recomputed when the overlay is not yet active, or when \a view or the
	frame part of \a window differ from the ones of the previous call.

	\param overlayToken the token returned by intel_allocate_overlay().
	\param buffer the buffer to show, as returned by
		intel_allocate_overlay_buffer().
	\param window where to show the overlay on screen, or \c NULL to hide it.
	\param view the part of the buffer to show, or \c NULL to hide the
		overlay.
	\return \c B_OK on success (this includes the case that the overlay got
		hidden because it is entirely off screen), \c B_BAD_VALUE if the token
		is not the current one, if \a buffer is \c NULL while the overlay
		should be shown, or if the color space of \a buffer is not supported.
*/
status_t
intel_configure_overlay(overlay_token overlayToken,
	const overlay_buffer* buffer, const overlay_window* window,
	const overlay_view* view)
{
	CALLED();

	if (overlayToken != (overlay_token)gInfo->shared_info->overlay_token)
		return B_BAD_VALUE;

	if (window == NULL || view == NULL) {
		hide_overlay();
		return B_OK;
	}

	// Everything below reads from the buffer, so we cannot go on without one
	if (buffer == NULL)
		return B_BAD_VALUE;

	// The buffer is handed out as a pointer into its struct overlay by
	// intel_allocate_overlay_buffer()
	struct overlay* overlay = (struct overlay*)buffer;
	overlay_registers* registers = gInfo->overlay_registers;
	intel_shared_info &sharedInfo = *gInfo->shared_info;
	bool updateCoefficients = false;
	uint32 bytesPerPixel = 2;

	switch (buffer->space) {
		case B_RGB15:
			registers->source_format = OVERLAY_FORMAT_RGB15;
			break;
		case B_RGB16:
			registers->source_format = OVERLAY_FORMAT_RGB16;
			break;
		case B_RGB32:
			registers->source_format = OVERLAY_FORMAT_RGB32;
			bytesPerPixel = 4;
			break;
		case B_YCbCr422:
			registers->source_format = OVERLAY_FORMAT_YCbCr422;
			break;
		default:
			// Do not program the overlay with whatever format happens to be
			// left over in the registers
			return B_BAD_VALUE;
	}

	if (!gInfo->shared_info->overlay_active
		|| memcmp(&gInfo->last_overlay_view, view, sizeof(overlay_view)) != 0
		|| memcmp(&gInfo->last_overlay_frame, window, sizeof(overlay_frame)) != 0) {
		// scaling has changed, program window and scaling factor

		// clip the window to on screen bounds
		// TODO: this is not yet complete or correct - especially if we start
		// to support moving the display!
		int32 left, top, right, bottom;
		left = window->h_start;
		right = window->h_start + window->width;
		top = window->v_start;
		bottom = window->v_start + window->height;
		if (left < 0)
			left = 0;
		if (top < 0)
			top = 0;
		if (right > sharedInfo.current_mode.timing.h_display)
			right = sharedInfo.current_mode.timing.h_display;
		if (bottom > sharedInfo.current_mode.timing.v_display)
			bottom = sharedInfo.current_mode.timing.v_display;
		if (left >= right || top >= bottom) {
			// overlay is not within visible bounds
			// (this also catches windows with a width or height of zero, so
			// the divisions below are safe)
			hide_overlay();
			return B_OK;
		}

		registers->window_left = left;
		registers->window_top = top;
		registers->window_width = right - left;
		registers->window_height = bottom - top;

		// The scaling factors are fixed point numbers with a 12 bit
		// fraction. The UV factor is half the Y/RGB factor; the latter is
		// derived back from the former so that it is always exactly twice
		// as large.
		uint32 horizontalScale = (view->width << 12) / window->width;
		uint32 verticalScale = (view->height << 12) / window->height;
		uint32 horizontalScaleUV = horizontalScale >> 1;
		uint32 verticalScaleUV = verticalScale >> 1;
		horizontalScale = horizontalScaleUV << 1;
		verticalScale = verticalScaleUV << 1;

		// we need to offset the overlay view to adapt it to the clipping
		// (in addition to whatever offset is desired already)
		left = view->h_start - (int32)((window->h_start - left)
			* (horizontalScale / 4096.0) + 0.5);
		top = view->v_start - (int32)((window->v_start - top)
			* (verticalScale / 4096.0) + 0.5);
		right = view->h_start + view->width;
		bottom = view->v_start + view->height;

		// byte offset of the first visible pixel within the buffer
		gInfo->overlay_position_buffer_offset = buffer->bytes_per_row * top
			+ left * bytesPerPixel;

		// Note: in non-planar mode, you *must* not program the source
		// width/height UV registers - they must stay cleared, or the chip is
		// doing strange stuff.
		// On the other hand, you have to program the UV scaling registers, or
		// the result will be wrong, too.
		registers->source_width_rgb = right - left;
		registers->source_height_rgb = bottom - top;

		// NOTE(review): both branches compute the row width from
		// "view->width << 1", i.e. they assume 2 bytes per pixel even for
		// B_RGB32 (where bytesPerPixel is 4) - confirm against the hardware
		// documentation whether this is intended.
		if (gInfo->shared_info->device_type.InFamily(INTEL_FAMILY_8xx)) {
			registers->source_bytes_per_row_rgb = (((overlay->buffer_offset
				+ (view->width << 1) + 0x1f) >> 5)
				- (overlay->buffer_offset >> 5) - 1) << 2;
		} else {
			int yaddress = overlay->buffer_offset;
			int yswidth = view->width << 1;
			registers->source_bytes_per_row_rgb = (((((yaddress
				+ yswidth + 0x3f) >> 6) - (yaddress >> 6)) << 1) - 1) << 2;
		}

		// horizontal scaling
		registers->scale_rgb.horizontal_downscale_factor
			= horizontalScale >> 12;
		registers->scale_rgb.horizontal_scale_fraction
			= horizontalScale & 0xfff;
		registers->scale_uv.horizontal_downscale_factor
			= horizontalScaleUV >> 12;
		registers->scale_uv.horizontal_scale_fraction
			= horizontalScaleUV & 0xfff;

		// vertical scaling
		registers->scale_rgb.vertical_scale_fraction = verticalScale & 0xfff;
		registers->scale_uv.vertical_scale_fraction = verticalScaleUV & 0xfff;
		registers->vertical_scale_rgb = verticalScale >> 12;
		registers->vertical_scale_uv = verticalScaleUV >> 12;

		TRACE("scale: h = %ld.%ld, v = %ld.%ld\n", horizontalScale >> 12,
			horizontalScale & 0xfff, verticalScale >> 12,
			verticalScale & 0xfff);

		if (verticalScale != gInfo->last_vertical_overlay_scale
			|| horizontalScale != gInfo->last_horizontal_overlay_scale) {
			// Recompute phase coefficients (taken from X driver)
			// Only the horizontal filters are computed and programmed here.
			updateCoefficients = true;

			phase_coefficient coefficients[NUM_HORIZONTAL_TAPS * NUM_PHASES];
			update_coefficients(NUM_HORIZONTAL_TAPS, horizontalScale / 4096.0,
				true, true, coefficients);

			phase_coefficient coefficientsUV[
				NUM_HORIZONTAL_UV_TAPS * NUM_PHASES];
			update_coefficients(NUM_HORIZONTAL_UV_TAPS,
				horizontalScaleUV / 4096.0, true, false, coefficientsUV);

			// pack sign (bit 15), exponent (from bit 12 on), and mantissa
			// into the register layout
			int32 pos = 0;
			for (int32 i = 0; i < NUM_PHASES; i++) {
				for (int32 j = 0; j < NUM_HORIZONTAL_TAPS; j++) {
					registers->horizontal_coefficients_rgb[pos]
						= coefficients[pos].sign << 15
							| coefficients[pos].exponent << 12
							| coefficients[pos].mantissa;
					pos++;
				}
			}

			pos = 0;
			for (int32 i = 0; i < NUM_PHASES; i++) {
				for (int32 j = 0; j < NUM_HORIZONTAL_UV_TAPS; j++) {
					registers->horizontal_coefficients_uv[pos]
						= coefficientsUV[pos].sign << 15
							| coefficientsUV[pos].exponent << 12
							| coefficientsUV[pos].mantissa;
					pos++;
				}
			}

			gInfo->last_vertical_overlay_scale = verticalScale;
			gInfo->last_horizontal_overlay_scale = horizontalScale;
		}

		// remember what we have programmed, so that we can skip all of the
		// above as long as nothing changes
		gInfo->last_overlay_view = *view;
		gInfo->last_overlay_frame = *(overlay_frame*)window;
	}

	registers->color_control_output_mode = true;
	registers->select_pipe = 0;

	// program buffer

	registers->buffer_rgb0
		= overlay->buffer_offset + gInfo->overlay_position_buffer_offset;
	registers->stride_rgb = buffer->bytes_per_row;

	registers->mirroring_mode
		= (window->flags & B_OVERLAY_HORIZONTAL_MIRRORING) != 0
			? OVERLAY_MIRROR_HORIZONTAL : OVERLAY_MIRROR_NORMAL;
	registers->ycbcr422_order = 0;

	if (!gInfo->shared_info->overlay_active) {
		// overlay is shown for the first time
		set_color_key(window);
		show_overlay();
	} else
		update_overlay(updateCoefficients);

	gInfo->current_overlay = overlay;
	return B_OK;
}