/*
 * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 */
#include <OS.h>
#include <fs_interface.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
// Enables the extra dumps that are guarded by "#ifdef TRACE_FILE_CACHE" below.
#define TRACE_FILE_CACHE
// TRACE() takes a parenthesized printf() argument list, i.e. TRACE(("fmt", ...)),
// and forwards it to printf().
#define TRACE(x) printf x
// dprintf() is simply mapped to printf() in this file.
#define dprintf printf
#ifndef ASSERT
// Fallback used when no ASSERT is defined yet: expands to an empty statement,
// so the asserted expression is not evaluated at all.
# define ASSERT(x) ;
#endif
// Upper bound for the number of iovecs set_vecs() fills in; also the size of
// the iovec array in main().
#define MAX_IO_VECS 64 // 256 kB
// Number of file_io_vecs pages_io() requests from get_file_map() per call.
#define MAX_FILE_IO_VECS 4
// Number of iovecs pages_io() hands to a single vfs_read_pages()/
// vfs_write_pages() call.
#define MAX_TEMP_IO_VECS 8
// Number of extents a file_map can hold inline (file_map::direct) before it
// switches to the heap allocated file_map::array.
#define CACHED_FILE_EXTENTS 2
// Only used as a pointer type in file_cache_ref; never dereferenced here.
struct vm_cache_ref;
// A single entry of a file_map: one contiguous run of the file.
struct file_extent {
// Offset of this run within the file; file_map::Add() computes it as the
// sum of the lengths of all preceding extents.
off_t offset;
// On-disk offset and length of the run, copied from the file_io_vec passed
// to file_map::Add().
file_io_vec disk;
};
// Growable list of file_extents describing where a file lives on disk.
// As long as "count" does not exceed CACHED_FILE_EXTENTS the extents are read
// from the inline "direct" array, otherwise from the heap block "array"
// (see ExtentAt() and Free()).
struct file_map {
file_map();
~file_map();
// Same as ExtentAt().
file_extent *operator[](uint32 index);
// Returns the extent at "index", or NULL if "index" is out of range.
file_extent *ExtentAt(uint32 index);
// Appends "vecCount" extents built from "vecs"; "lastOffset" receives the
// file offset just past the last extent added.
status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset);
// Releases the heap block (if any) and resets the map to empty.
void Free();
// NOTE: both members share the same storage - writing "array" overwrites
// the beginning of "direct[0]" and vice versa.
union {
file_extent direct[CACHED_FILE_EXTENTS];
file_extent *array;
};
// Number of valid extents in the map.
size_t count;
};
// Per-file state that pages_io() and get_file_map() operate on.
struct file_cache_ref {
// Not accessed by any code visible in this file.
vm_cache_ref *cache;
// Passed through to vfs_get_file_map().
void *vnode;
// "device" and "cookie" are passed through to vfs_read_pages() and
// vfs_write_pages().
void *device;
void *cookie;
// Cached file offset -> disk extent translation, filled in lazily by
// get_file_map().
file_map map;
};
// Capacity of the simulated on-disk layout table below.
const uint32 kMaxFileVecs = 1024;
// The simulated on-disk layout of the test file, filled in by set_file_map()
// and queried by find_map_base()/vfs_get_file_map().
file_io_vec gFileVecs[kMaxFileVecs];
// Number of valid entries in gFileVecs.
size_t gFileVecCount;
// Sum of the lengths of all entries in gFileVecs (maintained by
// set_file_map()).
off_t gFileSize;
/*!	Creates an empty map: no extents, and no heap block attached. */
file_map::file_map()
{
	count = 0;
	array = NULL;
}
/*!	Releases whatever storage the map still owns by delegating to Free(). */
file_map::~file_map()
{
	this->Free();
}
/*!	Subscript access to the map; a thin forwarder to ExtentAt().
	\return The extent at \a index, or \c NULL if \a index is out of range.
*/
file_extent *
file_map::operator[](uint32 index)
{
	file_extent *extent = ExtentAt(index);
	return extent;
}
/*!	Returns a pointer to the extent with the given \a index.
	The extents are taken from the inline \c direct storage as long as the
	map holds at most CACHED_FILE_EXTENTS entries, and from the heap block
	\c array otherwise.
	\return The extent, or \c NULL if \a index is not smaller than \c count.
*/
file_extent *
file_map::ExtentAt(uint32 index)
{
	if (index >= count)
		return NULL;

	file_extent *storage = count > CACHED_FILE_EXTENTS ? array : direct;
	return storage + index;
}
/*!	Appends \a vecCount extents, built from \a vecs, to the end of the map.
	The file offset of each new extent is the end of the extent before it
	(0 for the very first one).

	Storage handling:
	- as long as the resulting number of extents fits into the inline
	  \c direct array, no memory is allocated;
	- when the map outgrows \c direct, a heap block is allocated and the
	  inline extents are copied into it. \c array shares its storage with
	  \c direct, so it must neither be passed to realloc() nor be assigned
	  before the inline extents have been read;
	- once the map lives on the heap, the block is grown with realloc().

	\param vecs The disk vecs to append.
	\param vecCount Number of entries in \a vecs.
	\param lastOffset Set to the file offset just past the last extent on
		success; left untouched on failure.
	\return \c B_OK on success, \c B_NO_MEMORY if the heap block could not be
		allocated (the map is left unchanged in that case).
*/
status_t
file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset)
{
	TRACE(("file_map::Add(vecCount = %ld)\n", vecCount));

	const size_t newCount = count + vecCount;
	off_t offset = 0;

	// The new extents continue where the current last one ends. This has to
	// be read before "array" is touched, since that may alias direct[0].
	if (count != 0) {
		file_extent *last = ExtentAt(count - 1);
		offset = last->offset + last->disk.length;
	}

	if (newCount <= CACHED_FILE_EXTENTS) {
		// everything still fits into the inline storage
	} else if (count <= CACHED_FILE_EXTENTS) {
		// switch from the inline storage (or an empty map) to the heap
		file_extent *newMap
			= (file_extent *)malloc(newCount * sizeof(file_extent));
		if (newMap == NULL)
			return B_NO_MEMORY;

		for (size_t i = 0; i < count; i++)
			newMap[i] = direct[i];

		array = newMap;
	} else {
		// already on the heap, just grow the block
		file_extent *newMap = (file_extent *)realloc(array,
			newCount * sizeof(file_extent));
		if (newMap == NULL)
			return B_NO_MEMORY;

		array = newMap;
	}

	const size_t start = count;
	count = newCount;

	for (uint32 i = 0; i < vecCount; i++) {
		file_extent *extent = ExtentAt(start + i);

		extent->offset = offset;
		extent->disk = vecs[i];

		offset += extent->disk.length;
	}

#ifdef TRACE_FILE_CACHE
	for (uint32 i = 0; i < count; i++) {
		file_extent *extent = ExtentAt(i);
		dprintf(" [%ld] extend offset %lld, disk offset %lld, length %lld\n",
			i, extent->offset, extent->disk.offset, extent->disk.length);
	}
#endif

	lastOffset = offset;
	return B_OK;
}
/*!	Empties the map. The heap block is only freed when the map had actually
	outgrown the inline storage (more than CACHED_FILE_EXTENTS extents).
*/
void
file_map::Free()
{
	const bool onHeap = count > CACHED_FILE_EXTENTS;
	if (onHeap)
		free(array);

	count = 0;
	array = NULL;
}
void
set_vecs(iovec *vecs, size_t *_count, ...)
{
uint32 base = 0;
size_t count = 0;
va_list args;
va_start(args, _count);
while (count < MAX_IO_VECS) {
int32 length = va_arg(args, int32);
if (length < 0)
break;
vecs[count].iov_base = (void *)base;
vecs[count].iov_len = length;
base += length;
count++;
}
va_end(args);
*_count = count;
}
/*!	Sets up the simulated disk layout in gFileVecs.
	The first extent is given by \a base and \a length; further extents follow
	as (offset, length) int32 pairs in the variable argument list, which is
	terminated by a negative offset. gFileVecCount and gFileSize are updated
	accordingly; at most kMaxFileVecs extents are stored.
*/
void
set_file_map(int32 base, int32 length, ...)
{
	// the first extent comes from the named parameters
	gFileVecs[0].offset = base;
	gFileVecs[0].length = length;
	gFileVecCount = 1;
	gFileSize = length;

	va_list args;
	va_start(args, length);

	for (; gFileVecCount < kMaxFileVecs; gFileVecCount++) {
		off_t offset = va_arg(args, int32);
		if (offset < 0)
			break;

		length = va_arg(args, int32);

		file_io_vec &vec = gFileVecs[gFileVecCount];
		vec.offset = offset;
		vec.length = length;

		gFileSize += length;
	}

	va_end(args);
}
/*!	Looks up the gFileVecs entry that contains the file offset \a offset.

	\param offset The file offset to look up.
	\param diskOffset Set to the disk offset of the matching entry.
	\param diskLength Set to the length of the matching entry.
	\param fileOffset Set to the file offset at which the matching entry
		starts (the sum of the lengths of all entries before it). It is
		modified even if no entry matches.
	\return \c B_OK if an entry was found, \c B_ENTRY_NOT_FOUND otherwise.
*/
status_t
find_map_base(off_t offset, off_t &diskOffset, off_t &diskLength,
	off_t &fileOffset)
{
	fileOffset = 0;

	uint32 i = 0;
	while (i < gFileVecCount) {
		const off_t length = gFileVecs[i].length;

		if (offset < length) {
			diskOffset = gFileVecs[i].offset;
			diskLength = length;
			return B_OK;
		}

		// not in this entry, continue relative to the next one
		offset -= length;
		fileOffset += length;
		i++;
	}

	return B_ENTRY_NOT_FOUND;
}
/*!	Test stand-in that answers file map requests from gFileVecs.
	Starting at \a offset, it fills \a vecs with consecutive disk runs until
	either \a size bytes are covered, the end of the file (gFileSize) is
	reached, or \a vecs is full.

	\param vnode Unused.
	\param offset File offset at which the mapping starts.
	\param size Number of bytes that should be covered.
	\param vecs Array to fill.
	\param _count In: capacity of \a vecs. Out: number of entries written.
	\return \c B_OK if the request could be satisfied, \c B_BUFFER_OVERFLOW if
		\a vecs was too small, or the error of find_map_base().
*/
static status_t
vfs_get_file_map(void *vnode, off_t offset, size_t size, file_io_vec *vecs,
	size_t *_count)
{
	const size_t capacity = *_count;

	printf("vfs_get_file_map(offset = %lld, size = %lu, count = %lu)\n",
		offset, size, capacity);

	for (uint32 index = 0;;) {
		off_t diskOffset, diskLength, fileOffset;

		status_t status = find_map_base(offset, diskOffset, diskLength,
			fileOffset);
		if (status != B_OK)
			return status;

		file_io_vec &vec = vecs[index];
		vec.offset = diskOffset + offset - fileOffset;
		vec.length = diskLength - offset + fileOffset;
		offset += vec.length;

		const bool requestCovered = size <= vec.length;
		const bool endReached = offset >= gFileSize;

		if (requestCovered || endReached) {
			if (offset > gFileSize)
				vec.length = gFileSize - fileOffset;

			*_count = index + 1;
			return B_OK;
		}

		size -= vec.length;

		if (++index >= capacity) {
			*_count = index;
			return B_BUFFER_OVERFLOW;
		}
	}
}
/*!	Test stand-in for the real read hook: nothing is read, the request is
	merely printed to stdout (disk offset, \c *bytes, and every iovec).
	\a device, \a cookie and \a kernel are ignored, \c *bytes is not changed.
	\return Always \c B_OK.
*/
static status_t
vfs_read_pages(void *device, void *cookie, off_t offset,
	const iovec *vecs, size_t count, size_t *bytes, bool kernel)
{
	printf("read offset %lld, length %lu\n", offset, *bytes);

	uint32 i = 0;
	while (i < count) {
		const iovec &vec = vecs[i];
		printf(" [%lu] base %lu, length %lu\n",
			i, (uint32)vec.iov_base, vec.iov_len);
		i++;
	}

	return B_OK;
}
/*!	Test stand-in for the real write hook: nothing is written, the request is
	merely printed to stdout (disk offset, \c *bytes, and every iovec).
	\a device, \a cookie and \a kernel are ignored, \c *bytes is not changed.
	\return Always \c B_OK.
*/
static status_t
vfs_write_pages(void *device, void *cookie, off_t offset,
	const iovec *vecs, size_t count, size_t *bytes, bool kernel)
{
	printf("write offset %lld, length %lu\n", offset, *bytes);

	uint32 i = 0;
	while (i < count) {
		const iovec &vec = vecs[i];
		printf(" [%lu] base %lu, length %lu\n",
			i, (uint32)vec.iov_base, vec.iov_len);
		i++;
	}

	return B_OK;
}
/*!	Linearly searches the file map of \a ref for the extent that contains the
	file offset \a offset.

	\param _index If not \c NULL, receives the index of the extent found.
	\return The matching extent, or \c NULL if \a offset is not covered by
		the map.
*/
static file_extent *
find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
{
	uint32 index = 0;

	while (index < ref->map.count) {
		file_extent *candidate = ref->map[index];
		const off_t start = candidate->offset;
		const off_t end = candidate->offset + candidate->disk.length;

		if (start <= offset && end > offset) {
			if (_index != NULL)
				*_index = index;
			return candidate;
		}

		index++;
	}

	return NULL;
}
/*!	Translates the file range starting at \a offset with length \a size into
	disk vecs, using the file map cached in \a ref. If the map is still empty,
	it is first loaded completely via vfs_get_file_map(); \a vecs is used as
	scratch space for that.

	\param ref The file cache reference whose map is used (and filled).
	\param offset File offset at which the range starts.
	\param size Length of the range in bytes.
	\param vecs Array to be filled with the disk vecs.
	\param _count In: capacity of \a vecs. Out: number of entries written;
		0 if \a offset is not covered by the map.
	\return \c B_OK on success (also if \a offset lies outside the map),
		\c B_BUFFER_OVERFLOW if \a vecs is full but the range is not covered
		yet and more extents follow, or the error that occurred while loading
		the map. In the latter case the partially loaded map is discarded.
*/
static status_t
get_file_map(file_cache_ref *ref, off_t offset, size_t size,
	file_io_vec *vecs, size_t *_count)
{
	size_t maxVecs = *_count;
	status_t status = B_OK;

	if (ref->map.count == 0) {
		// the file map has not been retrieved yet - do so now
		size_t vecCount = maxVecs;
		off_t mapOffset = 0;

		while (true) {
			status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs,
				&vecCount);
			if (status < B_OK && status != B_BUFFER_OVERFLOW) {
				// Don't return directly: extents added by earlier rounds
				// must not stay behind as an incomplete map.
				break;
			}

			status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset);
			if (addStatus != B_OK) {
				// only overwrite the status in case of a failure
				status = addStatus;
			}

			if (status != B_BUFFER_OVERFLOW)
				break;

			// "vecs" was too small to hold the whole map, fetch the next
			// part, continuing at mapOffset
			vecCount = maxVecs;
		}
	}

	if (status != B_OK) {
		ref->map.Free();
		return status;
	}

	// The map is complete, look up the extent the range starts in

	uint32 index;
	file_extent *fileExtent = find_file_extent(ref, offset, &index);
	if (fileExtent == NULL) {
		// access outside of the file
		*_count = 0;
		return B_OK;
	}

	// the first vec may start in the middle of the extent
	offset -= fileExtent->offset;
	vecs[0].offset = fileExtent->disk.offset + offset;
	vecs[0].length = fileExtent->disk.length - offset;

	if (vecs[0].length >= size || index >= ref->map.count - 1) {
		*_count = 1;
		return B_OK;
	}

	// copy the following extents until the range is covered

	size -= vecs[0].length;

	// "index" keeps walking the map, "vecIndex" the output array - they only
	// coincide if the range starts in the very first extent.
	size_t vecIndex = 1;

	for (index++; index < ref->map.count; index++) {
		if (vecIndex >= maxVecs) {
			// there is more to come, but no room left to store it
			*_count = vecIndex;
			return B_BUFFER_OVERFLOW;
		}

		fileExtent++;
		vecs[vecIndex++] = fileExtent->disk;

		if (size <= fileExtent->disk.length)
			break;

		size -= fileExtent->disk.length;
	}

	*_count = vecIndex;
	return B_OK;
}
Does the dirty work of translating the request into actual disk offsets
and reads to or writes from the supplied iovecs as specified by \a doWrite.
*/
static status_t
pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
size_t *_numBytes, bool doWrite)
{
TRACE(("pages_io: ref = %p, offset = %lld, size = %lu, vecCount = %lu, %s\n", ref, offset,
*_numBytes, count, doWrite ? "write" : "read"));
file_io_vec fileVecs[MAX_FILE_IO_VECS];
size_t fileVecCount = MAX_FILE_IO_VECS;
size_t numBytes = *_numBytes;
status_t status = get_file_map(ref, offset, numBytes, fileVecs,
&fileVecCount);
if (status < B_OK && status != B_BUFFER_OVERFLOW) {
TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n", offset,
numBytes, strerror(status)));
return status;
}
bool bufferOverflow = status == B_BUFFER_OVERFLOW;
#ifdef TRACE_FILE_CACHE
dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount, offset, numBytes,
bufferOverflow ? " (array too small)" : "");
for (size_t i = 0; i < fileVecCount; i++) {
dprintf(" [%lu] offset = %lld, size = %lld\n",
i, fileVecs[i].offset, fileVecs[i].length);
}
#endif
if (fileVecCount == 0) {
TRACE(("pages_io: access outside of vnode %p at offset %lld\n",
ref->vnode, offset));
return B_BAD_VALUE;
}
uint32 fileVecIndex;
size_t size;
if (!doWrite) {
size = fileVecs[0].length;
if (size > numBytes)
size = numBytes;
status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs,
count, &size, false);
if (status < B_OK)
return status;
if (size > fileVecs[0].length) {
size = fileVecs[0].length;
}
ASSERT(size <= fileVecs[0].length);
if (size == numBytes)
return B_OK;
if (size != fileVecs[0].length) {
*_numBytes = size;
return B_OK;
}
fileVecIndex = 1;
} else {
fileVecIndex = 0;
size = 0;
}
size_t totalSize = size;
uint32 i = 0;
for (; i < count; i++) {
if (size < vecs[i].iov_len)
break;
size -= vecs[i].iov_len;
}
size_t vecOffset = size;
size_t bytesLeft = numBytes - size;
while (true) {
for (; fileVecIndex < fileVecCount; fileVecIndex++) {
file_io_vec &fileVec = fileVecs[fileVecIndex];
off_t fileOffset = fileVec.offset;
off_t fileLeft = min_c(fileVec.length, bytesLeft);
TRACE(("FILE VEC [%lu] length %lld\n", fileVecIndex, fileLeft));
while (fileLeft > 0) {
iovec tempVecs[MAX_TEMP_IO_VECS];
uint32 tempCount = 0;
size = 0;
for (size = 0; size < fileLeft && i < count
&& tempCount < MAX_TEMP_IO_VECS;) {
size_t vecLeft = vecs[i].iov_len - vecOffset;
if (vecLeft == 0) {
vecOffset = 0;
i++;
continue;
}
TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
i, vecOffset, size));
size_t tempVecSize = min_c(vecLeft, fileLeft - size);
tempVecs[tempCount].iov_base
= (void *)((addr_t)vecs[i].iov_base + vecOffset);
tempVecs[tempCount].iov_len = tempVecSize;
tempCount++;
size += tempVecSize;
vecOffset += tempVecSize;
}
size_t bytes = size;
if (doWrite) {
status = vfs_write_pages(ref->device, ref->cookie,
fileOffset, tempVecs, tempCount, &bytes, false);
} else {
status = vfs_read_pages(ref->device, ref->cookie,
fileOffset, tempVecs, tempCount, &bytes, false);
}
if (status < B_OK)
return status;
totalSize += bytes;
bytesLeft -= size;
fileOffset += size;
fileLeft -= size;
if (size != bytes || i >= count) {
*_numBytes = totalSize;
return B_OK;
}
}
}
if (bufferOverflow) {
status = get_file_map(ref, offset + totalSize, bytesLeft, fileVecs,
&fileVecCount);
if (status < B_OK && status != B_BUFFER_OVERFLOW) {
TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n",
offset, numBytes, strerror(status)));
return status;
}
bufferOverflow = status == B_BUFFER_OVERFLOW;
fileVecIndex = 0;
#ifdef TRACE_FILE_CACHE
dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount,
offset + totalSize, numBytes,
bufferOverflow ? " (array too small)" : "");
for (size_t i = 0; i < fileVecCount; i++) {
dprintf(" [%lu] offset = %lld, size = %lld\n",
i, fileVecs[i].offset, fileVecs[i].length);
}
#endif
} else
break;
}
*_numBytes = totalSize;
return B_OK;
}
int
main(int argc, char **argv)
{
file_cache_ref ref;
iovec vecs[MAX_IO_VECS];
size_t count = 1;
size_t numBytes = 10000;
off_t offset = 4999;
set_vecs(vecs, &count, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 4096, 8192, 16384, 4096, 4096, -1);
set_file_map(0, 2000, 5000, 3000, 10000, 800, 11000, 20, 12000, 30,
13000, 70, 14000, 100, 15000, 900, 20000, 30000, -1);
status_t status = pages_io(&ref, offset, vecs, count, &numBytes, false);
if (status < B_OK)
fprintf(stderr, "pages_io() returned: %s\n", strerror(status));
return 0;
}