* Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
* Distributed under the terms of the MIT License.
*/
#include <scheduling_analysis.h>
#include <elf.h>
#include <kernel.h>
#include <scheduler_defs.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include "scheduler_tracing.h"
#if SCHEDULER_TRACING
namespace SchedulingAnalysis {
using namespace SchedulerTracing;
#if SCHEDULING_ANALYSIS_TRACING
using namespace SchedulingAnalysisTracing;
#endif
struct ThreadWaitObject;
// Abstract key for looking up HashObjects in the analysis hash table.
// Concrete subclasses (ThreadKey, WaitObjectKey, ThreadWaitObjectKey)
// carry the fields that the HashObject::Equals() implementations compare.
struct HashObjectKey {
	virtual ~HashObjectKey()
	{
	}

	// Hash value; must yield the same value as the HashKey() of the
	// object(s) this key is meant to match.
	virtual uint32 HashKey() const = 0;
};
// Base class for everything stored in the analysis hash table
// (Thread, WaitObject, ThreadWaitObject). Objects in the same bucket
// are chained through `next` (open hashing).
struct HashObject {
	HashObject*	next;	// next object in the same hash bucket

	virtual ~HashObject()
	{
	}

	// Hash value of this object; must stay in sync with the HashKey() of
	// the corresponding HashObjectKey subclass.
	virtual uint32 HashKey() const = 0;

	// Returns whether this object is the one identified by the given key.
	virtual bool Equals(const HashObjectKey* key) const = 0;
};
struct ThreadKey : HashObjectKey {
thread_id id;
ThreadKey(thread_id id)
:
id(id)
{
}
virtual uint32 HashKey() const
{
return id;
}
};
// Per-thread statistics accumulator. Combines the hash table link
// (HashObject) with the user-visible result record
// (scheduling_analysis_thread) and adds the transient fields used only
// while walking the trace.
struct Thread : HashObject, scheduling_analysis_thread {
	ScheduleState		state;		// current state machine position
	bigtime_t			lastTime;	// time of the last state transition
	ThreadWaitObject*	waitObject;	// object the thread currently waits on

	Thread(thread_id id)
		:
		state(UNKNOWN),
		lastTime(0),
		waitObject(NULL)
	{
		this->id = id;
		name[0] = '\0';

		runs = 0;
		total_run_time = 0;
		// All minima start at -1 meaning "not recorded yet" -- the update
		// sites test `min_* < 0` before comparing. (min_run_time used to
		// start at 1, which made `diffTime < min_run_time` virtually never
		// true, so the minimum run time stayed stuck at 1 µs.)
		min_run_time = -1;
		max_run_time = -1;

		latencies = 0;
		total_latency = 0;
		min_latency = -1;
		max_latency = -1;

		reruns = 0;
		total_rerun_time = 0;
		min_rerun_time = -1;
		max_rerun_time = -1;

		unspecified_wait_time = 0;
		preemptions = 0;

		wait_objects = NULL;
	}

	virtual uint32 HashKey() const
	{
		return id;
	}

	virtual bool Equals(const HashObjectKey* _key) const
	{
		const ThreadKey* key = dynamic_cast<const ThreadKey*>(_key);
		if (key == NULL)
			return false;
		return key->id == id;
	}
};
struct WaitObjectKey : HashObjectKey {
uint32 type;
void* object;
WaitObjectKey(uint32 type, void* object)
:
type(type),
object(object)
{
}
virtual uint32 HashKey() const
{
return type ^ (uint32)(addr_t)object;
}
};
struct WaitObject : HashObject, scheduling_analysis_wait_object {
WaitObject(uint32 type, void* object)
{
this->type = type;
this->object = object;
name[0] = '\0';
referenced_object = NULL;
}
virtual uint32 HashKey() const
{
return type ^ (uint32)(addr_t)object;
}
virtual bool Equals(const HashObjectKey* _key) const
{
const WaitObjectKey* key = dynamic_cast<const WaitObjectKey*>(_key);
if (key == NULL)
return false;
return key->type == type && key->object == object;
}
};
struct ThreadWaitObjectKey : HashObjectKey {
thread_id thread;
uint32 type;
void* object;
ThreadWaitObjectKey(thread_id thread, uint32 type, void* object)
:
thread(thread),
type(type),
object(object)
{
}
virtual uint32 HashKey() const
{
return thread ^ type ^ (uint32)(addr_t)object;
}
};
struct ThreadWaitObject : HashObject, scheduling_analysis_thread_wait_object {
ThreadWaitObject(thread_id thread, WaitObject* waitObject)
{
this->thread = thread;
wait_object = waitObject;
wait_time = 0;
waits = 0;
next_in_list = NULL;
}
virtual uint32 HashKey() const
{
return thread ^ wait_object->type ^ (uint32)(addr_t)wait_object->object;
}
virtual bool Equals(const HashObjectKey* _key) const
{
const ThreadWaitObjectKey* key
= dynamic_cast<const ThreadWaitObjectKey*>(_key);
if (key == NULL)
return false;
return key->thread == thread && key->type == wait_object->type
&& key->object == wait_object->object;
}
};
/*!	Bookkeeping for one scheduling analysis run.

	The caller supplies a single memory buffer: objects (Thread,
	WaitObject, ThreadWaitObject) are bump-allocated from its front via
	Allocate(), while the buffer's tail holds an open-hashing table of
	HashObject pointers used to find the objects again. Individual objects
	are never freed -- Remove() merely unlinks them from the table.
*/
class SchedulingAnalysisManager {
public:
	SchedulingAnalysisManager(void* buffer, size_t size)
		:
		fBuffer(buffer),
		fSize(size),
		fHashTable(),
		fHashTableSize(0)
	{
		fAnalysis.thread_count = 0;
		fAnalysis.threads = 0;
		fAnalysis.wait_object_count = 0;
		fAnalysis.thread_wait_object_count = 0;

		// One hash slot per largest possible object keeps the table's load
		// factor <= 1, whatever mix of objects gets allocated.
		size_t maxObjectSize = max_c(max_c(sizeof(Thread), sizeof(WaitObject)),
			sizeof(ThreadWaitObject));
		fHashTableSize = size / (maxObjectSize + sizeof(HashObject*));
		fHashTable = (HashObject**)((uint8*)fBuffer + fSize) - fHashTableSize;
		fNextAllocation = (uint8*)fBuffer;
		fRemainingBytes = (addr_t)fHashTable - (addr_t)fBuffer;

		// The buffer comes from userland and is not guaranteed to be
		// zeroed -- clear the table so Insert()/Lookup() don't chase
		// garbage "next" pointers.
		memset(fHashTable, 0, fHashTableSize * sizeof(HashObject*));

		// Remember the kernel image's address range, so _IsInKernelImage()
		// can tell which wait object pointers are safe to dereference.
		image_info info;
		if (elf_get_image_info_for_address((addr_t)&scheduler_init, &info)
			== B_OK) {
			fKernelStart = (addr_t)info.text;
			fKernelEnd = (addr_t)info.data + info.data_size;
		} else {
			fKernelStart = 0;
			fKernelEnd = 0;
		}
	}

	const scheduling_analysis* Analysis() const
	{
		return &fAnalysis;
	}

	// Bump-allocates `size` bytes (rounded up to 8-byte alignment) from
	// the object area. Returns NULL when the area is exhausted; nothing is
	// ever freed.
	void* Allocate(size_t size)
	{
		size = (size + 7) & ~(size_t)7;

		if (size > fRemainingBytes)
			return NULL;

		void* address = fNextAllocation;
		fNextAllocation += size;
		fRemainingBytes -= size;

		return address;
	}

	// Links the object into its hash bucket (front of the chain).
	void Insert(HashObject* object)
	{
		uint32 index = object->HashKey() % fHashTableSize;
		object->next = fHashTable[index];
		fHashTable[index] = object;
	}

	// Unlinks the object from its bucket chain. The object must currently
	// be in the table; its memory is not reclaimed (bump allocator).
	void Remove(HashObject* object)
	{
		uint32 index = object->HashKey() % fHashTableSize;
		HashObject** slot = &fHashTable[index];
		while (*slot != object)
			slot = &(*slot)->next;

		*slot = object->next;
	}

	HashObject* Lookup(const HashObjectKey& key) const
	{
		uint32 index = key.HashKey() % fHashTableSize;
		HashObject* object = fHashTable[index];
		while (object != NULL && !object->Equals(&key))
			object = object->next;
		return object;
	}

	Thread* ThreadFor(thread_id id) const
	{
		return dynamic_cast<Thread*>(Lookup(ThreadKey(id)));
	}

	WaitObject* WaitObjectFor(uint32 type, void* object) const
	{
		return dynamic_cast<WaitObject*>(Lookup(WaitObjectKey(type, object)));
	}

	ThreadWaitObject* ThreadWaitObjectFor(thread_id thread, uint32 type,
		void* object) const
	{
		return dynamic_cast<ThreadWaitObject*>(
			Lookup(ThreadWaitObjectKey(thread, type, object)));
	}

	// Registers the thread, creating it if not seen before. A non-NULL
	// name is recorded only the first time (an empty name means "not
	// known yet").
	status_t AddThread(thread_id id, const char* name)
	{
		Thread* thread = ThreadFor(id);
		if (thread == NULL) {
			void* memory = Allocate(sizeof(Thread));
			if (memory == NULL)
				return B_NO_MEMORY;

			thread = new(memory) Thread(id);
			Insert(thread);
			fAnalysis.thread_count++;
		}

		if (name != NULL && thread->name[0] == '\0')
			strlcpy(thread->name, name, sizeof(thread->name));

		return B_OK;
	}

	// Registers the wait object if it doesn't exist yet. Note: when the
	// object already exists, B_OK is returned WITHOUT setting
	// *_waitObject -- callers that need the pointer must Remove() the old
	// object first (see UpdateWaitObject()).
	status_t AddWaitObject(uint32 type, void* object,
		WaitObject** _waitObject = NULL)
	{
		if (WaitObjectFor(type, object) != NULL)
			return B_OK;

		void* memory = Allocate(sizeof(WaitObject));
		if (memory == NULL)
			return B_NO_MEMORY;

		WaitObject* waitObject = new(memory) WaitObject(type, object);
		Insert(waitObject);
		fAnalysis.wait_object_count++;

		// These types have no underlying kernel object whose name we could
		// look up later, so name them right away.
		if (type == THREAD_BLOCK_TYPE_SNOOZE
			|| type == THREAD_BLOCK_TYPE_SIGNAL) {
			strcpy(waitObject->name, "?");
		}

		if (_waitObject != NULL)
			*_waitObject = waitObject;

		return B_OK;
	}

	// Updates name/referenced object of the given wait object (no-op if it
	// wasn't registered). If the object is already named, a new kernel
	// object apparently reuses the same address: the old entry is unlinked
	// (kept in the counts) and a fresh one is created.
	status_t UpdateWaitObject(uint32 type, void* object, const char* name,
		void* referencedObject)
	{
		WaitObject* waitObject = WaitObjectFor(type, object);
		if (waitObject == NULL)
			return B_OK;

		if (waitObject->name[0] != '\0') {
			Remove(waitObject);
			status_t error = AddWaitObject(type, object, &waitObject);
			if (error != B_OK)
				return error;
		}

		if (name == NULL)
			name = "?";

		strlcpy(waitObject->name, name, sizeof(waitObject->name));
		waitObject->referenced_object = referencedObject;

		return B_OK;
	}

	// Like UpdateWaitObject(), but only fills in an existing, still
	// unnamed wait object. Returns whether an object was updated.
	bool UpdateWaitObjectDontAdd(uint32 type, void* object, const char* name,
		void* referencedObject)
	{
		WaitObject* waitObject = WaitObjectFor(type, object);
		if (waitObject == NULL || waitObject->name[0] != '\0')
			return false;

		if (name == NULL)
			name = "?";

		strlcpy(waitObject->name, name, sizeof(waitObject->name));
		waitObject->referenced_object = referencedObject;

		// This is a bool function: the previous "return B_OK" evaluated to
		// false (B_OK == 0), so the caller in analyze_scheduling() never
		// decremented its missing-object count.
		return true;
	}

	// Associates the thread with the given wait object (which must have
	// been added before) and makes it the thread's current wait object.
	status_t AddThreadWaitObject(Thread* thread, uint32 type, void* object)
	{
		WaitObject* waitObject = WaitObjectFor(type, object);
		if (waitObject == NULL) {
			// The algorithm should prevent this case.
			return B_ERROR;
		}

		ThreadWaitObject* threadWaitObject = ThreadWaitObjectFor(thread->id,
			type, object);
		// A stale association (pointing at a replaced WaitObject) is
		// unlinked and a fresh one created.
		if (threadWaitObject == NULL
			|| threadWaitObject->wait_object != waitObject) {
			if (threadWaitObject != NULL)
				Remove(threadWaitObject);

			void* memory = Allocate(sizeof(ThreadWaitObject));
			if (memory == NULL)
				return B_NO_MEMORY;

			threadWaitObject = new(memory) ThreadWaitObject(thread->id,
				waitObject);
			Insert(threadWaitObject);
			fAnalysis.thread_wait_object_count++;

			threadWaitObject->next_in_list = thread->wait_objects;
			thread->wait_objects = threadWaitObject;
		}

		thread->waitObject = threadWaitObject;

		return B_OK;
	}

	// Number of registered wait objects that still lack a name.
	int32 MissingWaitObjects() const
	{
		int32 count = 0;
		for (uint32 i = 0; i < fHashTableSize; i++) {
			HashObject* object = fHashTable[i];
			while (object != NULL) {
				WaitObject* waitObject = dynamic_cast<WaitObject*>(object);
				if (waitObject != NULL && waitObject->name[0] == '\0')
					count++;

				object = object->next;
			}
		}

		return count;
	}

	// Builds the thread pointer array the userland caller expects and
	// gives unnamed wait objects a best-effort name.
	status_t FinishAnalysis()
	{
		// allocate the thread array
		scheduling_analysis_thread** threads
			= (scheduling_analysis_thread**)Allocate(
				sizeof(Thread*) * fAnalysis.thread_count);
		if (threads == NULL)
			return B_NO_MEMORY;

		// Iterate through the hash table and collect all threads. Also
		// polish all wait objects on the way.
		int32 index = 0;
		for (uint32 i = 0; i < fHashTableSize; i++) {
			HashObject* object = fHashTable[i];
			while (object != NULL) {
				Thread* thread = dynamic_cast<Thread*>(object);
				if (thread != NULL) {
					threads[index++] = thread;
				} else if (WaitObject* waitObject
					= dynamic_cast<WaitObject*>(object)) {
					_PolishWaitObject(waitObject);
				}

				object = object->next;
			}
		}

		fAnalysis.threads = threads;
		dprintf("scheduling analysis: free bytes: %lu/%lu\n", fRemainingBytes, fSize);
		return B_OK;
	}

private:
	// Tries to derive a name for a still-unnamed wait object from the
	// kernel object it refers to; falls back to "?".
	void _PolishWaitObject(WaitObject* waitObject)
	{
		if (waitObject->name[0] != '\0')
			return;

		switch (waitObject->type) {
			case THREAD_BLOCK_TYPE_SEMAPHORE:
			{
				sem_info info;
				if (get_sem_info((sem_id)(addr_t)waitObject->object, &info)
					== B_OK) {
					strlcpy(waitObject->name, info.name,
						sizeof(waitObject->name));
				}
				break;
			}
			case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
			{
				// Only dereference the variable if it lies within the
				// kernel image -- anything else may be stale by now.
				ConditionVariable* variable
					= (ConditionVariable*)waitObject->object;
				if (!_IsInKernelImage(variable))
					break;

				waitObject->referenced_object = (void*)variable->Object();
				strlcpy(waitObject->name, variable->ObjectType(),
					sizeof(waitObject->name));
				break;
			}
			case THREAD_BLOCK_TYPE_MUTEX:
			{
				mutex* lock = (mutex*)waitObject->object;
				if (!_IsInKernelImage(lock))
					break;

				strlcpy(waitObject->name, lock->name, sizeof(waitObject->name));
				break;
			}
			case THREAD_BLOCK_TYPE_RW_LOCK:
			{
				rw_lock* lock = (rw_lock*)waitObject->object;
				if (!_IsInKernelImage(lock))
					break;

				strlcpy(waitObject->name, lock->name, sizeof(waitObject->name));
				break;
			}
			case THREAD_BLOCK_TYPE_OTHER:
			{
				// The object is expected to be a C string. Like the lock
				// cases above, only dereference it when it lies within the
				// kernel image. (The check used to be inverted and used
				// "return", which dereferenced unsafe pointers, skipped
				// the safe in-image strings, and bypassed the "?"
				// fallback below.)
				const char* name = (const char*)waitObject->object;
				if (name == NULL || !_IsInKernelImage(name))
					break;

				strlcpy(waitObject->name, name, sizeof(waitObject->name));
				break;
			}
			case THREAD_BLOCK_TYPE_OTHER_OBJECT:
			case THREAD_BLOCK_TYPE_SNOOZE:
			case THREAD_BLOCK_TYPE_SIGNAL:
			default:
				break;
		}

		if (waitObject->name[0] != '\0')
			return;

		strcpy(waitObject->name, "?");
	}

	// Whether the address lies within the kernel image's text..data range
	// determined in the constructor (i.e. is safe to dereference).
	bool _IsInKernelImage(const void* _address)
	{
		addr_t address = (addr_t)_address;
		return address >= fKernelStart && address < fKernelEnd;
	}

private:
	scheduling_analysis	fAnalysis;
	void*				fBuffer;			// user buffer (locked)
	size_t				fSize;
	HashObject**		fHashTable;			// table at the buffer's tail
	uint32				fHashTableSize;
	uint8*				fNextAllocation;	// bump allocator cursor
	size_t				fRemainingBytes;
	addr_t				fKernelStart;		// kernel image address range
	addr_t				fKernelEnd;
};
/*!	Walks the kernel tracing buffer and fills the given manager with the
	scheduling statistics for the time interval [from, until).

	Pass 1 iterates backward from the newest entry through the interval
	and registers every thread and wait object that occurs. Pass 2 then
	iterates forward again, driving a per-thread state machine
	(UNKNOWN/RUNNING/STILL_RUNNING/PREEMPTED/READY/WAITING) that
	accumulates run times, latencies, rerun times, and wait times.
	With SCHEDULING_ANALYSIS_TRACING, a third backward pass tries to name
	wait objects whose naming entries precede the interval.

	Returns B_OK on success, or the first error from the manager
	(typically B_NO_MEMORY when the analysis buffer is full).
*/
static status_t
analyze_scheduling(bigtime_t from, bigtime_t until,
	SchedulingAnalysisManager& manager)
{
	// Pass 1: backwards through the interval, collecting threads and wait
	// objects.
	TraceEntryIterator iterator;
	iterator.MoveTo(INT_MAX);
	while (TraceEntry* _entry = iterator.Previous()) {
		SchedulerTraceEntry* baseEntry
			= dynamic_cast<SchedulerTraceEntry*>(_entry);
		if (baseEntry == NULL || baseEntry->Time() >= until)
			continue;
		if (baseEntry->Time() < from)
			break;	// left the interval -- pass 1 done
		status_t error = manager.AddThread(baseEntry->ThreadID(),
			baseEntry->Name());
		if (error != B_OK)
			return error;
		if (ScheduleThread* entry = dynamic_cast<ScheduleThread*>(_entry)) {
			// also register the thread that was unscheduled here
			error = manager.AddThread(entry->PreviousThreadID(), NULL);
			if (error != B_OK)
				return error;
			if (entry->PreviousState() == B_THREAD_WAITING) {
				void* waitObject = (void*)entry->PreviousWaitObject();
				switch (entry->PreviousWaitObjectType()) {
					// snooze/signal waits carry no real wait object
					case THREAD_BLOCK_TYPE_SNOOZE:
					case THREAD_BLOCK_TYPE_SIGNAL:
						waitObject = NULL;
						break;
					case THREAD_BLOCK_TYPE_SEMAPHORE:
					case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
					case THREAD_BLOCK_TYPE_MUTEX:
					case THREAD_BLOCK_TYPE_RW_LOCK:
					case THREAD_BLOCK_TYPE_OTHER:
					default:
						break;
				}
				error = manager.AddWaitObject(entry->PreviousWaitObjectType(),
					waitObject);
				if (error != B_OK)
					return error;
			}
		}
	}
#if SCHEDULING_ANALYSIS_TRACING
	// remember where pass 1 stopped, for the catch-up pass below
	int32 startEntryIndex = iterator.Index();
#endif
	// Pass 2: forward through the interval, feeding the per-thread state
	// machine and accumulating the statistics.
	while (TraceEntry* _entry = iterator.Next()) {
#if SCHEDULING_ANALYSIS_TRACING
		// wait object naming entries: update the registered wait object
		if (WaitObjectTraceEntry* waitObjectEntry
			= dynamic_cast<WaitObjectTraceEntry*>(_entry)) {
			status_t error = manager.UpdateWaitObject(waitObjectEntry->Type(),
				waitObjectEntry->Object(), waitObjectEntry->Name(),
				waitObjectEntry->ReferencedObject());
			if (error != B_OK)
				return error;
			continue;
		}
#endif
		SchedulerTraceEntry* baseEntry
			= dynamic_cast<SchedulerTraceEntry*>(_entry);
		if (baseEntry == NULL)
			continue;
		if (baseEntry->Time() >= until)
			break;
		if (ScheduleThread* entry = dynamic_cast<ScheduleThread*>(_entry)) {
			// the scheduled thread
			Thread* thread = manager.ThreadFor(entry->ThreadID());
			bigtime_t diffTime = entry->Time() - thread->lastTime;
			if (thread->state == READY) {
				// scheduled after wake-up: the gap was scheduling latency
				thread->latencies++;
				thread->total_latency += diffTime;
				if (thread->min_latency < 0 || diffTime < thread->min_latency)
					thread->min_latency = diffTime;
				if (diffTime > thread->max_latency)
					thread->max_latency = diffTime;
			} else if (thread->state == PREEMPTED) {
				// scheduled again after preemption: the gap was rerun time
				thread->reruns++;
				thread->total_rerun_time += diffTime;
				if (thread->min_rerun_time < 0
					|| diffTime < thread->min_rerun_time) {
					thread->min_rerun_time = diffTime;
				}
				if (diffTime > thread->max_rerun_time)
					thread->max_rerun_time = diffTime;
			}
			if (thread->state == STILL_RUNNING) {
				// was running and continues to run
				thread->state = RUNNING;
			}
			if (thread->state != RUNNING) {
				// starts a new run interval now
				thread->lastTime = entry->Time();
				thread->state = RUNNING;
			}
			// now handle the unscheduled thread
			if (entry->ThreadID() == entry->PreviousThreadID())
				continue;
			thread = manager.ThreadFor(entry->PreviousThreadID());
			diffTime = entry->Time() - thread->lastTime;
			if (thread->state == STILL_RUNNING) {
				// unscheduled although still ready: preempted
				thread->runs++;
				thread->preemptions++;
				thread->total_run_time += diffTime;
				if (thread->min_run_time < 0 || diffTime < thread->min_run_time)
					thread->min_run_time = diffTime;
				if (diffTime > thread->max_run_time)
					thread->max_run_time = diffTime;
				thread->lastTime = entry->Time();
				thread->state = PREEMPTED;
			} else if (thread->state == RUNNING) {
				// unscheduled without re-enqueue: the thread starts waiting
				thread->runs++;
				thread->total_run_time += diffTime;
				if (thread->min_run_time < 0 || diffTime < thread->min_run_time)
					thread->min_run_time = diffTime;
				if (diffTime > thread->max_run_time)
					thread->max_run_time = diffTime;
				if (entry->PreviousState() == B_THREAD_WAITING) {
					void* waitObject = (void*)entry->PreviousWaitObject();
					switch (entry->PreviousWaitObjectType()) {
						// snooze/signal waits carry no real wait object
						case THREAD_BLOCK_TYPE_SNOOZE:
						case THREAD_BLOCK_TYPE_SIGNAL:
							waitObject = NULL;
							break;
						case THREAD_BLOCK_TYPE_SEMAPHORE:
						case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
						case THREAD_BLOCK_TYPE_MUTEX:
						case THREAD_BLOCK_TYPE_RW_LOCK:
						case THREAD_BLOCK_TYPE_OTHER:
						default:
							break;
					}
					status_t error = manager.AddThreadWaitObject(thread,
						entry->PreviousWaitObjectType(), waitObject);
					if (error != B_OK)
						return error;
				}
				thread->lastTime = entry->Time();
				thread->state = WAITING;
			} else if (thread->state == UNKNOWN) {
				// first time we see this thread -- derive its state from
				// the entry's previous-state info
				uint32 threadState = entry->PreviousState();
				if (threadState == B_THREAD_WAITING
					|| threadState == B_THREAD_SUSPENDED) {
					thread->lastTime = entry->Time();
					thread->state = WAITING;
				} else if (threadState == B_THREAD_READY) {
					thread->lastTime = entry->Time();
					thread->state = PREEMPTED;
				}
			}
		} else if (EnqueueThread* entry
			= dynamic_cast<EnqueueThread*>(_entry)) {
			// thread enqueued into the run queue
			Thread* thread = manager.ThreadFor(entry->ThreadID());
			if (thread->state == RUNNING || thread->state == STILL_RUNNING) {
				// a running thread re-enqueued while remaining ready
				thread->state = STILL_RUNNING;
			} else {
				// the thread was waiting and becomes ready: account the
				// elapsed time to its current wait object, if any
				bigtime_t diffTime = entry->Time() - thread->lastTime;
				if (thread->waitObject != NULL) {
					thread->waitObject->wait_time += diffTime;
					thread->waitObject->waits++;
					thread->waitObject = NULL;
				} else if (thread->state != UNKNOWN)
					thread->unspecified_wait_time += diffTime;
				thread->lastTime = entry->Time();
				thread->state = READY;
			}
		} else if (RemoveThread* entry = dynamic_cast<RemoveThread*>(_entry)) {
			// thread removed from the run queue -- treated as waiting
			Thread* thread = manager.ThreadFor(entry->ThreadID());
			bigtime_t diffTime = entry->Time() - thread->lastTime;
			if (thread->state == RUNNING) {
				// close the current run interval
				thread->runs++;
				thread->total_run_time += diffTime;
				if (thread->min_run_time < 0 || diffTime < thread->min_run_time)
					thread->min_run_time = diffTime;
				if (diffTime > thread->max_run_time)
					thread->max_run_time = diffTime;
			} else if (thread->state == READY || thread->state == PREEMPTED) {
				// time spent ready but not running
				thread->unspecified_wait_time += diffTime;
			}
			thread->lastTime = entry->Time();
			thread->state = WAITING;
		}
	}
#if SCHEDULING_ANALYSIS_TRACING
	// Pass 3: if wait objects are still unnamed, scan backward from where
	// pass 1 stopped for naming entries that precede the interval.
	int32 missingWaitObjects = manager.MissingWaitObjects();
	if (missingWaitObjects > 0) {
		iterator.MoveTo(startEntryIndex + 1);
		while (TraceEntry* _entry = iterator.Previous()) {
			if (WaitObjectTraceEntry* waitObjectEntry
				= dynamic_cast<WaitObjectTraceEntry*>(_entry)) {
				if (manager.UpdateWaitObjectDontAdd(
					waitObjectEntry->Type(), waitObjectEntry->Object(),
					waitObjectEntry->Name(),
					waitObjectEntry->ReferencedObject())) {
					if (--missingWaitObjects == 0)
						break;
				}
			}
		}
	}
#endif
	return B_OK;
}
}
#endif
/*!	Syscall backend: analyzes the scheduler tracing entries in
	[from, until) and stores the resulting scheduling_analysis in the
	caller-supplied \a analysis. The object data itself is placed in
	\a buffer, which the caller must keep around while using the result.
	Returns B_BAD_VALUE when scheduler tracing is compiled out or the
	buffer is unusable, otherwise the first error encountered.
*/
status_t
_user_analyze_scheduling(bigtime_t from, bigtime_t until, void* buffer,
	size_t size, scheduling_analysis* analysis)
{
#if SCHEDULER_TRACING
	using namespace SchedulingAnalysis;

	// Align the buffer start and size to 8 bytes, shrinking the usable
	// region accordingly.
	// NOTE(review): if size < 8 - diff, the subtraction wraps around; a
	// huge `size` then makes lock_memory() fail below, but an explicit
	// size check would be cleaner -- verify.
	if ((addr_t)buffer & 0x7) {
		addr_t diff = (addr_t)buffer & 0x7;
		buffer = (void*)((addr_t)buffer + 8 - diff);
		size -= 8 - diff;
	}
	size &= ~(size_t)0x7;

	if (buffer == NULL || !IS_USER_ADDRESS(buffer) || size == 0)
		return B_BAD_VALUE;

	// Pin the buffer in memory -- it is written to with interrupts
	// disabled below.
	status_t error = lock_memory(buffer, size, B_READ_DEVICE);
	if (error != B_OK)
		return error;

	SchedulingAnalysisManager manager(buffer, size);

	// Keep the tracing buffer stable while we walk it.
	InterruptsLocker locker;
	lock_tracing_buffer();

	error = analyze_scheduling(from, until, manager);

	unlock_tracing_buffer();
	locker.Unlock();

	// FinishAnalysis() calls get_sem_info() etc., so it must run with
	// interrupts enabled again.
	if (error == B_OK)
		error = manager.FinishAnalysis();

	unlock_memory(buffer, size, B_READ_DEVICE);

	if (error == B_OK) {
		// NOTE(review): `analysis` is not checked with IS_USER_ADDRESS()
		// first -- presumably user_memcpy() fails gracefully on bad
		// addresses; verify against the other syscalls' conventions.
		error = user_memcpy(analysis, manager.Analysis(),
			sizeof(scheduling_analysis));
	}

	return error;
#else
	return B_BAD_VALUE;
#endif
}