...
 
......@@ -173,6 +173,10 @@ public:
}
};
typedef OsLock<4,1024,1> InternalLock;
#if TESTING_LOCK_SPIN
typedef OsLock<4,TESTING_LOCK_SPIN,1> InternalLock;
#else
typedef OsLock<0,0,0> InternalLock;
#endif
#endif /* _OsLocks_h_ */
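The three OsLock template arguments are not spelled out in this hunk, so the following is a hedged sketch of the general spin-then-block pattern that TESTING_LOCK_SPIN appears to tune: spin with exponential backoff up to a limit, then fall back to blocking in the OS. The names and parameter meanings below (SpinStart, SpinLimit) are illustrative assumptions, not libfibre code.
#include <cstddef>
#include <mutex>

// x86 spin hint, mirroring Pause() in Platform.h
static inline void cpu_pause() { asm volatile("pause"); }

class SpinThenBlockLock {
  std::mutex mtx;                        // OS-level blocking lock used as the fallback
public:
  static const size_t SpinStart = 4;     // assumed meaning: initial spin iterations
  static const size_t SpinLimit = 1024;  // assumed meaning: max spin iterations (cf. TESTING_LOCK_SPIN)
  void acquire() {
    for (size_t spin = SpinStart; spin <= SpinLimit; spin += spin) {
      if (mtx.try_lock()) return;        // acquired without ever blocking
      for (size_t i = 0; i < spin; i += 1) cpu_pause();
    }
    mtx.lock();                          // spin budget exhausted: block in the kernel
  }
  void release() { mtx.unlock(); }
};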
......@@ -61,7 +61,7 @@ public:
// dedicated constructor for bootstrap: pthread becomes mainFibre
OsProcessor(Cluster& cluster, _friend<_Bootstrapper>);
// fake context for poller pthread, needed 'currScope' for timer handling
// fake context for poller pthread, set 'currScope' for timer handling
static void setupFakeContext(StackContext* sc, EventScope* es, _friend<BaseThreadPoller>);
~OsProcessor() { RABORT("Cannot delete OsProcessor"); }
......
......@@ -24,7 +24,7 @@ struct _cfibre_cond_t : public fibre_cond_t {};
struct _cfibre_rwlock_t : public fibre_rwlock_t {};
struct _cfibre_barrier_t : public fibre_barrier_t {};
struct _cfast_mutex_t : public fast_mutex_t {};
struct _cfast_cond_t : public fast_cond_t {};
struct _cfibre_attr_t : public fibre_attr_t {};
struct _cfibre_mutexattr_t : public fibre_mutexattr_t {};
......@@ -32,7 +32,7 @@ struct _cfibre_condattr_t : public fibre_condattr_t {};
struct _cfibre_rwlockattr_t : public fibre_rwlockattr_t {};
struct _cfibre_barrierattr_t : public fibre_barrierattr_t {};
struct _cfast_mutexattr_t : public fast_mutexattr_t {};
struct _cfast_condattr_t : public fast_condattr_t {};
struct _cfibre_cluster_t : public Cluster {};
struct _cfibre_sproc_t : public OsProcessor {
......
......@@ -5,6 +5,9 @@
#define TESTING_POLLER_FIBRE_SPIN 65536 // poller fibre: spin loop of NB polls
#define TESTING_LAZY_FD_REGISTRATION 1 // lazy vs. eager registration after fd creation
// **** libfibre options - system threading
#define TESTING_LOCK_SPIN 1024 // spin before blocking on system lock
/******************************** sanity checks ********************************/
#if !TESTING_LOADBALANCING
......
......@@ -102,14 +102,11 @@ void BaseProcessor::idleLoop() {
}
}
void BaseProcessor::enqueueResume(StackContext& s, _friend<StackContext>) {
#if TESTING_LOADBALANCING
if (!scheduler.addReadyStack(s)) enqueueDirect(s);
#else
enqueueDirect(s);
readyCount.V();
#endif
bool BaseProcessor::addReadyStack(StackContext& s) {
return scheduler.addReadyStack(s);
}
#endif
StackContext& BaseProcessor::scheduleFull(_friend<StackContext>) {
#if TESTING_LOADBALANCING
......
......@@ -93,6 +93,16 @@ class BaseProcessor : public ProcessorRing::Link {
void idleLoopTerminate();
void enqueueDirect(StackContext& s) {
DBG::outl(DBG::Level::Scheduling, "Stack ", FmtHex(&s), " queueing on ", FmtHex(this));
stats->enq.count();
readyQueue.enqueue(s);
}
#if TESTING_LOADBALANCING
bool addReadyStack(StackContext& s);
#endif
protected:
Scheduler& scheduler;
StackContext* idleStack;
......@@ -106,12 +116,6 @@ protected:
StackContext::idleYieldTo(sc, _friend<BaseProcessor>());
}
void enqueueDirect(StackContext& s) {
DBG::outl(DBG::Level::Scheduling, "Stack ", FmtHex(&s), " queueing on ", FmtHex(this));
stats->enq.count();
readyQueue.enqueue(s);
}
public:
BaseProcessor(Scheduler& c, const char* n = "Processor") : scheduler(c), idleStack(nullptr), stackCount(0) {
stats = new ProcessorStats(this, n);
......@@ -136,7 +140,14 @@ public:
enqueueDirect(s);
}
void enqueueResume(StackContext& s, _friend<StackContext>);
void enqueueResume(StackContext& s, _friend<StackContext>) {
#if TESTING_LOADBALANCING
if (!addReadyStack(s)) enqueueDirect(s);
#else
enqueueDirect(s);
readyCount.V();
#endif
}
StackContext& scheduleFull(_friend<StackContext>);
StackContext* scheduleYield(_friend<StackContext>);
......
......@@ -17,8 +17,9 @@
#ifndef _Basics_h_
#define _Basics_h_ 1
#include "runtime/Platform.h"
#include "runtime/Assertions.h"
#include "runtime/FloatingPoint.h"
#include "runtime/Platform.h"
class NoObject {
NoObject() = delete; // no creation
......
......@@ -333,25 +333,37 @@ public:
};
class FastMutex : public BaseSuspender {
BlockStackMCS queue;
volatile size_t counter;
FlexStackMPSC queue;
public:
FastMutex() : counter(0) {}
bool test() { return counter > 0; }
bool acquire() {
if (__atomic_add_fetch(&counter, 1, __ATOMIC_SEQ_CST) == 1) return true;
StackContext* cs = Context::CurrStack();
if (!queue.push(*cs)) {
prepareSuspend();
doSuspend(*cs);
}
queue.push(*cs);
prepareSuspend();
doSuspend(*cs);
return true;
}
bool tryAcquire() {
StackContext* cs = Context::CurrStack();
return queue.tryPushEmpty(*cs);
size_t c = counter;
return (c == 0) && __atomic_compare_exchange_n(&counter, &c, c+1, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}
template<bool DirectSwitch = false>
void release() {
StackContext* cs = Context::CurrStack();
StackContext* next = queue.next(*cs);
BlockStackMCS::clear(*cs);
if (next) next->resume();
if (__atomic_sub_fetch(&counter, 1, __ATOMIC_SEQ_CST) == 0) return;
StackContext* next;
for (;;) {
next = queue.pop();
if (next) break;
Pause();
}
next->resume<DirectSwitch>();
}
};
......
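For context, the rewritten FastMutex above follows the classic "benaphore" pattern: an atomic counter arbitrates ownership so the uncontended path never touches the blocking machinery, and contended acquires park on a queue (FlexStackMPSC) that the releaser pops from. The thread-level sketch below shows the same counter logic with a C++20 counting semaphore standing in for the fibre queue; it is an illustration, not the libfibre implementation.
#include <atomic>
#include <cstddef>
#include <semaphore>

class Benaphore {
  std::atomic<size_t> counter{0};
  std::counting_semaphore<> waiters{0};  // stands in for parking fibres on FlexStackMPSC
public:
  void acquire() {
    if (counter.fetch_add(1, std::memory_order_acquire) == 0) return;  // uncontended fast path
    waiters.acquire();                                                 // wait for a release to hand over
  }
  bool tryAcquire() {
    size_t c = 0;
    return counter.compare_exchange_strong(c, 1, std::memory_order_acquire);
  }
  void release() {
    if (counter.fetch_sub(1, std::memory_order_release) == 1) return;  // nobody was waiting
    waiters.release();                                                  // wake exactly one waiter
  }
};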
/******************************************************************************
Copyright (C) Martin Karsten 2015-2019
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
#ifndef _FloatingPoint_h_
#define _FloatingPoint_h_ 1
#include <stdint.h>
#if defined(__x86_64__)
class FloatingPointFlags { // FP (x87/SSE) control/status words (ABI Section 3.2.3, Fig 3.4)
uint32_t csr;
uint32_t cw;
public:
FloatingPointFlags(uint32_t csr = 0x1FC0, uint32_t cw = 0x037F) : csr(csr), cw(cw) {}
FloatingPointFlags(bool s) { if (s) save(); }
void save() {
asm volatile("stmxcsr %0" : "=m"(csr) :: "memory");
asm volatile("fnstcw %0" : "=m"(cw) :: "memory");
}
void restore() {
asm volatile("ldmxcsr %0" :: "m"(csr) : "memory");
asm volatile("fldcw %0" :: "m"(cw) : "memory");
}
};
class FloatingPointContext {
char fpu[512] __attribute__((__aligned__(16))); // alignment required for fxsave/fxrstor
enum State { Init = 0, Clean = 1, Dirty = 2 } state;
public:
FloatingPointContext() : state(Init) {}
void setClean() { state = Clean; }
bool isClean() const { return state == Clean; }
static void initCPU() {
asm volatile("finit" ::: "memory");
}
void save() { // TODO: later use XSAVEOPTS for complete SSE/AVX/etc state!
asm volatile("fxsave %0" : "=m"(fpu) :: "memory");
state = Dirty;
}
void restore() { // TODO: later use XRSTOR for complete SSE/AVX/etc state!
if (state == Dirty) asm volatile("fxrstor %0" :: "m"(fpu) : "memory");
else if (state == Init) initCPU();
state = Clean;
}
};
#else
#error unsupported architecture: only __x86_64__ supported at this time
#endif
#endif /* _FloatingPoint_h_ */
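A hedged usage sketch of the relocated FloatingPointContext: the header itself says nothing about when it is called, but a plausible reading is that a stack switch saves the outgoing context's FP/SSE state with fxsave and restores (or re-initializes) the incoming one, with the Init/Clean/Dirty states avoiding redundant fxrstor work. The surrounding type and function names below are invented for illustration.
struct DemoFibre {                  // hypothetical container, not a libfibre type
  FloatingPointContext fp;
};

static void switchFP(DemoFibre& from, DemoFibre& to) {
  from.fp.save();                   // fxsave: outgoing state captured, marked Dirty
  to.fp.restore();                  // fxrstor if Dirty, finit if never used, then marked Clean
}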
......@@ -412,7 +412,7 @@ template<typename T, size_t NUM, size_t CNT, typename LT> class IntrusiveQueueMC
static_assert(NUM < CNT, "NUM >= CNT");
protected:
T* tail;
T* volatile tail;
public:
IntrusiveQueueMCS() : tail(nullptr) {}
......
......@@ -19,16 +19,16 @@
#include "runtime/testoptions.h"
#include <cstddef>
#include <cstdint>
#include <stddef.h>
#include <stdint.h>
#if defined(__x86_64__)
#if defined(__clang__) // avoid include file problems
static inline void Pause(void) { asm volatile("pause"); }
#else
#if defined(__GNUC__) && !defined(__clang__)
#include <xmmintrin.h> // _mm_pause
static inline void Pause(void) { _mm_pause(); }
#else // avoid include file problems
static inline void Pause(void) { asm volatile("pause"); }
#endif
static inline void MemoryFence(void) { asm volatile("mfence" ::: "memory"); }
......@@ -50,9 +50,11 @@ static const size_t charbits = 8;
static const size_t pageoffsetbits = 12;
static const size_t pagetablebits = 9;
static const size_t pagelevels = 4;
#if defined(__cplusplus)
static const size_t pagebits = pageoffsetbits + pagetablebits * pagelevels;
static const size_t framebits = pageoffsetbits + 40;
static const size_t ptentries = 1 << pagetablebits;
#endif
static const vaddr stackAlignment = 16;
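As a quick sanity check of these constants (my arithmetic, not part of the header): 12 offset bits plus 4 levels of 9-bit page tables give the canonical 48-bit x86-64 virtual address, and 12 + 40 matches the architectural 52-bit physical address limit.
static_assert(12 + 9 * 4 == 48, "pagebits: 48-bit virtual addresses");
static_assert(12 + 40 == 52, "framebits: 52-bit physical addresses");
static_assert((1 << 9) == 512, "ptentries: 512 entries per page table");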
......@@ -79,43 +81,6 @@ static inline void unreachable() {
__builtin_trap();
}
class FloatingPointFlags { // FP (x87/SSE) control/status words (ABI Section 3.2.3, Fig 3.4)
uint32_t csr;
uint32_t cw;
public:
FloatingPointFlags(uint32_t csr = 0x1FC0, uint32_t cw = 0x037F) : csr(csr), cw(cw) {}
FloatingPointFlags(bool s) { if (s) save(); }
void save() {
asm volatile("stmxcsr %0" : "=m"(csr) :: "memory");
asm volatile("fnstcw %0" : "=m"(cw) :: "memory");
}
void restore() {
asm volatile("ldmxcsr %0" :: "m"(csr) : "memory");
asm volatile("fldcw %0" :: "m"(cw) : "memory");
}
};
class FloatingPointContext {
char fpu[512] __attribute__((__aligned__(16))); // alignment required for fxsave/fxrstor
enum State { Init = 0, Clean = 1, Dirty = 2 } state;
public:
FloatingPointContext() : state(Init) {}
void setClean() { state = Clean; }
bool isClean() const { return state == Clean; }
static void initCPU() {
asm volatile("finit" ::: "memory");
}
void save() { // TODO: later use XSAVEOPTS for complete SSE/AVX/etc state!
asm volatile("fxsave %0" : "=m"(fpu) :: "memory");
state = Dirty;
}
void restore() { // TODO: later use XRSTOR for complete SSE/AVX/etc state!
if (state == Dirty) asm volatile("fxrstor %0" :: "m"(fpu) : "memory");
else if (state == Init) initCPU();
state = Clean;
}
};
#else
#error unsupported architecture: only __x86_64__ supported at this time
#endif
......
......@@ -20,7 +20,7 @@
#include "runtime/Basics.h"
template<typename T>
static inline bool _CAS(T *ptr, T expected, T desired, int success_memorder = __ATOMIC_SEQ_CST, int failure_memorder = __ATOMIC_RELAXED) {
static inline bool _CAS(T volatile *ptr, T expected, T desired, int success_memorder = __ATOMIC_SEQ_CST, int failure_memorder = __ATOMIC_RELAXED) {
T* exp = &expected;
return __atomic_compare_exchange_n(ptr, exp, desired, false, success_memorder, failure_memorder);
}
......@@ -126,13 +126,13 @@ public:
volatile bool wait;
};
private:
Node* tail;
Node* volatile tail;
public:
MCSLock() : tail(nullptr) {}
bool test() const { return tail != nullptr; }
bool tryAcquire(Node& n) {
n.next = nullptr;
return ((tail == nullptr) && _CAS(&tail, (Node*)nullptr, &n,__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
return ((tail == nullptr) && _CAS(&tail, (Node*)nullptr, &n, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
}
void acquire(Node& n) {
n.next = nullptr;
......
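The hunk above only shows tryAcquire(), so here is a self-contained sketch of the standard MCS queue lock for reference: each caller supplies its own node, waiters spin only on their own flag, and release hands the lock to the successor or clears the tail. This is the textbook algorithm written with std::atomic, not the exact MCSLock code.
#include <atomic>

class MCS {
public:
  struct Node {
    std::atomic<Node*> next{nullptr};
    std::atomic<bool>  wait{false};
  };
private:
  std::atomic<Node*> tail{nullptr};
public:
  void acquire(Node& n) {
    n.next.store(nullptr, std::memory_order_relaxed);
    Node* prev = tail.exchange(&n, std::memory_order_acq_rel);
    if (!prev) return;                                  // lock was free
    n.wait.store(true, std::memory_order_relaxed);
    prev->next.store(&n, std::memory_order_release);    // link behind predecessor
    while (n.wait.load(std::memory_order_acquire)) asm volatile("pause");
  }
  void release(Node& n) {
    Node* succ = n.next.load(std::memory_order_acquire);
    if (!succ) {
      Node* expected = &n;                              // no known successor: try to clear tail
      if (tail.compare_exchange_strong(expected, nullptr, std::memory_order_acq_rel)) return;
      while (!(succ = n.next.load(std::memory_order_acquire))) asm volatile("pause");
    }
    succ->wait.store(false, std::memory_order_release); // hand the lock to the successor
  }
};
// Typical use: MCS lock; MCS::Node n; lock.acquire(n); /* critical section */ lock.release(n);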
......@@ -32,7 +32,7 @@ StackContext::StackContext(Scheduler& scheduler, bool bg)
template<StackContext::SwitchCode Code>
inline void StackContext::switchStack(StackContext& nextStack) {
// various checks
static_assert(Code == Idle || Code == Yield || Code == Migrate || Code == Suspend || Code == Terminate, "Illegal SwitchCode");
static_assert(Code == Idle || Code == Yield || Code == Resume || Code == Suspend || Code == Terminate, "Illegal SwitchCode");
CHECK_PREEMPTION(0);
RASSERT(this == Context::CurrStack() && this != &nextStack, FmtHex(this), ' ', FmtHex(Context::CurrStack()), ' ', FmtHex(&nextStack));
......@@ -42,7 +42,7 @@ inline void StackContext::switchStack(StackContext& nextStack) {
switch (Code) {
case Idle: stackSwitch(this, postIdle, &stackPointer, nextStack.stackPointer); break;
case Yield: stackSwitch(this, postYield, &stackPointer, nextStack.stackPointer); break;
case Migrate: stackSwitch(this, postMigrate, &stackPointer, nextStack.stackPointer); break;
case Resume: stackSwitch(this, postResume, &stackPointer, nextStack.stackPointer); break;
case Suspend: stackSwitch(this, postSuspend, &stackPointer, nextStack.stackPointer); break;
case Terminate: stackSwitch(this, postTerminate, &stackPointer, nextStack.stackPointer); break;
}
......@@ -62,7 +62,7 @@ void StackContext::postYield(StackContext* prevStack) {
}
// yield -> resume right away
void StackContext::postMigrate(StackContext* prevStack) {
void StackContext::postResume(StackContext* prevStack) {
CHECK_PREEMPTION(0);
prevStack->resumeInternal();
}
......@@ -81,6 +81,24 @@ void StackContext::postTerminate(StackContext* prevStack) {
RuntimeStackDestroy(*prevStack, _friend<StackContext>());
}
void StackContext::suspendInternal() {
switchStack<Suspend>(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
}
void StackContext::resumeInternal() {
processor->enqueueResume(*this, _friend<StackContext>());
}
void StackContext::resumeDirect() {
yieldResume(*this);
}
void StackContext::changeProcessor(BaseProcessor& p) {
processor->removeStack(_friend<StackContext>());
processor = &p;
processor->addStack(_friend<StackContext>());
}
// a new thread/stack starts in stubInit() and then jumps to this routine
extern "C" void invokeStack(funcvoid3_t func, ptr_t arg1, ptr_t arg2, ptr_t arg3) {
CHECK_PREEMPTION(0);
......@@ -90,29 +108,37 @@ extern "C" void invokeStack(funcvoid3_t func, ptr_t arg1, ptr_t arg2, ptr_t arg3
StackContext::terminate();
}
void StackContext::idleYieldTo(StackContext& nextStack, _friend<BaseProcessor>) {
inline void StackContext::yieldTo(StackContext& nextStack) {
CHECK_PREEMPTION(1); // expect preemption still enabled
RuntimeDisablePreemption();
Context::CurrStack()->switchStack<Idle>(nextStack);
Context::CurrStack()->switchStack<Yield>(nextStack);
RuntimeEnablePreemption();
}
bool StackContext::yield() {
inline void StackContext::yieldResume(StackContext& nextStack) {
CHECK_PREEMPTION(1); // expect preemption still enabled
RuntimeDisablePreemption();
StackContext* nextStack = Context::CurrProcessor().scheduleYield(_friend<StackContext>());
if (nextStack) Context::CurrStack()->switchStack<Yield>(*nextStack);
Context::CurrStack()->switchStack<Resume>(nextStack);
RuntimeEnablePreemption();
}
bool StackContext::yield() {
StackContext* nextStack = Context::CurrProcessor().scheduleYield(_friend<StackContext>());
if (nextStack) yieldTo(*nextStack);
return nextStack;
}
bool StackContext::yieldGlobal() {
StackContext* nextStack = Context::CurrProcessor().scheduleYieldGlobal(_friend<StackContext>());
if (nextStack) yieldTo(*nextStack);
return nextStack;
}
void StackContext::idleYieldTo(StackContext& nextStack, _friend<BaseProcessor>) {
CHECK_PREEMPTION(1); // expect preemption still enabled
RuntimeDisablePreemption();
StackContext* nextStack = Context::CurrProcessor().scheduleYieldGlobal(_friend<StackContext>());
if (nextStack) Context::CurrStack()->switchStack<Yield>(*nextStack);
Context::CurrStack()->switchStack<Idle>(nextStack);
RuntimeEnablePreemption();
return nextStack;
}
void StackContext::preempt() {
......@@ -128,20 +154,6 @@ void StackContext::terminate() {
unreachable();
}
void StackContext::suspendInternal() {
switchStack<Suspend>(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
}
void StackContext::resumeInternal() {
processor->enqueueResume(*this, _friend<StackContext>());
}
void StackContext::changeProcessor(BaseProcessor& p) {
processor->removeStack(_friend<StackContext>());
processor = &p;
processor->addStack(_friend<StackContext>());
}
void StackContext::rebalance() {
if (!affinity) changeProcessor(Context::CurrProcessor().getScheduler().placement(_friend<StackContext>(), true));
}
......@@ -156,9 +168,7 @@ void StackContext::migrateNow(BaseProcessor& proc) {
StackContext* sc = Context::CurrStack();
sc->affinity = false;
sc->changeProcessor(proc);
RuntimeDisablePreemption();
sc->switchStack<Migrate>(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
RuntimeEnablePreemption();
sc->yieldResume(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
}
// migrate to scheduler (for disk I/O), don't change stackCount or affinity
......@@ -166,9 +176,7 @@ BaseProcessor& StackContext::migrateNow(Scheduler& scheduler, _friend<EventScope
StackContext* sc = Context::CurrStack();
BaseProcessor* proc = sc->processor;
sc->processor = &scheduler.placement(_friend<StackContext>(), true);
RuntimeDisablePreemption();
sc->switchStack<Migrate>(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
RuntimeEnablePreemption();
sc->yieldResume(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
return *proc;
}
......@@ -176,7 +184,5 @@ BaseProcessor& StackContext::migrateNow(Scheduler& scheduler, _friend<EventScope
void StackContext::migrateNow(BaseProcessor& proc, _friend<EventScope>) {
StackContext* sc = Context::CurrStack();
sc->processor = &proc;
RuntimeDisablePreemption();
sc->switchStack<Migrate>(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
RuntimeEnablePreemption();
sc->yieldResume(Context::CurrProcessor().scheduleFull(_friend<StackContext>()));
}
......@@ -34,9 +34,9 @@ class KernelProcessor;
class Scheduler;
#if TESTING_ENABLE_DEBUGGING
static const size_t StackLinkCount = 3;
#else
static const size_t StackLinkCount = 2;
#else
static const size_t StackLinkCount = 1;
#endif
template <size_t NUM> class StackList :
......@@ -56,15 +56,12 @@ public IntrusiveQueueStub<StackContext,NUM,StackLinkCount,DoubleLink<StackContex
#endif
static const size_t FlexQueueLink = 0;
typedef StackList<FlexQueueLink> FlexStackList;
typedef StackList <FlexQueueLink> FlexStackList;
typedef StackQueue<FlexQueueLink> FlexStackQueue;
typedef StackMPSC<FlexQueueLink> FlexStackMPSC;
static const size_t BlockQueueLink = 1;
typedef StackMCS<BlockQueueLink> BlockStackMCS;
typedef StackMPSC <FlexQueueLink> FlexStackMPSC;
#if TESTING_ENABLE_DEBUGGING
static const size_t DebugListLink = 2;
static const size_t DebugListLink = 1;
typedef StackList<DebugListLink> GlobalStackList;
#endif
......@@ -81,19 +78,22 @@ class StackContext : public DoubleLink<StackContext,StackLinkCount> {
const StackContext& operator=(const StackContext&) = delete;
// central stack switching routine
enum SwitchCode { Idle = 'I', Yield = 'Y', Migrate = 'M', Suspend = 'S', Terminate = 'T' };
enum SwitchCode { Idle = 'I', Yield = 'Y', Resume = 'R', Suspend = 'S', Terminate = 'T' };
template<SwitchCode> inline void switchStack(StackContext& nextStack);
// these routines are called immediately after the stack switch
static void postIdle (StackContext* prevStack);
static void postYield (StackContext* prevStack);
static void postMigrate (StackContext* prevStack);
static void postResume (StackContext* prevStack);
static void postSuspend (StackContext* prevStack);
static void postTerminate(StackContext* prevStack);
void suspendInternal();
void resumeInternal();
void resumeDirect();
void changeProcessor(BaseProcessor&);
static inline void yieldTo(StackContext& nextStack);
static inline void yieldResume(StackContext& nextStack);
protected:
// constructor/destructors can only be called by derived classes
......@@ -126,9 +126,9 @@ public:
}
// context switching - static -> apply to Context::CurrStack()
static void idleYieldTo(StackContext& nextStack, _friend<BaseProcessor>);
static bool yield();
static bool yieldGlobal();
static void idleYieldTo(StackContext& nextStack, _friend<BaseProcessor>);
static void preempt();
static void terminate() __noreturn;
......@@ -138,15 +138,21 @@ public:
size_t spin = SpinStart;
while (spin <= SpinEnd) {
for (size_t i = 0; i < spin; i += 1) Pause();
if (runState) return; // resumed already? skip suspend
// resumed already? skip suspend
size_t exp = 2;
if (__atomic_compare_exchange_n(&runState, &exp, 1, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) return;
spin += spin;
}
suspendInternal();
}
// if suspended (runState == 0), resume
template<bool DirectSwitch = false>
void resume() {
if (__atomic_fetch_add( &runState, 1, __ATOMIC_RELAXED ) == 0) resumeInternal();
if (__atomic_fetch_add( &runState, 1, __ATOMIC_RELAXED ) == 0) {
if (DirectSwitch) resumeDirect();
else resumeInternal();
}
}
// set ResumeInfo to facilitate later resume race
......
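To make the runState handshake above easier to follow, here is a toy thread-level model of how I read it (the encoding is an assumption): 1 means running, 2 means a resume arrived while the owner was still spinning, and 0 means the owner committed to suspending. Only the 0 -> 1 transition triggers a real wakeup; a mutex/condvar parking spot stands in for the actual stack switch.
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>

class ParkingSpot {                     // sticky one-shot wakeup, invented for this sketch
  std::mutex m;
  std::condition_variable cv;
  bool woken = false;
public:
  void park()   { std::unique_lock<std::mutex> lk(m); cv.wait(lk, [&]{ return woken; }); woken = false; }
  void unpark() { { std::lock_guard<std::mutex> lk(m); woken = true; } cv.notify_one(); }
};

class Suspendable {
  std::atomic<size_t> runState{1};      // 1 running, 2 resume pending, 0 suspended (assumed encoding)
  ParkingSpot spot;
public:
  void suspend() {
    for (size_t spin = 4; spin <= 1024; spin += spin) {      // spin window before blocking
      size_t exp = 2;
      if (runState.compare_exchange_strong(exp, 1)) return;  // resume already arrived: consume it
      for (size_t i = 0; i < spin; i += 1) asm volatile("pause");
    }
    if (runState.fetch_sub(1) == 1) spot.park();             // 1->0 commits to blocking; 2->1 means a late resume won
  }
  void resume() {
    if (runState.fetch_add(1) == 0) spot.unpark();           // only the suspended case needs a wakeup
  }
};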
......@@ -13,7 +13,7 @@
#define TESTING_NEMESIS_READYQUEUE 1 // lock-free: nemesis vs. stub-based MPSC
//#define TESTING_PLACEMENT_STAGING 1 // load-based staging vs. round-robin placement
//#define TESTING_IDLE_SPIN 65536 // spin before idle loop
#define TESTING_HALT_SPIN 65536 // spin before halting worker pthread
#define TESTING_HALT_SPIN 65536 // spin before halting worker thread/core
//#define TESTING_MUTEX_FIFO 1 // use fifo/baton mutex
//#define TESTING_MUTEX_BARGING 1 // use blocking/barging mutex
//#define TESTING_MUTEX_SPIN 1 // spin before block in non-fifo mutex
......