Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 5 additions & 36 deletions src/snmalloc/aal/aal.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,10 @@
#include "../ds_core/ds_core.h"
#include "aal_concept.h"
#include "aal_consts.h"

#if __has_include(<time.h>)
# include <time.h>
# ifdef CLOCK_MONOTONIC
# define SNMALLOC_TICK_USE_CLOCK_GETTIME
# endif
#endif
#include "snmalloc/stl/utility.h"

#include <stdint.h>

#ifndef SNMALLOC_TICK_USE_CLOCK_GETTIME
# include <chrono>
#endif

#if ( \
defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \
defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \
Expand Down Expand Up @@ -169,37 +158,17 @@ namespace snmalloc
/**
* Return an architecture-specific cycle counter.
*
* If the compiler provides a portable prefetch builtin, use it directly,
* otherwise delegate to the architecture-specific layer. This allows new
* architectures to avoid needing to implement a custom `tick` method
* if they are used only with a compiler that provides the builtin.
* If the architecture reports that CPU cycle counters are unavailable,
* use any architecture-specific implementation that exists, otherwise
* fall back to zero. When counters are available, prefer a compiler
* builtin and then the architecture-specific implementation.
*/
static inline uint64_t tick() noexcept
{
if constexpr (
(Arch::aal_features & NoCpuCycleCounters) == NoCpuCycleCounters)
{
#ifdef SNMALLOC_TICK_USE_CLOCK_GETTIME
// the buf is populated by clock_gettime
SNMALLOC_UNINITIALISED timespec buf;
// we can skip the error checking here:
// * EFAULT: for out-of-bound pointers (buf is always valid stack
// memory)
// * EINVAL: for invalid clock_id (we only use CLOCK_MONOTONIC enforced
// by POSIX.1)
// Notice that clock_gettime is usually a vDSO call, so the overhead
// is minimal.
::clock_gettime(CLOCK_MONOTONIC, &buf);
return static_cast<uint64_t>(buf.tv_sec) * 1000'000'000 +
static_cast<uint64_t>(buf.tv_nsec);
# undef SNMALLOC_TICK_USE_CLOCK_GETTIME
#else
auto tick = std::chrono::high_resolution_clock::now();
return static_cast<uint64_t>(
std::chrono::duration_cast<std::chrono::nanoseconds>(
tick.time_since_epoch())
.count());
#endif
return 0;
}
else
{
Expand Down
2 changes: 2 additions & 0 deletions src/snmalloc/backend_helpers/backend_helpers.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#include "../mem/mem.h"
#include "authmap.h"
#include "buddy.h"
Expand Down
1 change: 1 addition & 0 deletions src/snmalloc/ds_aal/ds_aal.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
#include "../aal/aal.h"
#include "flaglock.h"
#include "prevent_fork.h"
#include "seqset.h"
#include "singleton.h"
File renamed without changes.
2 changes: 2 additions & 0 deletions src/snmalloc/ds_core/cheri.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#include "mitigations.h"

namespace snmalloc
Expand Down
1 change: 0 additions & 1 deletion src/snmalloc/ds_core/ds_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@
#include "mitigations.h"
#include "ptrwrap.h"
#include "redblacktree.h"
#include "seqset.h"
#include "tid.h"
2 changes: 2 additions & 0 deletions src/snmalloc/global/global.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#include "bounds_checks.h"
#include "globalalloc.h"
#include "libc.h"
Expand Down
2 changes: 2 additions & 0 deletions src/snmalloc/mem/mem.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#include "backend_concept.h"
#include "backend_wrappers.h"
#include "check_init.h"
Expand Down
23 changes: 23 additions & 0 deletions src/snmalloc/pal/pal_posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <strings.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <time.h>
#include <unistd.h>

#if __has_include(<sys/random.h>)
Expand Down Expand Up @@ -404,6 +405,28 @@ namespace snmalloc
(static_cast<uint64_t>(ts.tv_nsec) / 1000000);
}

static uint64_t tick()
{
if constexpr (
(Aal::aal_features & NoCpuCycleCounters) != NoCpuCycleCounters)
{
return Aal::tick();
}
else
{
auto hold = KeepErrno();

struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
{
error("Failed to get monotonic time");
}

return (static_cast<uint64_t>(ts.tv_sec) * 1'000'000'000) +
static_cast<uint64_t>(ts.tv_nsec);
}
}

static uint64_t dev_urandom()
{
union
Expand Down
33 changes: 29 additions & 4 deletions src/snmalloc/pal/pal_windows.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#ifdef _WIN32
# ifndef _MSC_VER
# include <errno.h>
# include <stdio.h>
# endif
# include <cstdio>
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
Expand Down Expand Up @@ -309,12 +309,11 @@ namespace snmalloc
return result;
}

static uint64_t internal_time_in_ms()
static uint64_t performance_counter_frequency()
{
// Performance counter is a high-precision monotonic clock.
static stl::Atomic<uint64_t> freq_cache = 0;
constexpr uint64_t ms_per_second = 1000;
SNMALLOC_UNINITIALISED LARGE_INTEGER buf;

auto freq = freq_cache.load(stl::memory_order_relaxed);
if (SNMALLOC_UNLIKELY(freq == 0))
{
Expand All @@ -324,10 +323,36 @@ namespace snmalloc
freq = static_cast<uint64_t>(buf.QuadPart);
freq_cache.store(freq, stl::memory_order_relaxed);
}

return freq;
}

static uint64_t internal_time_in_ms()
{
constexpr uint64_t ms_per_second = 1000;
SNMALLOC_UNINITIALISED LARGE_INTEGER buf;
auto freq = performance_counter_frequency();
::QueryPerformanceCounter(&buf);
return (static_cast<uint64_t>(buf.QuadPart) * ms_per_second) / freq;
}

/**
 * Return a tick value for timing measurements.
 *
 * If the architecture layer does not set `NoCpuCycleCounters`, the call
 * is forwarded to `Aal::tick()`.  Otherwise the performance counter is
 * read and converted to nanoseconds using the frequency reported by
 * `performance_counter_frequency()`.
 */
static uint64_t tick()
{
  if constexpr (
    (Aal::aal_features & NoCpuCycleCounters) != NoCpuCycleCounters)
  {
    return Aal::tick();
  }
  else
  {
    constexpr uint64_t ns_per_second = 1'000'000'000;
    // Filled in by QueryPerformanceCounter below.
    SNMALLOC_UNINITIALISED LARGE_INTEGER buf;
    const uint64_t freq = performance_counter_frequency();
    ::QueryPerformanceCounter(&buf);
    const uint64_t count = static_cast<uint64_t>(buf.QuadPart);

    // Multiplying the raw count by 1e9 wraps a 64-bit integer once the
    // count exceeds about 1.8e10 ticks (e.g. roughly half an hour at a
    // 10 MHz counter frequency), which would make the result jump
    // backwards.  Converting whole seconds and the sub-second remainder
    // separately yields the same value without the large intermediate
    // product: the remainder is always smaller than freq.
    const uint64_t whole_seconds = count / freq;
    const uint64_t remainder = count % freq;
    return (whole_seconds * ns_per_second) +
      ((remainder * ns_per_second) / freq);
  }
}

# ifdef PLATFORM_HAS_WAITONADDRESS
using WaitingWord = char;

Expand Down
2 changes: 2 additions & 0 deletions src/snmalloc/snmalloc_front.h
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
#pragma once

#include "global/global.h"
2 changes: 2 additions & 0 deletions src/snmalloc/stl/cxx/utility.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#include <utility>

namespace snmalloc
Expand Down
5 changes: 3 additions & 2 deletions src/test/perf/contention/contention.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <iomanip>
#include <iostream>
#include <snmalloc/pal/pal.h>
#include <snmalloc/snmalloc.h>
#include <thread>
#include <vector>
Expand All @@ -30,7 +31,7 @@ class ParallelTest
auto prev = ready.fetch_add(1);
if (prev + 1 == cores)
{
start = Aal::tick();
start = DefaultPal::tick();
flag = true;
}
while (!flag)
Expand All @@ -41,7 +42,7 @@ class ParallelTest
prev = complete.fetch_add(1);
if (prev + 1 == cores)
{
end = Aal::tick();
end = DefaultPal::tick();
}
}

Expand Down
9 changes: 5 additions & 4 deletions src/test/perf/startup/startup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <algorithm>
#include <iostream>
#include <snmalloc/pal/pal.h>
#include <snmalloc/snmalloc.h>
#include <thread>
#include <vector>
Expand All @@ -30,7 +31,7 @@ class ParallelTest
auto prev = ready.fetch_add(1);
if (prev + 1 == cores)
{
start = Aal::tick();
start = DefaultPal::tick();
flag = true;
}
while (!flag)
Expand All @@ -41,7 +42,7 @@ class ParallelTest
prev = complete.fetch_add(1);
if (prev + 1 == cores)
{
end = Aal::tick();
end = DefaultPal::tick();
}
}

Expand Down Expand Up @@ -76,9 +77,9 @@ int main()

ParallelTest test(
[](size_t id) {
auto start = Aal::tick();
auto start = DefaultPal::tick();
snmalloc::dealloc(snmalloc::alloc(1));
auto end = Aal::tick();
auto end = DefaultPal::tick();
counters[id] = end - start;
},
nthreads);
Expand Down
2 changes: 2 additions & 0 deletions src/test/setup.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#if defined(SNMALLOC_CI_BUILD)
# include <snmalloc/pal/pal.h>
# if defined(WIN32)
Expand Down
Loading