//===---------------------- rpmalloc.c ------------------*- C -*-=============//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This library provides a cross-platform lock free thread caching malloc
// implementation in C11.
//
//===----------------------------------------------------------------------===//

#include "rpmalloc.h"

////////////
///
/// Build time configurable limits
///
//////

#if defined(__clang__)
#pragma clang diagnostic ignored "-Wunused-macros"
#pragma clang diagnostic ignored "-Wunused-function"
#if __has_warning("-Wreserved-identifier")
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
#if __has_warning("-Wstatic-in-inline")
#pragma clang diagnostic ignored "-Wstatic-in-inline"
#endif
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-macros"
#pragma GCC diagnostic ignored "-Wunused-function"
#endif

#if !defined(__has_builtin)
#define __has_builtin(b) 0
#endif

#if defined(__GNUC__) || defined(__clang__)

#if __has_builtin(__builtin_memcpy_inline)
#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
#else
#define _rpmalloc_memcpy_const(x, y, s) \
  do { \
    _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \
                   "len must be a constant integer"); \
    memcpy(x, y, s); \
  } while (0)
#endif

#if __has_builtin(__builtin_memset_inline)
#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s)
#else
#define _rpmalloc_memset_const(x, y, s) \
  do { \
    _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \
                   "len must be a constant integer"); \
    memset(x, y, s); \
  } while (0)
#endif
#else
#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s)
#define _rpmalloc_memset_const(x, y, s) memset(x, y, s)
#endif

#if __has_builtin(__builtin_assume)
#define rpmalloc_assume(cond) __builtin_assume(cond)
#elif defined(__GNUC__)
#define rpmalloc_assume(cond) \
  do { \
    if (!__builtin_expect(cond, 0)) \
      __builtin_unreachable(); \
  } while (0)
#elif defined(_MSC_VER)
#define rpmalloc_assume(cond) __assume(cond)
#else
#define rpmalloc_assume(cond) 0
#endif

#ifndef HEAP_ARRAY_SIZE
//! Size of heap hashmap
#define HEAP_ARRAY_SIZE 47
#endif
#ifndef ENABLE_THREAD_CACHE
//! Enable per-thread cache
#define ENABLE_THREAD_CACHE 1
#endif
#ifndef ENABLE_GLOBAL_CACHE
//! Enable global cache shared between all threads, requires thread cache
#define ENABLE_GLOBAL_CACHE 1
#endif
#ifndef ENABLE_VALIDATE_ARGS
//! Enable validation of args to public entry points
#define ENABLE_VALIDATE_ARGS 0
#endif
#ifndef ENABLE_STATISTICS
//! Enable statistics collection
#define ENABLE_STATISTICS 0
#endif
#ifndef ENABLE_ASSERTS
//! Enable asserts
#define ENABLE_ASSERTS 0
#endif
#ifndef ENABLE_OVERRIDE
//! Override standard library malloc/free and new/delete entry points
#define ENABLE_OVERRIDE 0
#endif
#ifndef ENABLE_PRELOAD
//! Support preloading
#define ENABLE_PRELOAD 0
#endif
#ifndef DISABLE_UNMAP
//! Disable unmapping memory pages (also enables unlimited cache)
#define DISABLE_UNMAP 0
#endif
#ifndef ENABLE_UNLIMITED_CACHE
//! Enable unlimited global cache (no unmapping until finalization)
#define ENABLE_UNLIMITED_CACHE 0
#endif
#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
//! Enable adaptive thread cache size based on use heuristics
#define ENABLE_ADAPTIVE_THREAD_CACHE 0
#endif
#ifndef DEFAULT_SPAN_MAP_COUNT
//! Default number of spans to map in call to map more virtual memory (default
//! values yield 4MiB here)
#define DEFAULT_SPAN_MAP_COUNT 64
#endif
#ifndef GLOBAL_CACHE_MULTIPLIER
//! Multiplier for global cache
#define GLOBAL_CACHE_MULTIPLIER 8
#endif

#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
#error Must use global cache if unmap is disabled
#endif

#if DISABLE_UNMAP
#undef ENABLE_UNLIMITED_CACHE
#define ENABLE_UNLIMITED_CACHE 1
#endif

#if !ENABLE_GLOBAL_CACHE
#undef ENABLE_UNLIMITED_CACHE
#define ENABLE_UNLIMITED_CACHE 0
#endif

#if !ENABLE_THREAD_CACHE
#undef ENABLE_ADAPTIVE_THREAD_CACHE
#define ENABLE_ADAPTIVE_THREAD_CACHE 0
#endif

#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
#define PLATFORM_WINDOWS 1
#define PLATFORM_POSIX 0
#else
#define PLATFORM_WINDOWS 0
#define PLATFORM_POSIX 1
#endif

/// Platform and arch specifics
#if defined(_MSC_VER) && !defined(__clang__)
#pragma warning(disable : 5105)
#ifndef FORCEINLINE
#define FORCEINLINE inline __forceinline
#endif
#define _Static_assert static_assert
#else
#ifndef FORCEINLINE
#define FORCEINLINE inline __attribute__((__always_inline__))
#endif
#endif
#if PLATFORM_WINDOWS
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#if ENABLE_VALIDATE_ARGS
#include <intsafe.h>
#endif
#else
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#if defined(__linux__) || defined(__ANDROID__)
#include <sys/prctl.h>
#if !defined(PR_SET_VMA)
#define PR_SET_VMA 0x53564d41
#define PR_SET_VMA_ANON_NAME 0
#endif
#endif
#if defined(__APPLE__)
#include <TargetConditionals.h>
#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
#include <mach/mach_vm.h>
#include <mach/vm_statistics.h>
#endif
#include <pthread.h>
#endif
#if defined(__HAIKU__) || defined(__TINYC__)
#include <pthread.h>
#endif
#endif

#include <errno.h>
#include <stdint.h>
#include <string.h>

#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
#include <fibersapi.h>
static DWORD fls_key;
#endif

#if PLATFORM_POSIX
#include <sched.h>
#include <sys/mman.h>
#ifdef __FreeBSD__
#include <sys/sysctl.h>
#define MAP_HUGETLB MAP_ALIGNED_SUPER
#ifndef PROT_MAX
#define PROT_MAX(f) 0
#endif
#else
#define PROT_MAX(f) 0
#endif
#ifdef __sun
extern int madvise(caddr_t, size_t, int);
#endif
#ifndef MAP_UNINITIALIZED
#define MAP_UNINITIALIZED 0
#endif
#endif
#include <errno.h>

#if ENABLE_ASSERTS
#undef NDEBUG
#if defined(_MSC_VER) && !defined(_DEBUG)
#define _DEBUG
#endif
#include <assert.h>
#define RPMALLOC_TOSTRING_M(x) #x
#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
#define rpmalloc_assert(truth, message) \
  do { \
    if (!(truth)) { \
      if (_memory_config.error_callback) { \
        _memory_config.error_callback(message " (" RPMALLOC_TOSTRING( \
            truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \
      } else { \
        assert((truth) && message); \
      } \
    } \
  } while (0)
#else
#define rpmalloc_assert(truth, message) \
  do { \
  } while (0)
#endif
#if ENABLE_STATISTICS
#include <stdio.h>
#endif

//////
///
/// Atomic access abstraction (since MSVC does not do C11 yet)
///
//////

#if defined(_MSC_VER) && !defined(__clang__)

typedef volatile long atomic32_t;
typedef volatile long long atomic64_t;
typedef volatile void *atomicptr_t;

static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; }
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
  *dst = val;
}
static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
  return (int32_t)InterlockedIncrement(val);
}
static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
  return (int32_t)InterlockedDecrement(val);
}
static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
  return (int32_t)InterlockedExchangeAdd(val, add) + add;
}
static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
                                            int32_t ref) {
  return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0;
}
static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
  *dst = val;
}
static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; }
static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
  return (int64_t)InterlockedExchangeAdd64(val, add) + add;
}
static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
  return (void *)*src;
}
static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
  *dst = val;
}
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
  *dst = val;
}
static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
                                                     void *val) {
  return (void *)InterlockedExchangePointer((void *volatile *)dst, val);
}
static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
  return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) ==
          ref)
             ? 1
             : 0;
}

#define EXPECTED(x) (x)
#define UNEXPECTED(x) (x)

#else

#include <stdatomic.h>

typedef volatile _Atomic(int32_t) atomic32_t;
typedef volatile _Atomic(int64_t) atomic64_t;
typedef volatile _Atomic(void *) atomicptr_t;

static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
  return atomic_load_explicit(src, memory_order_relaxed);
}
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
  atomic_store_explicit(dst, val, memory_order_relaxed);
}
static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
  return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1;
}
static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
  return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1;
}
static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
  return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
}
static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
                                            int32_t ref) {
  return atomic_compare_exchange_weak_explicit(
      dst, &ref, val, memory_order_acquire, memory_order_relaxed);
}
static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
  atomic_store_explicit(dst, val, memory_order_release);
}
static FORCEINLINE int64_t atomic_load64(atomic64_t *val) {
  return atomic_load_explicit(val, memory_order_relaxed);
}
static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
  return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
}
static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
  return atomic_load_explicit(src, memory_order_relaxed);
}
static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
  atomic_store_explicit(dst, val, memory_order_relaxed);
}
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
  atomic_store_explicit(dst, val, memory_order_release);
}
static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
                                                     void *val) {
  return atomic_exchange_explicit(dst, val, memory_order_acquire);
}
static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
  return atomic_compare_exchange_weak_explicit(
      dst, &ref, val, memory_order_relaxed, memory_order_relaxed);
}

#define EXPECTED(x) __builtin_expect((x), 1)
#define UNEXPECTED(x) __builtin_expect((x), 0)

#endif
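// Editorial note on the memory ordering used above: plain statistics counters
// only need atomicity and therefore use relaxed ordering, while the operations
// used for lock handoff and cross-thread list publication
// (atomic_cas32_acquire, atomic_store32_release, atomic_store_ptr_release and
// atomic_exchange_ptr_acquire) pair acquire and release semantics so that
// writes made before a release store become visible after the matching
// acquire.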

////////////
///
/// Statistics related functions (evaluate to nothing when statistics not
/// enabled)
///
//////

#if ENABLE_STATISTICS
#define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
#define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
#define _rpmalloc_stat_add(counter, value) \
  atomic_add32(counter, (int32_t)(value))
#define _rpmalloc_stat_add64(counter, value) \
  atomic_add64(counter, (int64_t)(value))
#define _rpmalloc_stat_add_peak(counter, value, peak) \
  do { \
    int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); \
    if (_cur_count > (peak)) \
      peak = _cur_count; \
  } while (0)
#define _rpmalloc_stat_sub(counter, value) \
  atomic_add32(counter, -(int32_t)(value))
#define _rpmalloc_stat_inc_alloc(heap, class_idx) \
  do { \
    int32_t alloc_current = \
        atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
    if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
      heap->size_class_use[class_idx].alloc_peak = alloc_current; \
    atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \
  } while (0)
#define _rpmalloc_stat_inc_free(heap, class_idx) \
  do { \
    atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
    atomic_incr32(&heap->size_class_use[class_idx].free_total); \
  } while (0)
#else
#define _rpmalloc_stat_inc(counter) \
  do { \
  } while (0)
#define _rpmalloc_stat_dec(counter) \
  do { \
  } while (0)
#define _rpmalloc_stat_add(counter, value) \
  do { \
  } while (0)
#define _rpmalloc_stat_add64(counter, value) \
  do { \
  } while (0)
#define _rpmalloc_stat_add_peak(counter, value, peak) \
  do { \
  } while (0)
#define _rpmalloc_stat_sub(counter, value) \
  do { \
  } while (0)
#define _rpmalloc_stat_inc_alloc(heap, class_idx) \
  do { \
  } while (0)
#define _rpmalloc_stat_inc_free(heap, class_idx) \
  do { \
  } while (0)
#endif

///
/// Preconfigured limits and sizes
///

//! Granularity of a small allocation block (must be power of two)
#define SMALL_GRANULARITY 16
//! Small granularity shift count
#define SMALL_GRANULARITY_SHIFT 4
//! Number of small block size classes
#define SMALL_CLASS_COUNT 65
//! Maximum size of a small block
#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
//! Granularity of a medium allocation block
#define MEDIUM_GRANULARITY 512
//! Medium granularity shift count
#define MEDIUM_GRANULARITY_SHIFT 9
//! Number of medium block size classes
#define MEDIUM_CLASS_COUNT 61
//! Total number of small + medium size classes
#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
//! Number of large block size classes
#define LARGE_CLASS_COUNT 63
//! Maximum size of a medium block
#define MEDIUM_SIZE_LIMIT \
  (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
//! Maximum size of a large block
#define LARGE_SIZE_LIMIT \
  ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power
//! of two)
#define SPAN_HEADER_SIZE 128
//! Number of spans in thread cache
#define MAX_THREAD_SPAN_CACHE 400
//! Number of spans to transfer between thread and global cache
#define THREAD_SPAN_CACHE_TRANSFER 64
//! Number of spans in thread cache for large spans (must be greater than
//! LARGE_CLASS_COUNT / 2)
#define MAX_THREAD_SPAN_LARGE_CACHE 100
//! Number of spans to transfer between thread and global cache for large spans
#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6

_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0,
               "Small granularity must be power of two");
_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0,
               "Span header size must be power of two");

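// For reference, with the defaults above and the default 64KiB span size this
// works out to: small blocks of 16..1024 bytes in 16-byte steps
// (SMALL_SIZE_LIMIT = 16 * 64 = 1024), medium blocks up to MEDIUM_SIZE_LIMIT =
// 1024 + 512 * 61 = 32256 bytes in 512-byte steps, and large blocks up to
// LARGE_SIZE_LIMIT = 63 * 65536 - 128 = 4128640 bytes; larger requests are
// mapped directly from the OS as huge allocations.
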
#if ENABLE_VALIDATE_ARGS
//! Maximum allocation size to avoid integer overflow
#undef MAX_ALLOC_SIZE
#define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size)
#endif

#define pointer_offset(ptr, ofs) (void *)((char *)(ptr) + (ptrdiff_t)(ofs))
#define pointer_diff(first, second) \
  (ptrdiff_t)((const char *)(first) - (const char *)(second))

#define INVALID_POINTER ((void *)((uintptr_t)-1))

#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
#define SIZE_CLASS_HUGE ((uint32_t)-1)
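// Editorial note (not part of the upstream comments): these two sentinel
// values are stored in the size_class field of a span to mark spans backing a
// single large or huge allocation, as opposed to the 0..SIZE_CLASS_COUNT-1
// indices used by small and medium block spans.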

////////////
///
/// Data types
///
//////

//! A memory heap, per thread
typedef struct heap_t heap_t;
//! Span of memory pages
typedef struct span_t span_t;
//! Span list
typedef struct span_list_t span_list_t;
//! Span active data
typedef struct span_active_t span_active_t;
//! Size class definition
typedef struct size_class_t size_class_t;
//! Global cache
typedef struct global_cache_t global_cache_t;

//! Flag indicating span is the first (master) span of a split superspan
#define SPAN_FLAG_MASTER 1U
//! Flag indicating span is a secondary (sub) span of a split superspan
#define SPAN_FLAG_SUBSPAN 2U
//! Flag indicating span has blocks with increased alignment
#define SPAN_FLAG_ALIGNED_BLOCKS 4U
//! Flag indicating an unmapped master span
#define SPAN_FLAG_UNMAPPED_MASTER 8U

#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
struct span_use_t {
  //! Current number of spans used (actually used, not in cache)
  atomic32_t current;
  //! High water mark of spans used
  atomic32_t high;
#if ENABLE_STATISTICS
  //! Number of spans in deferred list
  atomic32_t spans_deferred;
  //! Number of spans transitioned to global cache
  atomic32_t spans_to_global;
  //! Number of spans transitioned from global cache
  atomic32_t spans_from_global;
  //! Number of spans transitioned to thread cache
  atomic32_t spans_to_cache;
  //! Number of spans transitioned from thread cache
  atomic32_t spans_from_cache;
  //! Number of spans transitioned to reserved state
  atomic32_t spans_to_reserved;
  //! Number of spans transitioned from reserved state
  atomic32_t spans_from_reserved;
  //! Number of raw memory map calls
  atomic32_t spans_map_calls;
#endif
};
typedef struct span_use_t span_use_t;
#endif

#if ENABLE_STATISTICS
struct size_class_use_t {
  //! Current number of allocations
  atomic32_t alloc_current;
  //! Peak number of allocations
  int32_t alloc_peak;
  //! Total number of allocations
  atomic32_t alloc_total;
  //! Total number of frees
  atomic32_t free_total;
  //! Number of spans in use
  atomic32_t spans_current;
  //! Peak number of spans in use
  int32_t spans_peak;
  //! Number of spans transitioned to cache
  atomic32_t spans_to_cache;
  //! Number of spans transitioned from cache
  atomic32_t spans_from_cache;
  //! Number of spans transitioned from reserved state
  atomic32_t spans_from_reserved;
  //! Number of spans mapped
  atomic32_t spans_map_calls;
  int32_t unused;
};
typedef struct size_class_use_t size_class_use_t;
#endif

// A span can either represent a single span of memory pages with size declared
// by the span_map_count configuration variable, or a set of spans in a
// continuous region, a super span. Any reference to the term "span" usually
// refers to both a single span and a super span. A super span can further be
// divided into multiple spans (or, likewise, super spans), where the first
// (super)span is the master and subsequent (super)spans are subspans. The
// master span keeps track of how many subspans are still alive and mapped in
// virtual memory, and once all subspans and the master have been unmapped, the
// entire superspan region is released and unmapped (on Windows, for example,
// the entire superspan range has to be released in the same call to release
// the virtual memory range, but individual subranges can be decommitted
// individually to reduce physical memory use).
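// For illustration, a region mapped as one master span plus three subspans
// (each cell is one span of _memory_span_size bytes) looks roughly like this:
//
//   +--------------+--------------+--------------+--------------+
//   |    master    |  subspan 1   |  subspan 2   |  subspan 3   |
//   | FLAG_MASTER  | FLAG_SUBSPAN | FLAG_SUBSPAN | FLAG_SUBSPAN |
//   +--------------+--------------+--------------+--------------+
//
// Each subspan records its distance back to the master in span units
// (offset_from_master), and the master counts the spans still mapped in
// remaining_spans.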
struct span_t {
  //! Free list
  void *free_list;
  //! Total block count of size class
  uint32_t block_count;
  //! Size class
  uint32_t size_class;
  //! Index of last block initialized in free list
  uint32_t free_list_limit;
  //! Number of used blocks remaining when in partial state
  uint32_t used_count;
  //! Deferred free list
  atomicptr_t free_list_deferred;
  //! Size of deferred free list, or list of spans when part of a cache list
  uint32_t list_size;
  //! Size of a block
  uint32_t block_size;
  //! Flags and counters
  uint32_t flags;
  //! Number of spans
  uint32_t span_count;
  //! Total span counter for master spans
  uint32_t total_spans;
  //! Offset from master span for subspans
  uint32_t offset_from_master;
  //! Remaining span counter, for master spans
  atomic32_t remaining_spans;
  //! Alignment offset
  uint32_t align_offset;
  //! Owning heap
  heap_t *heap;
  //! Next span
  span_t *next;
  //! Previous span
  span_t *prev;
};
_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");

struct span_cache_t {
  size_t count;
  span_t *span[MAX_THREAD_SPAN_CACHE];
};
typedef struct span_cache_t span_cache_t;

struct span_large_cache_t {
  size_t count;
  span_t *span[MAX_THREAD_SPAN_LARGE_CACHE];
};
typedef struct span_large_cache_t span_large_cache_t;

struct heap_size_class_t {
  //! Free list of active span
  void *free_list;
  //! Double linked list of partially used spans with free blocks.
  //  Previous span pointer in head points to tail span of list.
  span_t *partial_span;
  //! Early level cache of fully free spans
  span_t *cache;
};
typedef struct heap_size_class_t heap_size_class_t;

// Control structure for a heap, either a thread heap or a first class heap if
// enabled
struct heap_t {
  //! Owning thread ID
  uintptr_t owner_thread;
  //! Free lists for each size class
  heap_size_class_t size_class[SIZE_CLASS_COUNT];
#if ENABLE_THREAD_CACHE
  //! Arrays of fully freed spans, single span
  span_cache_t span_cache;
#endif
  //! List of deferred free spans (single linked list)
  atomicptr_t span_free_deferred;
  //! Number of full spans
  size_t full_span_count;
  //! Mapped but unused spans
  span_t *span_reserve;
  //! Master span for mapped but unused spans
  span_t *span_reserve_master;
  //! Number of mapped but unused spans
  uint32_t spans_reserved;
  //! Child count
  atomic32_t child_count;
  //! Next heap in id list
  heap_t *next_heap;
  //! Next heap in orphan list
  heap_t *next_orphan;
  //! Heap ID
  int32_t id;
  //! Finalization state flag
  int finalize;
  //! Master heap owning the memory pages
  heap_t *master_heap;
#if ENABLE_THREAD_CACHE
  //! Arrays of fully freed spans, large spans with > 1 span count
  span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
#endif
#if RPMALLOC_FIRST_CLASS_HEAPS
  //! Double linked list of fully utilized spans with free blocks for each size
  //! class.
  //  Previous span pointer in head points to tail span of list.
  span_t *full_span[SIZE_CLASS_COUNT];
  //! Double linked list of large and huge spans allocated by this heap
  span_t *large_huge_span;
#endif
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
  //! Current and high water mark of spans used per span count
  span_use_t span_use[LARGE_CLASS_COUNT];
#endif
#if ENABLE_STATISTICS
  //! Allocation stats per size class
  size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
  //! Number of bytes transitioned thread -> global
  atomic64_t thread_to_global;
  //! Number of bytes transitioned global -> thread
  atomic64_t global_to_thread;
#endif
};

// Size class for defining a block size bucket
struct size_class_t {
  //! Size of blocks in this class
  uint32_t block_size;
  //! Number of blocks in each chunk
  uint16_t block_count;
  //! Class index this class is merged with
  uint16_t class_idx;
};
_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");

struct global_cache_t {
  //! Cache lock
  atomic32_t lock;
  //! Cache count
  uint32_t count;
#if ENABLE_STATISTICS
  //! Insert count
  size_t insert_count;
  //! Extract count
  size_t extract_count;
#endif
  //! Cached spans
  span_t *span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
  //! Unlimited cache overflow
  span_t *overflow;
};

////////////
///
/// Global data
///
//////

//! Default span size (64KiB)
#define _memory_default_span_size (64 * 1024)
#define _memory_default_span_size_shift 16
#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1)))

//! Initialized flag
static int _rpmalloc_initialized;
//! Main thread ID
static uintptr_t _rpmalloc_main_thread_id;
//! Configuration
static rpmalloc_config_t _memory_config;
//! Memory page size
static size_t _memory_page_size;
//! Shift to divide by page size
static size_t _memory_page_size_shift;
//! Granularity at which memory pages are mapped by OS
static size_t _memory_map_granularity;
#if RPMALLOC_CONFIGURABLE
//! Size of a span of memory pages
static size_t _memory_span_size;
//! Shift to divide by span size
static size_t _memory_span_size_shift;
//! Mask to get to start of a memory span
static uintptr_t _memory_span_mask;
#else
//! Hardwired span size
#define _memory_span_size _memory_default_span_size
#define _memory_span_size_shift _memory_default_span_size_shift
#define _memory_span_mask _memory_default_span_mask
#endif
//! Number of spans to map in each map call
static size_t _memory_span_map_count;
//! Number of spans to keep reserved in each heap
static size_t _memory_heap_reserve_count;
//! Global size classes
static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
//! Run-time size limit of medium blocks
static size_t _memory_medium_size_limit;
//! Heap ID counter
static atomic32_t _memory_heap_id;
//! Huge page support
static int _memory_huge_pages;
#if ENABLE_GLOBAL_CACHE
//! Global span cache
static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
#endif
//! Global reserved spans
static span_t *_memory_global_reserve;
//! Global reserved count
static size_t _memory_global_reserve_count;
//! Global reserved master
static span_t *_memory_global_reserve_master;
//! All heaps
static heap_t *_memory_heaps[HEAP_ARRAY_SIZE];
//! Used to restrict access to mapping memory for huge pages
static atomic32_t _memory_global_lock;
//! Orphaned heaps
static heap_t *_memory_orphan_heaps;
#if RPMALLOC_FIRST_CLASS_HEAPS
//! Orphaned heaps (first class heaps)
static heap_t *_memory_first_class_orphan_heaps;
#endif
#if ENABLE_STATISTICS
//! Allocations counter
static atomic64_t _allocation_counter;
//! Deallocations counter
static atomic64_t _deallocation_counter;
//! Active heap count
static atomic32_t _memory_active_heaps;
//! Number of currently mapped memory pages
static atomic32_t _mapped_pages;
//! Peak number of concurrently mapped memory pages
static int32_t _mapped_pages_peak;
//! Number of mapped master spans
static atomic32_t _master_spans;
//! Number of unmapped dangling master spans
static atomic32_t _unmapped_master_spans;
//! Running counter of total number of mapped memory pages since start
static atomic32_t _mapped_total;
//! Running counter of total number of unmapped memory pages since start
static atomic32_t _unmapped_total;
//! Number of currently mapped memory pages in OS calls
static atomic32_t _mapped_pages_os;
//! Number of currently allocated pages in huge allocations
static atomic32_t _huge_pages_current;
//! Peak number of currently allocated pages in huge allocations
static int32_t _huge_pages_peak;
#endif

////////////
///
/// Thread local heap and ID
///
//////

//! Current thread heap
#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
    defined(__TINYC__)
static pthread_key_t _memory_thread_heap;
#else
#ifdef _MSC_VER
#define _Thread_local __declspec(thread)
#define TLS_MODEL
#else
#ifndef __HAIKU__
#define TLS_MODEL __attribute__((tls_model("initial-exec")))
#else
#define TLS_MODEL
#endif
#if !defined(__clang__) && defined(__GNUC__)
#define _Thread_local __thread
#endif
#endif
static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
#endif

static inline heap_t *get_thread_heap_raw(void) {
#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
  return pthread_getspecific(_memory_thread_heap);
#else
  return _memory_thread_heap;
#endif
}

//! Get the current thread heap
static inline heap_t *get_thread_heap(void) {
  heap_t *heap = get_thread_heap_raw();
#if ENABLE_PRELOAD
  if (EXPECTED(heap != 0))
    return heap;
  rpmalloc_initialize();
  return get_thread_heap_raw();
#else
  return heap;
#endif
}

//! Fast thread ID
static inline uintptr_t get_thread_id(void) {
#if defined(_WIN32)
  return (uintptr_t)((void *)NtCurrentTeb());
#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
  uintptr_t tid;
#if defined(__i386__)
  __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
#elif defined(__x86_64__)
#if defined(__MACH__)
  __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
#else
  __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
#endif
#elif defined(__arm__)
  __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
#elif defined(__aarch64__)
#if defined(__MACH__)
  // tpidr_el0 likely unused, always return 0 on iOS
  __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
#else
  __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
#endif
#else
#error This platform needs implementation of get_thread_id()
#endif
  return tid;
#else
#error This platform needs implementation of get_thread_id()
#endif
}

//! Set the current thread heap
static void set_thread_heap(heap_t *heap) {
#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
    defined(__TINYC__)
  pthread_setspecific(_memory_thread_heap, heap);
#else
  _memory_thread_heap = heap;
#endif
  if (heap)
    heap->owner_thread = get_thread_id();
}

//! Set main thread ID
extern void rpmalloc_set_main_thread(void);

void rpmalloc_set_main_thread(void) {
  _rpmalloc_main_thread_id = get_thread_id();
}

static void _rpmalloc_spin(void) {
#if defined(_MSC_VER)
#if defined(_M_ARM64)
  __yield();
#else
  _mm_pause();
#endif
#elif defined(__x86_64__) || defined(__i386__)
  __asm__ volatile("pause" ::: "memory");
#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
  __asm__ volatile("yield" ::: "memory");
#elif defined(__powerpc__) || defined(__powerpc64__)
  // No idea if this has ever been compiled for such archs, but ... as a
  // precaution
  __asm__ volatile("or 27,27,27");
#elif defined(__sparc__)
  __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
#else
  struct timespec ts = {0};
  nanosleep(&ts, 0);
#endif
}

#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
static void NTAPI _rpmalloc_thread_destructor(void *value) {
#if ENABLE_OVERRIDE
  // If this is called on the main thread it means rpmalloc_finalize
  // has not been called and shutdown is forced (through _exit) or unclean
  if (get_thread_id() == _rpmalloc_main_thread_id)
    return;
#endif
  if (value)
    rpmalloc_thread_finalize(1);
}
#endif

////////////
///
/// Low level memory map/unmap
///
//////

static void _rpmalloc_set_name(void *address, size_t size) {
#if defined(__linux__) || defined(__ANDROID__)
  const char *name = _memory_huge_pages ? _memory_config.huge_page_name
                                        : _memory_config.page_name;
  if (address == MAP_FAILED || !name)
    return;
  // If the kernel does not support CONFIG_ANON_VMA_NAME, or if the call fails
  // (e.g. invalid name), this is basically a no-op.
  (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size,
              (uintptr_t)name);
#else
  (void)sizeof(size);
  (void)sizeof(address);
#endif
}

//! Map more virtual memory
//  size is number of bytes to map
//  offset receives the offset in bytes from start of mapped region
//  returns address to start of mapped region to use
static void *_rpmalloc_mmap(size_t size, size_t *offset) {
  rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size");
  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
  void *address = _memory_config.memory_map(size, offset);
  if (EXPECTED(address != 0)) {
    _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift),
                            _mapped_pages_peak);
    _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
  }
  return address;
}

//! Unmap virtual memory
//  address is the memory address to unmap, as returned from _memory_map
//  size is the number of bytes to unmap, which might be less than the full
//    region for a partial unmap
//  offset is the offset in bytes to the actual mapped region, as set by
//    _memory_map
//  release is set to 0 for a partial unmap, or to the size of the entire range
//    for a full unmap
static void _rpmalloc_unmap(void *address, size_t size, size_t offset,
                            size_t release) {
  rpmalloc_assert(!release || (release >= size), "Invalid unmap size");
  rpmalloc_assert(!release || (release >= _memory_page_size),
                  "Invalid unmap size");
  if (release) {
    rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size");
    _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
    _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
  }
  _memory_config.memory_unmap(address, size, offset, release);
}

//! Default implementation to map new pages to virtual memory
static void *_rpmalloc_mmap_os(size_t size, size_t *offset) {
  // Either size is a heap (a single page) or a (multiple) span - we only need
  // to align spans, and only if larger than map granularity
  size_t padding = ((size >= _memory_span_size) &&
                    (_memory_span_size > _memory_map_granularity))
                       ? _memory_span_size
                       : 0;
  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
#if PLATFORM_WINDOWS
  // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not
  // allocated unless/until the virtual addresses are actually accessed"
  void *ptr = VirtualAlloc(0, size + padding,
                           (_memory_huge_pages ? MEM_LARGE_PAGES : 0) |
                               MEM_RESERVE | MEM_COMMIT,
                           PAGE_READWRITE);
  if (!ptr) {
    if (_memory_config.map_fail_callback) {
      if (_memory_config.map_fail_callback(size + padding))
        return _rpmalloc_mmap_os(size, offset);
    } else {
      rpmalloc_assert(ptr, "Failed to map virtual memory block");
    }
    return 0;
  }
#else
  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
  int fd = (int)VM_MAKE_TAG(240U);
  if (_memory_huge_pages)
    fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
#elif defined(MAP_HUGETLB)
  void *ptr = mmap(0, size + padding,
                   PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
                   (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
#if defined(MADV_HUGEPAGE)
  // In some configurations, huge page allocations might fail, thus we fall
  // back to normal allocations and promote the region to a transparent huge
  // page
  if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) {
    ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
    if (ptr && ptr != MAP_FAILED) {
      int prm = madvise(ptr, size + padding, MADV_HUGEPAGE);
      (void)prm;
      rpmalloc_assert((prm == 0), "Failed to promote the page to THP");
    }
  }
#endif
  _rpmalloc_set_name(ptr, size + padding);
#elif defined(MAP_ALIGNED)
  const size_t align =
      (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
  void *ptr =
      mmap(0, size + padding, PROT_READ | PROT_WRITE,
           (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
#elif defined(MAP_ALIGN)
  caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0);
  void *ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE,
                   (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
#else
  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
#endif
  if ((ptr == MAP_FAILED) || !ptr) {
    if (_memory_config.map_fail_callback) {
      if (_memory_config.map_fail_callback(size + padding))
        return _rpmalloc_mmap_os(size, offset);
    } else if (errno != ENOMEM) {
      rpmalloc_assert((ptr != MAP_FAILED) && ptr,
                      "Failed to map virtual memory block");
    }
    return 0;
  }
#endif
  _rpmalloc_stat_add(&_mapped_pages_os,
                     (int32_t)((size + padding) >> _memory_page_size_shift));
  if (padding) {
    size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
    rpmalloc_assert(final_padding <= _memory_span_size,
                    "Internal failure in padding");
    rpmalloc_assert(final_padding <= padding, "Internal failure in padding");
    rpmalloc_assert(!(final_padding % 8), "Internal failure in padding");
    ptr = pointer_offset(ptr, final_padding);
    *offset = final_padding >> 3;
  }
  rpmalloc_assert((size < _memory_span_size) ||
                      !((uintptr_t)ptr & ~_memory_span_mask),
                  "Internal failure in padding");
  return ptr;
}
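// Note on the offset value: the default OS implementation above stores the
// alignment padding divided by 8 in *offset so that it fits the 32-bit
// align_offset field of a span; _rpmalloc_unmap_os below undoes this by
// shifting the offset left by 3 bits before adjusting the address.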
| 1127 |
|
| 1128 | //! Default implementation to unmap pages from virtual memory
|
| 1129 | static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset,
|
| 1130 | size_t release) {
|
| 1131 | rpmalloc_assert(release || (offset == 0), "Invalid unmap size" );
|
| 1132 | rpmalloc_assert(!release || (release >= _memory_page_size),
|
| 1133 | "Invalid unmap size" );
|
| 1134 | rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size" );
|
| 1135 | if (release && offset) {
|
| 1136 | offset <<= 3;
|
| 1137 | address = pointer_offset(address, -(int32_t)offset);
|
| 1138 | if ((release >= _memory_span_size) &&
|
| 1139 | (_memory_span_size > _memory_map_granularity)) {
|
| 1140 | // Padding is always one span size
|
| 1141 | release += _memory_span_size;
|
| 1142 | }
|
| 1143 | }
|
| 1144 | #if !DISABLE_UNMAP
|
| 1145 | #if PLATFORM_WINDOWS
|
| 1146 | if (!VirtualFree(address, release ? 0 : size,
|
| 1147 | release ? MEM_RELEASE : MEM_DECOMMIT)) {
|
| 1148 | rpmalloc_assert(0, "Failed to unmap virtual memory block" );
|
| 1149 | }
|
| 1150 | #else
|
| 1151 | if (release) {
|
| 1152 | if (munmap(addr: address, len: release)) {
|
| 1153 | rpmalloc_assert(0, "Failed to unmap virtual memory block" );
|
| 1154 | }
|
| 1155 | } else {
|
| 1156 | #if defined(MADV_FREE_REUSABLE)
|
| 1157 | int ret;
|
| 1158 | while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 &&
|
| 1159 | (errno == EAGAIN))
|
| 1160 | errno = 0;
|
| 1161 | if ((ret == -1) && (errno != 0)) {
|
| 1162 | #elif defined(MADV_DONTNEED)
|
| 1163 | if (madvise(addr: address, len: size, MADV_DONTNEED)) {
|
| 1164 | #elif defined(MADV_PAGEOUT)
|
| 1165 | if (madvise(address, size, MADV_PAGEOUT)) {
|
| 1166 | #elif defined(MADV_FREE)
|
| 1167 | if (madvise(address, size, MADV_FREE)) {
|
| 1168 | #else
|
| 1169 | if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
|
| 1170 | #endif
|
| 1171 | rpmalloc_assert(0, "Failed to madvise virtual memory block as free" );
|
| 1172 | }
|
| 1173 | }
|
| 1174 | #endif
|
| 1175 | #endif
|
| 1176 | if (release)
|
| 1177 | _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
|
| 1178 | }
|
| 1179 |
|
| 1180 | static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
|
| 1181 | span_t *subspan,
|
| 1182 | size_t span_count);
|
| 1183 |
|
| 1184 | //! Use global reserved spans to fulfill a memory map request (reserve size must
|
| 1185 | //! be checked by caller)
|
| 1186 | static span_t *_rpmalloc_global_get_reserved_spans(size_t span_count) {
|
| 1187 | span_t *span = _memory_global_reserve;
|
| 1188 | _rpmalloc_span_mark_as_subspan_unless_master(master: _memory_global_reserve_master,
|
| 1189 | subspan: span, span_count);
|
| 1190 | _memory_global_reserve_count -= span_count;
|
| 1191 | if (_memory_global_reserve_count)
|
| 1192 | _memory_global_reserve =
|
| 1193 | (span_t *)pointer_offset(span, span_count << _memory_span_size_shift);
|
| 1194 | else
|
| 1195 | _memory_global_reserve = 0;
|
| 1196 | return span;
|
| 1197 | }
|
| 1198 |
|
| 1199 | //! Store the given spans as global reserve (must only be called from within new
|
| 1200 | //! heap allocation, not thread safe)
|
| 1201 | static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve,
|
| 1202 | size_t reserve_span_count) {
|
| 1203 | _memory_global_reserve_master = master;
|
| 1204 | _memory_global_reserve_count = reserve_span_count;
|
| 1205 | _memory_global_reserve = reserve;
|
| 1206 | }
|
| 1207 |
|
| 1208 | ////////////
|
| 1209 | ///
|
| 1210 | /// Span linked list management
|
| 1211 | ///
|
| 1212 | //////
|
| 1213 |
|
| 1214 | //! Add a span to double linked list at the head
|
| 1215 | static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) {
|
| 1216 | if (*head)
|
| 1217 | (*head)->prev = span;
|
| 1218 | span->next = *head;
|
| 1219 | *head = span;
|
| 1220 | }
|
| 1221 |
|
| 1222 | //! Pop head span from double linked list
|
| 1223 | static void _rpmalloc_span_double_link_list_pop_head(span_t **head,
|
| 1224 | span_t *span) {
|
| 1225 | rpmalloc_assert(*head == span, "Linked list corrupted" );
|
| 1226 | span = *head;
|
| 1227 | *head = span->next;
|
| 1228 | }
|
| 1229 |
|
| 1230 | //! Remove a span from double linked list
|
| 1231 | static void _rpmalloc_span_double_link_list_remove(span_t **head,
|
| 1232 | span_t *span) {
|
| 1233 | rpmalloc_assert(*head, "Linked list corrupted" );
|
| 1234 | if (*head == span) {
|
| 1235 | *head = span->next;
|
| 1236 | } else {
|
| 1237 | span_t *next_span = span->next;
|
| 1238 | span_t *prev_span = span->prev;
|
| 1239 | prev_span->next = next_span;
|
| 1240 | if (EXPECTED(next_span != 0))
|
| 1241 | next_span->prev = prev_span;
|
| 1242 | }
|
| 1243 | }
|
| 1244 |
|
| 1245 | ////////////
|
| 1246 | ///
|
| 1247 | /// Span control
|
| 1248 | ///
|
| 1249 | //////
|
| 1250 |
|
| 1251 | static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
|
| 1252 |
|
| 1253 | static void _rpmalloc_heap_finalize(heap_t *heap);
|
| 1254 |
|
| 1255 | static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
|
| 1256 | span_t *reserve,
|
| 1257 | size_t reserve_span_count);
|
| 1258 |
|
| 1259 | //! Declare the span to be a subspan and store distance from master span and
|
| 1260 | //! span count
|
| 1261 | static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
|
| 1262 | span_t *subspan,
|
| 1263 | size_t span_count) {
|
| 1264 | rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER),
|
| 1265 | "Span master pointer and/or flag mismatch" );
|
| 1266 | if (subspan != master) {
|
| 1267 | subspan->flags = SPAN_FLAG_SUBSPAN;
|
| 1268 | subspan->offset_from_master =
|
| 1269 | (uint32_t)((uintptr_t)pointer_diff(subspan, master) >>
|
| 1270 | _memory_span_size_shift);
|
| 1271 | subspan->align_offset = 0;
|
| 1272 | }
|
| 1273 | subspan->span_count = (uint32_t)span_count;
|
| 1274 | }
|
| 1275 |
|
| 1276 | //! Use reserved spans to fulfill a memory map request (reserve size must be
|
| 1277 | //! checked by caller)
|
| 1278 | static span_t *_rpmalloc_span_map_from_reserve(heap_t *heap,
|
| 1279 | size_t span_count) {
|
| 1280 | // Update the heap span reserve
|
| 1281 | span_t *span = heap->span_reserve;
|
| 1282 | heap->span_reserve =
|
| 1283 | (span_t *)pointer_offset(span, span_count * _memory_span_size);
|
| 1284 | heap->spans_reserved -= (uint32_t)span_count;
|
| 1285 |
|
| 1286 | _rpmalloc_span_mark_as_subspan_unless_master(master: heap->span_reserve_master, subspan: span,
|
| 1287 | span_count);
|
| 1288 | if (span_count <= LARGE_CLASS_COUNT)
|
| 1289 | _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
|
| 1290 |
|
| 1291 | return span;
|
| 1292 | }
|
| 1293 |
|
| 1294 | //! Get the aligned number of spans to map in based on wanted count, configured
|
| 1295 | //! mapping granularity and the page size
|
| 1296 | static size_t _rpmalloc_span_align_count(size_t span_count) {
|
| 1297 | size_t request_count = (span_count > _memory_span_map_count)
|
| 1298 | ? span_count
|
| 1299 | : _memory_span_map_count;
|
| 1300 | if ((_memory_page_size > _memory_span_size) &&
|
| 1301 | ((request_count * _memory_span_size) % _memory_page_size))
|
| 1302 | request_count +=
|
| 1303 | _memory_span_map_count - (request_count % _memory_span_map_count);
|
| 1304 | return request_count;
|
| 1305 | }
|
| 1306 |
|
| 1307 | //! Setup a newly mapped span
|
| 1308 | static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count,
|
| 1309 | size_t span_count, size_t align_offset) {
|
| 1310 | span->total_spans = (uint32_t)total_span_count;
|
| 1311 | span->span_count = (uint32_t)span_count;
|
| 1312 | span->align_offset = (uint32_t)align_offset;
|
| 1313 | span->flags = SPAN_FLAG_MASTER;
|
| 1314 | atomic_store32(dst: &span->remaining_spans, val: (int32_t)total_span_count);
|
| 1315 | }
|
| 1316 |
|
| 1317 | static void _rpmalloc_span_unmap(span_t *span);
|
| 1318 |
|
| 1319 | //! Map an aligned set of spans, taking configured mapping granularity and the
|
| 1320 | //! page size into account
|
| 1321 | static span_t *_rpmalloc_span_map_aligned_count(heap_t *heap,
|
| 1322 | size_t span_count) {
|
| 1323 | // If we already have some, but not enough, reserved spans, release those to
|
| 1324 | // heap cache and map a new full set of spans. Otherwise we would waste memory
|
| 1325 | // if page size > span size (huge pages)
|
| 1326 | size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
|
| 1327 | size_t align_offset = 0;
|
| 1328 | span_t *span = (span_t *)_rpmalloc_mmap(
|
| 1329 | size: aligned_span_count * _memory_span_size, offset: &align_offset);
|
| 1330 | if (!span)
|
| 1331 | return 0;
|
| 1332 | _rpmalloc_span_initialize(span, total_span_count: aligned_span_count, span_count, align_offset);
|
| 1333 | _rpmalloc_stat_inc(&_master_spans);
|
| 1334 | if (span_count <= LARGE_CLASS_COUNT)
|
| 1335 | _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
|
| 1336 | if (aligned_span_count > span_count) {
|
| 1337 | span_t *reserved_spans =
|
| 1338 | (span_t *)pointer_offset(span, span_count * _memory_span_size);
|
| 1339 | size_t reserved_count = aligned_span_count - span_count;
|
| 1340 | if (heap->spans_reserved) {
|
| 1341 | _rpmalloc_span_mark_as_subspan_unless_master(
|
| 1342 | master: heap->span_reserve_master, subspan: heap->span_reserve, span_count: heap->spans_reserved);
|
| 1343 | _rpmalloc_heap_cache_insert(heap, span: heap->span_reserve);
|
| 1344 | }
|
| 1345 | if (reserved_count > _memory_heap_reserve_count) {
|
| 1346 | // If huge pages or eager spam map count, the global reserve spin lock is
|
| 1347 | // held by caller, _rpmalloc_span_map
|
| 1348 | rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1,
|
| 1349 | "Global spin lock not held as expected" );
|
| 1350 | size_t remain_count = reserved_count - _memory_heap_reserve_count;
|
| 1351 | reserved_count = _memory_heap_reserve_count;
|
| 1352 | span_t *remain_span = (span_t *)pointer_offset(
|
| 1353 | reserved_spans, reserved_count * _memory_span_size);
|
| 1354 | if (_memory_global_reserve) {
|
| 1355 | _rpmalloc_span_mark_as_subspan_unless_master(
|
| 1356 | master: _memory_global_reserve_master, subspan: _memory_global_reserve,
|
| 1357 | span_count: _memory_global_reserve_count);
|
| 1358 | _rpmalloc_span_unmap(span: _memory_global_reserve);
|
| 1359 | }
|
| 1360 | _rpmalloc_global_set_reserved_spans(master: span, reserve: remain_span, reserve_span_count: remain_count);
|
| 1361 | }
|
| 1362 | _rpmalloc_heap_set_reserved_spans(heap, master: span, reserve: reserved_spans,
|
| 1363 | reserve_span_count: reserved_count);
|
| 1364 | }
|
| 1365 | return span;
|
| 1366 | }
|
| 1367 |
|
| 1368 | //! Map in memory pages for the given number of spans (or use previously
|
| 1369 | //! reserved pages)
|
| 1370 | static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) {
|
| 1371 | if (span_count <= heap->spans_reserved)
|
| 1372 | return _rpmalloc_span_map_from_reserve(heap, span_count);
|
| 1373 | span_t *span = 0;
|
| 1374 | int use_global_reserve =
|
| 1375 | (_memory_page_size > _memory_span_size) ||
|
| 1376 | (_memory_span_map_count > _memory_heap_reserve_count);
|
| 1377 | if (use_global_reserve) {
|
| 1378 | // If huge pages, make sure only one thread maps more memory to avoid bloat
|
| 1379 | while (!atomic_cas32_acquire(dst: &_memory_global_lock, val: 1, ref: 0))
|
| 1380 | _rpmalloc_spin();
|
| 1381 | if (_memory_global_reserve_count >= span_count) {
|
| 1382 | size_t reserve_count =
|
| 1383 | (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
|
| 1384 | if (_memory_global_reserve_count < reserve_count)
|
| 1385 | reserve_count = _memory_global_reserve_count;
|
| 1386 | span = _rpmalloc_global_get_reserved_spans(span_count: reserve_count);
|
| 1387 | if (span) {
|
| 1388 | if (reserve_count > span_count) {
|
| 1389 | span_t *reserved_span = (span_t *)pointer_offset(
|
| 1390 | span, span_count << _memory_span_size_shift);
|
| 1391 | _rpmalloc_heap_set_reserved_spans(heap, master: _memory_global_reserve_master,
|
| 1392 | reserve: reserved_span,
|
| 1393 | reserve_span_count: reserve_count - span_count);
|
| 1394 | }
|
| 1395 | // Already marked as subspan in _rpmalloc_global_get_reserved_spans
|
| 1396 | span->span_count = (uint32_t)span_count;
|
| 1397 | }
|
| 1398 | }
|
| 1399 | }
|
| 1400 | if (!span)
|
| 1401 | span = _rpmalloc_span_map_aligned_count(heap, span_count);
|
| 1402 | if (use_global_reserve)
|
| 1403 | atomic_store32_release(dst: &_memory_global_lock, val: 0);
|
| 1404 | return span;
|
| 1405 | }
|
| 1406 |
|
| 1407 | //! Unmap memory pages for the given number of spans (or mark as unused if no
|
| 1408 | //! partial unmappings)
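|  | //! Subspans are released to the system immediately unless the span size is
|
|  | //! smaller than the page size (huge pages), where the unmap is deferred to the
|
|  | //! master. The master counts outstanding spans in remaining_spans and the whole
|
|  | //! super span mapping is released once that counter reaches zero.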
|
| 1409 | static void _rpmalloc_span_unmap(span_t *span) {
|
| 1410 | rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
|
| 1411 | (span->flags & SPAN_FLAG_SUBSPAN),
|
| 1412 | "Span flag corrupted" );
|
| 1413 | rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
|
| 1414 | !(span->flags & SPAN_FLAG_SUBSPAN),
|
| 1415 | "Span flag corrupted" );
|
| 1416 |
|
| 1417 | int is_master = !!(span->flags & SPAN_FLAG_MASTER);
|
| 1418 | span_t *master =
|
| 1419 | is_master ? span
|
| 1420 | : ((span_t *)pointer_offset(
|
| 1421 | span, -(intptr_t)((uintptr_t)span->offset_from_master *
|
| 1422 | _memory_span_size)));
|
| 1423 | rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN),
|
| 1424 | "Span flag corrupted" );
|
| 1425 | rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted" );
|
| 1426 |
|
| 1427 | size_t span_count = span->span_count;
|
| 1428 | if (!is_master) {
|
| 1429 | // Directly unmap subspans (unless huge pages, in which case we defer and
|
| 1430 | // unmap entire page range with master)
|
| 1431 | rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted" );
|
| 1432 | if (_memory_span_size >= _memory_page_size)
|
| 1433 | _rpmalloc_unmap(address: span, size: span_count * _memory_span_size, offset: 0, release: 0);
|
| 1434 | } else {
|
| 1435 | // Special double flag to denote an unmapped master
|
| 1436 | // It must be kept in memory since span header must be used
|
| 1437 | span->flags |=
|
| 1438 | SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER;
|
| 1439 | _rpmalloc_stat_add(&_unmapped_master_spans, 1);
|
| 1440 | }
|
| 1441 |
|
| 1442 | if (atomic_add32(val: &master->remaining_spans, add: -(int32_t)span_count) <= 0) {
|
| 1443 | // Everything unmapped, unmap the master span with release flag to unmap the
|
| 1444 | // entire range of the super span
|
| 1445 | rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) &&
|
| 1446 | !!(master->flags & SPAN_FLAG_SUBSPAN),
|
| 1447 | "Span flag corrupted" );
|
| 1448 | size_t unmap_count = master->span_count;
|
| 1449 | if (_memory_span_size < _memory_page_size)
|
| 1450 | unmap_count = master->total_spans;
|
| 1451 | _rpmalloc_stat_sub(&_master_spans, 1);
|
| 1452 | _rpmalloc_stat_sub(&_unmapped_master_spans, 1);
|
| 1453 | _rpmalloc_unmap(address: master, size: unmap_count * _memory_span_size,
|
| 1454 | offset: master->align_offset,
|
| 1455 | release: (size_t)master->total_spans * _memory_span_size);
|
| 1456 | }
|
| 1457 | }
|
| 1458 |
|
| 1459 | //! Move the span (used for small or medium allocations) to the heap thread
|
| 1460 | //! cache
|
| 1461 | static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) {
|
| 1462 | rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted" );
|
| 1463 | rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT,
|
| 1464 | "Invalid span size class" );
|
| 1465 | rpmalloc_assert(span->span_count == 1, "Invalid span count" );
|
| 1466 | #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
|
| 1467 | atomic_decr32(&heap->span_use[0].current);
|
| 1468 | #endif
|
| 1469 | _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
|
| 1470 | if (!heap->finalize) {
|
| 1471 | _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
|
| 1472 | _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
|
| 1473 | if (heap->size_class[span->size_class].cache)
|
| 1474 | _rpmalloc_heap_cache_insert(heap,
|
| 1475 | span: heap->size_class[span->size_class].cache);
|
| 1476 | heap->size_class[span->size_class].cache = span;
|
| 1477 | } else {
|
| 1478 | _rpmalloc_span_unmap(span);
|
| 1479 | }
|
| 1480 | }
|
| 1481 |
|
| 1482 | //! Initialize a (partial) free list up to next system memory page, while
|
| 1483 | //! reserving the first block as allocated, returning number of blocks in list
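|  | //! The free list is intrusive: the first word of every free block stores the
|
|  | //! pointer to the next free block. If the block size is less than half a page,
|
|  | //! only blocks up to the next page boundary are linked here; the remaining
|
|  | //! blocks are linked lazily by the allocation fallback path.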
|
| 1484 | static uint32_t free_list_partial_init(void **list, void **first_block,
|
| 1485 | void *page_start, void *block_start,
|
| 1486 | uint32_t block_count,
|
| 1487 | uint32_t block_size) {
|
| 1488 | rpmalloc_assert(block_count, "Internal failure" );
|
| 1489 | *first_block = block_start;
|
| 1490 | if (block_count > 1) {
|
| 1491 | void *free_block = pointer_offset(block_start, block_size);
|
| 1492 | void *block_end =
|
| 1493 | pointer_offset(block_start, (size_t)block_size * block_count);
|
| 1494 | // If block size is less than half a memory page, bound init to next memory
|
| 1495 | // page boundary
|
| 1496 | if (block_size < (_memory_page_size >> 1)) {
|
| 1497 | void *page_end = pointer_offset(page_start, _memory_page_size);
|
| 1498 | if (page_end < block_end)
|
| 1499 | block_end = page_end;
|
| 1500 | }
|
| 1501 | *list = free_block;
|
| 1502 | block_count = 2;
|
| 1503 | void *next_block = pointer_offset(free_block, block_size);
|
| 1504 | while (next_block < block_end) {
|
| 1505 | *((void **)free_block) = next_block;
|
| 1506 | free_block = next_block;
|
| 1507 | ++block_count;
|
| 1508 | next_block = pointer_offset(next_block, block_size);
|
| 1509 | }
|
| 1510 | *((void **)free_block) = 0;
|
| 1511 | } else {
|
| 1512 | *list = 0;
|
| 1513 | }
|
| 1514 | return block_count;
|
| 1515 | }
|
| 1516 |
|
| 1517 | //! Initialize an unused span (from cache or mapped) to be new active span,
|
| 1518 | //! putting the initial free list in heap class free list
|
| 1519 | static void *_rpmalloc_span_initialize_new(heap_t *heap,
|
| 1520 | heap_size_class_t *heap_size_class,
|
| 1521 | span_t *span, uint32_t class_idx) {
|
| 1522 | rpmalloc_assert(span->span_count == 1, "Internal failure" );
|
| 1523 | size_class_t *size_class = _memory_size_class + class_idx;
|
| 1524 | span->size_class = class_idx;
|
| 1525 | span->heap = heap;
|
| 1526 | span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
|
| 1527 | span->block_size = size_class->block_size;
|
| 1528 | span->block_count = size_class->block_count;
|
| 1529 | span->free_list = 0;
|
| 1530 | span->list_size = 0;
|
| 1531 | atomic_store_ptr_release(dst: &span->free_list_deferred, val: 0);
|
| 1532 |
|
| 1533 | // Setup free list. Only initialize one system page worth of free blocks in
|
| 1534 | // list
|
| 1535 | void *block;
|
| 1536 | span->free_list_limit =
|
| 1537 | free_list_partial_init(list: &heap_size_class->free_list, first_block: &block, page_start: span,
|
| 1538 | pointer_offset(span, SPAN_HEADER_SIZE),
|
| 1539 | block_count: size_class->block_count, block_size: size_class->block_size);
|
| 1540 | // Link span as partial if there remain blocks to be initialized as free
|
| 1541 | // list, or full if fully initialized
|
| 1542 | if (span->free_list_limit < span->block_count) {
|
| 1543 | _rpmalloc_span_double_link_list_add(head: &heap_size_class->partial_span, span);
|
| 1544 | span->used_count = span->free_list_limit;
|
| 1545 | } else {
|
| 1546 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 1547 | _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
|
| 1548 | #endif
|
| 1549 | ++heap->full_span_count;
|
| 1550 | span->used_count = span->block_count;
|
| 1551 | }
|
| 1552 | return block;
|
| 1553 | }
|
| 1554 |
|
| 1555 | static void _rpmalloc_span_extract_free_list_deferred(span_t *span) {
|
| 1556 | // We need acquire semantics on the CAS operation since we are interested in
|
| 1557 | // the list size. Refer to _rpmalloc_deallocate_defer_small_or_medium for
|
| 1558 | // further comments on this dependency
|
| 1559 | do {
|
| 1560 | span->free_list =
|
| 1561 | atomic_exchange_ptr_acquire(dst: &span->free_list_deferred, INVALID_POINTER);
|
| 1562 | } while (span->free_list == INVALID_POINTER);
|
| 1563 | span->used_count -= span->list_size;
|
| 1564 | span->list_size = 0;
|
| 1565 | atomic_store_ptr_release(dst: &span->free_list_deferred, val: 0);
|
| 1566 | }
|
| 1567 |
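|  | //! A span is fully utilized when its local free list is empty and every block
|
|  | //! has already been handed out or linked into a free list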
|
| 1568 | static int _rpmalloc_span_is_fully_utilized(span_t *span) {
|
| 1569 | rpmalloc_assert(span->free_list_limit <= span->block_count,
|
| 1570 | "Span free list corrupted" );
|
| 1571 | return !span->free_list && (span->free_list_limit >= span->block_count);
|
| 1572 | }
|
| 1573 |
|
| 1574 | static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span,
|
| 1575 | span_t **list_head) {
|
| 1576 | void *free_list = heap->size_class[iclass].free_list;
|
| 1577 | span_t *class_span = (span_t *)((uintptr_t)free_list & _memory_span_mask);
|
| 1578 | if (span == class_span) {
|
| 1579 | // Adopt the heap class free list back into the span free list
|
| 1580 | void *block = span->free_list;
|
| 1581 | void *last_block = 0;
|
| 1582 | while (block) {
|
| 1583 | last_block = block;
|
| 1584 | block = *((void **)block);
|
| 1585 | }
|
| 1586 | uint32_t free_count = 0;
|
| 1587 | block = free_list;
|
| 1588 | while (block) {
|
| 1589 | ++free_count;
|
| 1590 | block = *((void **)block);
|
| 1591 | }
|
| 1592 | if (last_block) {
|
| 1593 | *((void **)last_block) = free_list;
|
| 1594 | } else {
|
| 1595 | span->free_list = free_list;
|
| 1596 | }
|
| 1597 | heap->size_class[iclass].free_list = 0;
|
| 1598 | span->used_count -= free_count;
|
| 1599 | }
|
| 1600 | // If this assert triggers you have memory leaks
|
| 1601 | rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected" );
|
| 1602 | if (span->list_size == span->used_count) {
|
| 1603 | _rpmalloc_stat_dec(&heap->span_use[0].current);
|
| 1604 | _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
|
| 1605 | // This function only used for spans in double linked lists
|
| 1606 | if (list_head)
|
| 1607 | _rpmalloc_span_double_link_list_remove(head: list_head, span);
|
| 1608 | _rpmalloc_span_unmap(span);
|
| 1609 | return 1;
|
| 1610 | }
|
| 1611 | return 0;
|
| 1612 | }
|
| 1613 |
|
| 1614 | ////////////
|
| 1615 | ///
|
| 1616 | /// Global cache
|
| 1617 | ///
|
| 1618 | //////
|
| 1619 |
|
| 1620 | #if ENABLE_GLOBAL_CACHE
|
| 1621 |
|
| 1622 | //! Finalize a global cache
|
| 1623 | static void _rpmalloc_global_cache_finalize(global_cache_t *cache) {
|
| 1624 | while (!atomic_cas32_acquire(dst: &cache->lock, val: 1, ref: 0))
|
| 1625 | _rpmalloc_spin();
|
| 1626 |
|
| 1627 | for (size_t ispan = 0; ispan < cache->count; ++ispan)
|
| 1628 | _rpmalloc_span_unmap(span: cache->span[ispan]);
|
| 1629 | cache->count = 0;
|
| 1630 |
|
| 1631 | while (cache->overflow) {
|
| 1632 | span_t *span = cache->overflow;
|
| 1633 | cache->overflow = span->next;
|
| 1634 | _rpmalloc_span_unmap(span);
|
| 1635 | }
|
| 1636 |
|
| 1637 | atomic_store32_release(dst: &cache->lock, val: 0);
|
| 1638 | }
|
| 1639 |
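|  | //! Insert the given spans into the global cache for this span count. Spans that
|
|  | //! do not fit under the cache limit go to the overflow list (huge pages or
|
|  | //! unlimited cache) or are unmapped; master spans with remaining subspans are
|
|  | //! swapped into the cache instead of being unmapped to avoid dangling subspans.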
|
| 1640 | static void _rpmalloc_global_cache_insert_spans(span_t **span,
|
| 1641 | size_t span_count,
|
| 1642 | size_t count) {
|
| 1643 | const size_t cache_limit =
|
| 1644 | (span_count == 1) ? GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE
|
| 1645 | : GLOBAL_CACHE_MULTIPLIER *
|
| 1646 | (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
|
| 1647 |
|
| 1648 | global_cache_t *cache = &_memory_span_cache[span_count - 1];
|
| 1649 |
|
| 1650 | size_t insert_count = count;
|
| 1651 | while (!atomic_cas32_acquire(dst: &cache->lock, val: 1, ref: 0))
|
| 1652 | _rpmalloc_spin();
|
| 1653 |
|
| 1654 | #if ENABLE_STATISTICS
|
| 1655 | cache->insert_count += count;
|
| 1656 | #endif
|
| 1657 | if ((cache->count + insert_count) > cache_limit)
|
| 1658 | insert_count = cache_limit - cache->count;
|
| 1659 |
|
| 1660 | memcpy(dest: cache->span + cache->count, src: span, n: sizeof(span_t *) * insert_count);
|
| 1661 | cache->count += (uint32_t)insert_count;
|
| 1662 |
|
| 1663 | #if ENABLE_UNLIMITED_CACHE
|
| 1664 | while (insert_count < count) {
|
| 1665 | #else
|
| 1666 | // Enable unlimited cache if huge pages, or we will leak since it is unlikely
|
| 1667 | // that an entire huge page will be unmapped, and we're unable to partially
|
| 1668 | // decommit a huge page
|
| 1669 | while ((_memory_page_size > _memory_span_size) && (insert_count < count)) {
|
| 1670 | #endif
|
| 1671 | span_t *current_span = span[insert_count++];
|
| 1672 | current_span->next = cache->overflow;
|
| 1673 | cache->overflow = current_span;
|
| 1674 | }
|
| 1675 | atomic_store32_release(dst: &cache->lock, val: 0);
|
| 1676 |
|
| 1677 | span_t *keep = 0;
|
| 1678 | for (size_t ispan = insert_count; ispan < count; ++ispan) {
|
| 1679 | span_t *current_span = span[ispan];
|
| 1680 | // Keep master spans that have remaining subspans to avoid dangling them
|
| 1681 | if ((current_span->flags & SPAN_FLAG_MASTER) &&
|
| 1682 | (atomic_load32(src: ¤t_span->remaining_spans) >
|
| 1683 | (int32_t)current_span->span_count)) {
|
| 1684 | current_span->next = keep;
|
| 1685 | keep = current_span;
|
| 1686 | } else {
|
| 1687 | _rpmalloc_span_unmap(span: current_span);
|
| 1688 | }
|
| 1689 | }
|
| 1690 |
|
| 1691 | if (keep) {
|
| 1692 | while (!atomic_cas32_acquire(dst: &cache->lock, val: 1, ref: 0))
|
| 1693 | _rpmalloc_spin();
|
| 1694 |
|
| 1695 | size_t islot = 0;
|
| 1696 | while (keep) {
|
| 1697 | for (; islot < cache->count; ++islot) {
|
| 1698 | span_t *current_span = cache->span[islot];
|
| 1699 | if (!(current_span->flags & SPAN_FLAG_MASTER) ||
|
| 1700 | ((current_span->flags & SPAN_FLAG_MASTER) &&
|
| 1701 | (atomic_load32(src: ¤t_span->remaining_spans) <=
|
| 1702 | (int32_t)current_span->span_count))) {
|
| 1703 | _rpmalloc_span_unmap(span: current_span);
|
| 1704 | cache->span[islot] = keep;
|
| 1705 | break;
|
| 1706 | }
|
| 1707 | }
|
| 1708 | if (islot == cache->count)
|
| 1709 | break;
|
| 1710 | keep = keep->next;
|
| 1711 | }
|
| 1712 |
|
| 1713 | if (keep) {
|
| 1714 | span_t *tail = keep;
|
| 1715 | while (tail->next)
|
| 1716 | tail = tail->next;
|
| 1717 | tail->next = cache->overflow;
|
| 1718 | cache->overflow = keep;
|
| 1719 | }
|
| 1720 |
|
| 1721 | atomic_store32_release(dst: &cache->lock, val: 0);
|
| 1722 | }
|
| 1723 | }
|
| 1724 |
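|  | //! Extract up to count spans of the given span count from the global cache,
|
|  | //! first from the cache array and then from the overflow list, returning the
|
|  | //! number of spans actually extracted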
|
| 1725 | static size_t _rpmalloc_global_cache_extract_spans(span_t **span,
|
| 1726 | size_t span_count,
|
| 1727 | size_t count) {
|
| 1728 | global_cache_t *cache = &_memory_span_cache[span_count - 1];
|
| 1729 |
|
| 1730 | size_t extract_count = 0;
|
| 1731 | while (!atomic_cas32_acquire(dst: &cache->lock, val: 1, ref: 0))
|
| 1732 | _rpmalloc_spin();
|
| 1733 |
|
| 1734 | #if ENABLE_STATISTICS
|
| 1735 | cache->extract_count += count;
|
| 1736 | #endif
|
| 1737 | size_t want = count - extract_count;
|
| 1738 | if (want > cache->count)
|
| 1739 | want = cache->count;
|
| 1740 |
|
| 1741 | memcpy(dest: span + extract_count, src: cache->span + (cache->count - want),
|
| 1742 | n: sizeof(span_t *) * want);
|
| 1743 | cache->count -= (uint32_t)want;
|
| 1744 | extract_count += want;
|
| 1745 |
|
| 1746 | while ((extract_count < count) && cache->overflow) {
|
| 1747 | span_t *current_span = cache->overflow;
|
| 1748 | span[extract_count++] = current_span;
|
| 1749 | cache->overflow = current_span->next;
|
| 1750 | }
|
| 1751 |
|
| 1752 | #if ENABLE_ASSERTS
|
| 1753 | for (size_t ispan = 0; ispan < extract_count; ++ispan) {
|
| 1754 | rpmalloc_assert(span[ispan]->span_count == span_count,
|
| 1755 | "Global cache span count mismatch" );
|
| 1756 | }
|
| 1757 | #endif
|
| 1758 |
|
| 1759 | atomic_store32_release(dst: &cache->lock, val: 0);
|
| 1760 |
|
| 1761 | return extract_count;
|
| 1762 | }
|
| 1763 |
|
| 1764 | #endif
|
| 1765 |
|
| 1766 | ////////////
|
| 1767 | ///
|
| 1768 | /// Heap control
|
| 1769 | ///
|
| 1770 | //////
|
| 1771 |
|
| 1772 | static void _rpmalloc_deallocate_huge(span_t *);
|
| 1773 |
|
| 1774 | //! Store the given spans as reserve in the given heap
|
| 1775 | static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
|
| 1776 | span_t *reserve,
|
| 1777 | size_t reserve_span_count) {
|
| 1778 | heap->span_reserve_master = master;
|
| 1779 | heap->span_reserve = reserve;
|
| 1780 | heap->spans_reserved = (uint32_t)reserve_span_count;
|
| 1781 | }
|
| 1782 |
|
| 1783 | //! Adopt the deferred span cache list, optionally extracting the first single
|
| 1784 | //! span for immediate re-use
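|  | //! Spans freed from other threads are pushed onto the lock free
|
|  | //! heap->span_free_deferred list; the whole list is swapped out atomically here
|
|  | //! and each span is moved to the owning heap cache (huge spans are deallocated
|
|  | //! directly).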
|
| 1785 | static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap,
|
| 1786 | span_t **single_span) {
|
| 1787 | span_t *span = (span_t *)((void *)atomic_exchange_ptr_acquire(
|
| 1788 | dst: &heap->span_free_deferred, val: 0));
|
| 1789 | while (span) {
|
| 1790 | span_t *next_span = (span_t *)span->free_list;
|
| 1791 | rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted" );
|
| 1792 | if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
|
| 1793 | rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted" );
|
| 1794 | --heap->full_span_count;
|
| 1795 | _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred);
|
| 1796 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 1797 | _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
|
| 1798 | span);
|
| 1799 | #endif
|
| 1800 | _rpmalloc_stat_dec(&heap->span_use[0].current);
|
| 1801 | _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
|
| 1802 | if (single_span && !*single_span)
|
| 1803 | *single_span = span;
|
| 1804 | else
|
| 1805 | _rpmalloc_heap_cache_insert(heap, span);
|
| 1806 | } else {
|
| 1807 | if (span->size_class == SIZE_CLASS_HUGE) {
|
| 1808 | _rpmalloc_deallocate_huge(span);
|
| 1809 | } else {
|
| 1810 | rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE,
|
| 1811 | "Span size class invalid" );
|
| 1812 | rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted" );
|
| 1813 | --heap->full_span_count;
|
| 1814 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 1815 | _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
|
| 1816 | #endif
|
| 1817 | uint32_t idx = span->span_count - 1;
|
| 1818 | _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred);
|
| 1819 | _rpmalloc_stat_dec(&heap->span_use[idx].current);
|
| 1820 | if (!idx && single_span && !*single_span)
|
| 1821 | *single_span = span;
|
| 1822 | else
|
| 1823 | _rpmalloc_heap_cache_insert(heap, span);
|
| 1824 | }
|
| 1825 | }
|
| 1826 | span = next_span;
|
| 1827 | }
|
| 1828 | }
|
| 1829 |
|
| 1830 | static void _rpmalloc_heap_unmap(heap_t *heap) {
|
| 1831 | if (!heap->master_heap) {
|
| 1832 | if ((heap->finalize > 1) && !atomic_load32(src: &heap->child_count)) {
|
| 1833 | span_t *span = (span_t *)((uintptr_t)heap & _memory_span_mask);
|
| 1834 | _rpmalloc_span_unmap(span);
|
| 1835 | }
|
| 1836 | } else {
|
| 1837 | if (atomic_decr32(val: &heap->master_heap->child_count) == 0) {
|
| 1838 | _rpmalloc_heap_unmap(heap: heap->master_heap);
|
| 1839 | }
|
| 1840 | }
|
| 1841 | }
|
| 1842 |
|
| 1843 | static void _rpmalloc_heap_global_finalize(heap_t *heap) {
|
| 1844 | if (heap->finalize++ > 1) {
|
| 1845 | --heap->finalize;
|
| 1846 | return;
|
| 1847 | }
|
| 1848 |
|
| 1849 | _rpmalloc_heap_finalize(heap);
|
| 1850 |
|
| 1851 | #if ENABLE_THREAD_CACHE
|
| 1852 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 1853 | span_cache_t *span_cache;
|
| 1854 | if (!iclass)
|
| 1855 | span_cache = &heap->span_cache;
|
| 1856 | else
|
| 1857 | span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
|
| 1858 | for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
|
| 1859 | _rpmalloc_span_unmap(span: span_cache->span[ispan]);
|
| 1860 | span_cache->count = 0;
|
| 1861 | }
|
| 1862 | #endif
|
| 1863 |
|
| 1864 | if (heap->full_span_count) {
|
| 1865 | --heap->finalize;
|
| 1866 | return;
|
| 1867 | }
|
| 1868 |
|
| 1869 | for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
|
| 1870 | if (heap->size_class[iclass].free_list ||
|
| 1871 | heap->size_class[iclass].partial_span) {
|
| 1872 | --heap->finalize;
|
| 1873 | return;
|
| 1874 | }
|
| 1875 | }
|
| 1876 | // Heap is now completely free, unmap and remove from heap list
|
| 1877 | size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
|
| 1878 | heap_t *list_heap = _memory_heaps[list_idx];
|
| 1879 | if (list_heap == heap) {
|
| 1880 | _memory_heaps[list_idx] = heap->next_heap;
|
| 1881 | } else {
|
| 1882 | while (list_heap->next_heap != heap)
|
| 1883 | list_heap = list_heap->next_heap;
|
| 1884 | list_heap->next_heap = heap->next_heap;
|
| 1885 | }
|
| 1886 |
|
| 1887 | _rpmalloc_heap_unmap(heap);
|
| 1888 | }
|
| 1889 |
|
| 1890 | //! Insert a single span into thread heap cache, releasing to global cache if
|
| 1891 | //! overflow
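|  | //! When the single span cache reaches MAX_THREAD_SPAN_CACHE, a batch of
|
|  | //! THREAD_SPAN_CACHE_TRANSFER spans is moved to the global cache (or unmapped if
|
|  | //! the global cache is disabled); large span caches use the same scheme with
|
|  | //! limits scaled down by span count.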
|
| 1892 | static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
|
| 1893 | if (UNEXPECTED(heap->finalize != 0)) {
|
| 1894 | _rpmalloc_span_unmap(span);
|
| 1895 | _rpmalloc_heap_global_finalize(heap);
|
| 1896 | return;
|
| 1897 | }
|
| 1898 | #if ENABLE_THREAD_CACHE
|
| 1899 | size_t span_count = span->span_count;
|
| 1900 | _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
|
| 1901 | if (span_count == 1) {
|
| 1902 | span_cache_t *span_cache = &heap->span_cache;
|
| 1903 | span_cache->span[span_cache->count++] = span;
|
| 1904 | if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
|
| 1905 | const size_t remain_count =
|
| 1906 | MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER;
|
| 1907 | #if ENABLE_GLOBAL_CACHE
|
| 1908 | _rpmalloc_stat_add64(&heap->thread_to_global,
|
| 1909 | THREAD_SPAN_CACHE_TRANSFER * _memory_span_size);
|
| 1910 | _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
|
| 1911 | THREAD_SPAN_CACHE_TRANSFER);
|
| 1912 | _rpmalloc_global_cache_insert_spans(span: span_cache->span + remain_count,
|
| 1913 | span_count,
|
| 1914 | THREAD_SPAN_CACHE_TRANSFER);
|
| 1915 | #else
|
| 1916 | for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
|
| 1917 | _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
|
| 1918 | #endif
|
| 1919 | span_cache->count = remain_count;
|
| 1920 | }
|
| 1921 | } else {
|
| 1922 | size_t cache_idx = span_count - 2;
|
| 1923 | span_large_cache_t *span_cache = heap->span_large_cache + cache_idx;
|
| 1924 | span_cache->span[span_cache->count++] = span;
|
| 1925 | const size_t cache_limit =
|
| 1926 | (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
|
| 1927 | if (span_cache->count == cache_limit) {
|
| 1928 | const size_t transfer_limit = 2 + (cache_limit >> 2);
|
| 1929 | const size_t transfer_count =
|
| 1930 | (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit
|
| 1931 | ? THREAD_SPAN_LARGE_CACHE_TRANSFER
|
| 1932 | : transfer_limit);
|
| 1933 | const size_t remain_count = cache_limit - transfer_count;
|
| 1934 | #if ENABLE_GLOBAL_CACHE
|
| 1935 | _rpmalloc_stat_add64(&heap->thread_to_global,
|
| 1936 | transfer_count * span_count * _memory_span_size);
|
| 1937 | _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
|
| 1938 | transfer_count);
|
| 1939 | _rpmalloc_global_cache_insert_spans(span: span_cache->span + remain_count,
|
| 1940 | span_count, count: transfer_count);
|
| 1941 | #else
|
| 1942 | for (size_t ispan = 0; ispan < transfer_count; ++ispan)
|
| 1943 | _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
|
| 1944 | #endif
|
| 1945 | span_cache->count = remain_count;
|
| 1946 | }
|
| 1947 | }
|
| 1948 | #else
|
| 1949 | (void)sizeof(heap);
|
| 1950 | _rpmalloc_span_unmap(span);
|
| 1951 | #endif
|
| 1952 | }
|
| 1953 |
|
| 1954 | //! Extract the given number of spans from the different cache levels
|
| 1955 | static span_t *_rpmalloc_heap_thread_cache_extract(heap_t *heap,
|
| 1956 | size_t span_count) {
|
| 1957 | span_t *span = 0;
|
| 1958 | #if ENABLE_THREAD_CACHE
|
| 1959 | span_cache_t *span_cache;
|
| 1960 | if (span_count == 1)
|
| 1961 | span_cache = &heap->span_cache;
|
| 1962 | else
|
| 1963 | span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
|
| 1964 | if (span_cache->count) {
|
| 1965 | _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
|
| 1966 | return span_cache->span[--span_cache->count];
|
| 1967 | }
|
| 1968 | #endif
|
| 1969 | return span;
|
| 1970 | }
|
| 1971 |
|
| 1972 | static span_t *_rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap,
|
| 1973 | size_t span_count) {
|
| 1974 | span_t *span = 0;
|
| 1975 | if (span_count == 1) {
|
| 1976 | _rpmalloc_heap_cache_adopt_deferred(heap, single_span: &span);
|
| 1977 | } else {
|
| 1978 | _rpmalloc_heap_cache_adopt_deferred(heap, single_span: 0);
|
| 1979 | span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
|
| 1980 | }
|
| 1981 | return span;
|
| 1982 | }
|
| 1983 |
|
| 1984 | static span_t *_rpmalloc_heap_reserved_extract(heap_t *heap,
|
| 1985 | size_t span_count) {
|
| 1986 | if (heap->spans_reserved >= span_count)
|
| 1987 | return _rpmalloc_span_map(heap, span_count);
|
| 1988 | return 0;
|
| 1989 | }
|
| 1990 |
|
| 1991 | //! Extract a span from the global cache
|
| 1992 | static span_t *_rpmalloc_heap_global_cache_extract(heap_t *heap,
|
| 1993 | size_t span_count) {
|
| 1994 | #if ENABLE_GLOBAL_CACHE
|
| 1995 | #if ENABLE_THREAD_CACHE
|
| 1996 | span_cache_t *span_cache;
|
| 1997 | size_t wanted_count;
|
| 1998 | if (span_count == 1) {
|
| 1999 | span_cache = &heap->span_cache;
|
| 2000 | wanted_count = THREAD_SPAN_CACHE_TRANSFER;
|
| 2001 | } else {
|
| 2002 | span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
|
| 2003 | wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER;
|
| 2004 | }
|
| 2005 | span_cache->count = _rpmalloc_global_cache_extract_spans(
|
| 2006 | span: span_cache->span, span_count, count: wanted_count);
|
| 2007 | if (span_cache->count) {
|
| 2008 | _rpmalloc_stat_add64(&heap->global_to_thread,
|
| 2009 | span_count * span_cache->count * _memory_span_size);
|
| 2010 | _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
|
| 2011 | span_cache->count);
|
| 2012 | return span_cache->span[--span_cache->count];
|
| 2013 | }
|
| 2014 | #else
|
| 2015 | span_t *span = 0;
|
| 2016 | size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
|
| 2017 | if (count) {
|
| 2018 | _rpmalloc_stat_add64(&heap->global_to_thread,
|
| 2019 | span_count * count * _memory_span_size);
|
| 2020 | _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
|
| 2021 | count);
|
| 2022 | return span;
|
| 2023 | }
|
| 2024 | #endif
|
| 2025 | #endif
|
| 2026 | (void)sizeof(heap);
|
| 2027 | (void)sizeof(span_count);
|
| 2028 | return 0;
|
| 2029 | }
|
| 2030 |
|
| 2031 | static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count,
|
| 2032 | uint32_t class_idx) {
|
| 2033 | (void)sizeof(heap);
|
| 2034 | (void)sizeof(span_count);
|
| 2035 | (void)sizeof(class_idx);
|
| 2036 | #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
|
| 2037 | uint32_t idx = (uint32_t)span_count - 1;
|
| 2038 | uint32_t current_count =
|
| 2039 | (uint32_t)atomic_incr32(&heap->span_use[idx].current);
|
| 2040 | if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
|
| 2041 | atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
|
| 2042 | _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1,
|
| 2043 | heap->size_class_use[class_idx].spans_peak);
|
| 2044 | #endif
|
| 2045 | }
|
| 2046 |
|
| 2047 | //! Get a span from one of the cache levels (thread cache, reserved, global
|
| 2048 | //! cache) or fallback to mapping more memory
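|  | //! Search order: thread cache, deferred free list, global cache, heap reserve.
|
|  | //! If nothing is found the lookup is retried with up to 50% larger span counts
|
|  | //! to improve cache hits, before finally mapping memory for the original count.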
|
| 2049 | static span_t *
|
| 2050 | _rpmalloc_heap_extract_new_span(heap_t *heap,
|
| 2051 | heap_size_class_t *heap_size_class,
|
| 2052 | size_t span_count, uint32_t class_idx) {
|
| 2053 | span_t *span;
|
| 2054 | #if ENABLE_THREAD_CACHE
|
| 2055 | if (heap_size_class && heap_size_class->cache) {
|
| 2056 | span = heap_size_class->cache;
|
| 2057 | heap_size_class->cache =
|
| 2058 | (heap->span_cache.count
|
| 2059 | ? heap->span_cache.span[--heap->span_cache.count]
|
| 2060 | : 0);
|
| 2061 | _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
|
| 2062 | return span;
|
| 2063 | }
|
| 2064 | #endif
|
| 2065 | (void)sizeof(class_idx);
|
| 2066 | // Allow 50% overhead to increase cache hits
|
| 2067 | size_t base_span_count = span_count;
|
| 2068 | size_t limit_span_count =
|
| 2069 | (span_count > 2) ? (span_count + (span_count >> 1)) : span_count;
|
| 2070 | if (limit_span_count > LARGE_CLASS_COUNT)
|
| 2071 | limit_span_count = LARGE_CLASS_COUNT;
|
| 2072 | do {
|
| 2073 | span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
|
| 2074 | if (EXPECTED(span != 0)) {
|
| 2075 | _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
|
| 2076 | _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
|
| 2077 | return span;
|
| 2078 | }
|
| 2079 | span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count);
|
| 2080 | if (EXPECTED(span != 0)) {
|
| 2081 | _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
|
| 2082 | _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
|
| 2083 | return span;
|
| 2084 | }
|
| 2085 | span = _rpmalloc_heap_global_cache_extract(heap, span_count);
|
| 2086 | if (EXPECTED(span != 0)) {
|
| 2087 | _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
|
| 2088 | _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
|
| 2089 | return span;
|
| 2090 | }
|
| 2091 | span = _rpmalloc_heap_reserved_extract(heap, span_count);
|
| 2092 | if (EXPECTED(span != 0)) {
|
| 2093 | _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
|
| 2094 | _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
|
| 2095 | return span;
|
| 2096 | }
|
| 2097 | ++span_count;
|
| 2098 | } while (span_count <= limit_span_count);
|
| 2099 | // Final fallback, map in more virtual memory
|
| 2100 | span = _rpmalloc_span_map(heap, span_count: base_span_count);
|
| 2101 | _rpmalloc_inc_span_statistics(heap, span_count: base_span_count, class_idx);
|
| 2102 | _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
|
| 2103 | return span;
|
| 2104 | }
|
| 2105 |
|
| 2106 | static void _rpmalloc_heap_initialize(heap_t *heap) {
|
| 2107 | _rpmalloc_memset_const(heap, 0, sizeof(heap_t));
|
| 2108 | // Get a new heap ID
|
| 2109 | heap->id = 1 + atomic_incr32(val: &_memory_heap_id);
|
| 2110 |
|
| 2111 | // Link in heap in heap ID map
|
| 2112 | size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
|
| 2113 | heap->next_heap = _memory_heaps[list_idx];
|
| 2114 | _memory_heaps[list_idx] = heap;
|
| 2115 | }
|
| 2116 |
|
| 2117 | static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
|
| 2118 | heap->owner_thread = (uintptr_t)-1;
|
| 2119 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2120 | heap_t **heap_list =
|
| 2121 | (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
|
| 2122 | #else
|
| 2123 | (void)sizeof(first_class);
|
| 2124 | heap_t **heap_list = &_memory_orphan_heaps;
|
| 2125 | #endif
|
| 2126 | heap->next_orphan = *heap_list;
|
| 2127 | *heap_list = heap;
|
| 2128 | }
|
| 2129 |
|
| 2130 | //! Allocate a new heap from newly mapped memory pages
|
| 2131 | static heap_t *_rpmalloc_heap_allocate_new(void) {
|
| 2132 | // Map in pages for 16 heaps. If page size is greater than required size for
|
| 2133 | // this, map a page and use first part for heaps and remaining part for spans
|
| 2134 | // for allocations. Adds a lot of complexity, but saves a lot of memory on
|
| 2135 | // systems where page size > 64 spans (4MiB)
|
| 2136 | size_t heap_size = sizeof(heap_t);
|
| 2137 | size_t aligned_heap_size = 16 * ((heap_size + 15) / 16);
|
| 2138 | size_t request_heap_count = 16;
|
| 2139 | size_t heap_span_count = ((aligned_heap_size * request_heap_count) +
|
| 2140 | sizeof(span_t) + _memory_span_size - 1) /
|
| 2141 | _memory_span_size;
|
| 2142 | size_t block_size = _memory_span_size * heap_span_count;
|
| 2143 | size_t span_count = heap_span_count;
|
| 2144 | span_t *span = 0;
|
| 2145 | // If there are global reserved spans, use these first
|
| 2146 | if (_memory_global_reserve_count >= heap_span_count) {
|
| 2147 | span = _rpmalloc_global_get_reserved_spans(span_count: heap_span_count);
|
| 2148 | }
|
| 2149 | if (!span) {
|
| 2150 | if (_memory_page_size > block_size) {
|
| 2151 | span_count = _memory_page_size / _memory_span_size;
|
| 2152 | block_size = _memory_page_size;
|
| 2153 | // If using huge pages, make sure to grab enough heaps to avoid
|
| 2154 | // reallocating a huge page just to serve new heaps
|
| 2155 | size_t possible_heap_count =
|
| 2156 | (block_size - sizeof(span_t)) / aligned_heap_size;
|
| 2157 | if (possible_heap_count >= (request_heap_count * 16))
|
| 2158 | request_heap_count *= 16;
|
| 2159 | else if (possible_heap_count < request_heap_count)
|
| 2160 | request_heap_count = possible_heap_count;
|
| 2161 | heap_span_count = ((aligned_heap_size * request_heap_count) +
|
| 2162 | sizeof(span_t) + _memory_span_size - 1) /
|
| 2163 | _memory_span_size;
|
| 2164 | }
|
| 2165 |
|
| 2166 | size_t align_offset = 0;
|
| 2167 | span = (span_t *)_rpmalloc_mmap(size: block_size, offset: &align_offset);
|
| 2168 | if (!span)
|
| 2169 | return 0;
|
| 2170 |
|
| 2171 | // Master span will contain the heaps
|
| 2172 | _rpmalloc_stat_inc(&_master_spans);
|
| 2173 | _rpmalloc_span_initialize(span, total_span_count: span_count, span_count: heap_span_count, align_offset);
|
| 2174 | }
|
| 2175 |
|
| 2176 | size_t remain_size = _memory_span_size - sizeof(span_t);
|
| 2177 | heap_t *heap = (heap_t *)pointer_offset(span, sizeof(span_t));
|
| 2178 | _rpmalloc_heap_initialize(heap);
|
| 2179 |
|
| 2180 | // Put extra heaps as orphans
|
| 2181 | size_t num_heaps = remain_size / aligned_heap_size;
|
| 2182 | if (num_heaps < request_heap_count)
|
| 2183 | num_heaps = request_heap_count;
|
| 2184 | atomic_store32(dst: &heap->child_count, val: (int32_t)num_heaps - 1);
|
| 2185 | heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
|
| 2186 | while (num_heaps > 1) {
|
| 2187 | _rpmalloc_heap_initialize(heap: extra_heap);
|
| 2188 | extra_heap->master_heap = heap;
|
| 2189 | _rpmalloc_heap_orphan(heap: extra_heap, first_class: 1);
|
| 2190 | extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
|
| 2191 | --num_heaps;
|
| 2192 | }
|
| 2193 |
|
| 2194 | if (span_count > heap_span_count) {
|
| 2195 | // Cap reserved spans
|
| 2196 | size_t remain_count = span_count - heap_span_count;
|
| 2197 | size_t reserve_count =
|
| 2198 | (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count
|
| 2199 | : remain_count);
|
| 2200 | span_t *remain_span =
|
| 2201 | (span_t *)pointer_offset(span, heap_span_count * _memory_span_size);
|
| 2202 | _rpmalloc_heap_set_reserved_spans(heap, master: span, reserve: remain_span, reserve_span_count: reserve_count);
|
| 2203 |
|
| 2204 | if (remain_count > reserve_count) {
|
| 2205 | // Set to global reserved spans
|
| 2206 | remain_span = (span_t *)pointer_offset(remain_span,
|
| 2207 | reserve_count * _memory_span_size);
|
| 2208 | reserve_count = remain_count - reserve_count;
|
| 2209 | _rpmalloc_global_set_reserved_spans(master: span, reserve: remain_span, reserve_span_count: reserve_count);
|
| 2210 | }
|
| 2211 | }
|
| 2212 |
|
| 2213 | return heap;
|
| 2214 | }
|
| 2215 |
|
| 2216 | static heap_t *_rpmalloc_heap_extract_orphan(heap_t **heap_list) {
|
| 2217 | heap_t *heap = *heap_list;
|
| 2218 | *heap_list = (heap ? heap->next_orphan : 0);
|
| 2219 | return heap;
|
| 2220 | }
|
| 2221 |
|
| 2222 | //! Allocate a new heap, potentially reusing a previously orphaned heap
|
| 2223 | static heap_t *_rpmalloc_heap_allocate(int first_class) {
|
| 2224 | heap_t *heap = 0;
|
| 2225 | while (!atomic_cas32_acquire(dst: &_memory_global_lock, val: 1, ref: 0))
|
| 2226 | _rpmalloc_spin();
|
| 2227 | if (first_class == 0)
|
| 2228 | heap = _rpmalloc_heap_extract_orphan(heap_list: &_memory_orphan_heaps);
|
| 2229 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2230 | if (!heap)
|
| 2231 | heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
|
| 2232 | #endif
|
| 2233 | if (!heap)
|
| 2234 | heap = _rpmalloc_heap_allocate_new();
|
| 2235 | atomic_store32_release(dst: &_memory_global_lock, val: 0);
|
| 2236 | if (heap)
|
| 2237 | _rpmalloc_heap_cache_adopt_deferred(heap, single_span: 0);
|
| 2238 | return heap;
|
| 2239 | }
|
| 2240 |
|
| 2241 | static void _rpmalloc_heap_release(void *heapptr, int first_class,
|
| 2242 | int release_cache) {
|
| 2243 | heap_t *heap = (heap_t *)heapptr;
|
| 2244 | if (!heap)
|
| 2245 | return;
|
| 2246 | // Release thread cache spans back to global cache
|
| 2247 | _rpmalloc_heap_cache_adopt_deferred(heap, single_span: 0);
|
| 2248 | if (release_cache || heap->finalize) {
|
| 2249 | #if ENABLE_THREAD_CACHE
|
| 2250 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 2251 | span_cache_t *span_cache;
|
| 2252 | if (!iclass)
|
| 2253 | span_cache = &heap->span_cache;
|
| 2254 | else
|
| 2255 | span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
|
| 2256 | if (!span_cache->count)
|
| 2257 | continue;
|
| 2258 | #if ENABLE_GLOBAL_CACHE
|
| 2259 | if (heap->finalize) {
|
| 2260 | for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
|
| 2261 | _rpmalloc_span_unmap(span: span_cache->span[ispan]);
|
| 2262 | } else {
|
| 2263 | _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count *
|
| 2264 | (iclass + 1) *
|
| 2265 | _memory_span_size);
|
| 2266 | _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
|
| 2267 | span_cache->count);
|
| 2268 | _rpmalloc_global_cache_insert_spans(span: span_cache->span, span_count: iclass + 1,
|
| 2269 | count: span_cache->count);
|
| 2270 | }
|
| 2271 | #else
|
| 2272 | for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
|
| 2273 | _rpmalloc_span_unmap(span_cache->span[ispan]);
|
| 2274 | #endif
|
| 2275 | span_cache->count = 0;
|
| 2276 | }
|
| 2277 | #endif
|
| 2278 | }
|
| 2279 |
|
| 2280 | if (get_thread_heap_raw() == heap)
|
| 2281 | set_thread_heap(0);
|
| 2282 |
|
| 2283 | #if ENABLE_STATISTICS
|
| 2284 | atomic_decr32(&_memory_active_heaps);
|
| 2285 | rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0,
|
| 2286 | "Still active heaps during finalization" );
|
| 2287 | #endif
|
| 2288 |
|
| 2289 | // If we are forcibly terminating with _exit the state of the
|
| 2290 | // lock atomic is unknown and it's best to just go ahead and exit
|
| 2291 | if (get_thread_id() != _rpmalloc_main_thread_id) {
|
| 2292 | while (!atomic_cas32_acquire(dst: &_memory_global_lock, val: 1, ref: 0))
|
| 2293 | _rpmalloc_spin();
|
| 2294 | }
|
| 2295 | _rpmalloc_heap_orphan(heap, first_class);
|
| 2296 | atomic_store32_release(dst: &_memory_global_lock, val: 0);
|
| 2297 | }
|
| 2298 |
|
| 2299 | static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) {
|
| 2300 | _rpmalloc_heap_release(heapptr, first_class: 0, release_cache);
|
| 2301 | }
|
| 2302 |
|
| 2303 | static void _rpmalloc_heap_release_raw_fc(void *heapptr) {
|
| 2304 | _rpmalloc_heap_release_raw(heapptr, release_cache: 1);
|
| 2305 | }
|
| 2306 |
|
| 2307 | static void _rpmalloc_heap_finalize(heap_t *heap) {
|
| 2308 | if (heap->spans_reserved) {
|
| 2309 | span_t *span = _rpmalloc_span_map(heap, span_count: heap->spans_reserved);
|
| 2310 | _rpmalloc_span_unmap(span);
|
| 2311 | heap->spans_reserved = 0;
|
| 2312 | }
|
| 2313 |
|
| 2314 | _rpmalloc_heap_cache_adopt_deferred(heap, single_span: 0);
|
| 2315 |
|
| 2316 | for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
|
| 2317 | if (heap->size_class[iclass].cache)
|
| 2318 | _rpmalloc_span_unmap(span: heap->size_class[iclass].cache);
|
| 2319 | heap->size_class[iclass].cache = 0;
|
| 2320 | span_t *span = heap->size_class[iclass].partial_span;
|
| 2321 | while (span) {
|
| 2322 | span_t *next = span->next;
|
| 2323 | _rpmalloc_span_finalize(heap, iclass, span,
|
| 2324 | list_head: &heap->size_class[iclass].partial_span);
|
| 2325 | span = next;
|
| 2326 | }
|
| 2327 | // If class still has a free list it must be a full span
|
| 2328 | if (heap->size_class[iclass].free_list) {
|
| 2329 | span_t *class_span =
|
| 2330 | (span_t *)((uintptr_t)heap->size_class[iclass].free_list &
|
| 2331 | _memory_span_mask);
|
| 2332 | span_t **list = 0;
|
| 2333 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2334 | list = &heap->full_span[iclass];
|
| 2335 | #endif
|
| 2336 | --heap->full_span_count;
|
| 2337 | if (!_rpmalloc_span_finalize(heap, iclass, span: class_span, list_head: list)) {
|
| 2338 | if (list)
|
| 2339 | _rpmalloc_span_double_link_list_remove(head: list, span: class_span);
|
| 2340 | _rpmalloc_span_double_link_list_add(
|
| 2341 | head: &heap->size_class[iclass].partial_span, span: class_span);
|
| 2342 | }
|
| 2343 | }
|
| 2344 | }
|
| 2345 |
|
| 2346 | #if ENABLE_THREAD_CACHE
|
| 2347 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 2348 | span_cache_t *span_cache;
|
| 2349 | if (!iclass)
|
| 2350 | span_cache = &heap->span_cache;
|
| 2351 | else
|
| 2352 | span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
|
| 2353 | for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
|
| 2354 | _rpmalloc_span_unmap(span: span_cache->span[ispan]);
|
| 2355 | span_cache->count = 0;
|
| 2356 | }
|
| 2357 | #endif
|
| 2358 | rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred),
|
| 2359 | "Heaps still active during finalization" );
|
| 2360 | }
|
| 2361 |
|
| 2362 | ////////////
|
| 2363 | ///
|
| 2364 | /// Allocation entry points
|
| 2365 | ///
|
| 2366 | //////
|
| 2367 |
|
| 2368 | //! Pop first block from a free list
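|  | //! (the list is intrusive: the first word of the free block holds the pointer
|
|  | //! to the next free block)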
|
| 2369 | static void *free_list_pop(void **list) {
|
| 2370 | void *block = *list;
|
| 2371 | *list = *((void **)block);
|
| 2372 | return block;
|
| 2373 | }
|
| 2374 |
|
| 2375 | //! Allocate a small/medium sized memory block from the given heap
|
| 2376 | static void *_rpmalloc_allocate_from_heap_fallback(
|
| 2377 | heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx) {
|
| 2378 | span_t *span = heap_size_class->partial_span;
|
| 2379 | rpmalloc_assume(heap != 0);
|
| 2380 | if (EXPECTED(span != 0)) {
|
| 2381 | rpmalloc_assert(span->block_count ==
|
| 2382 | _memory_size_class[span->size_class].block_count,
|
| 2383 | "Span block count corrupted" );
|
| 2384 | rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span),
|
| 2385 | "Internal failure" );
|
| 2386 | void *block;
|
| 2387 | if (span->free_list) {
|
| 2388 | // Span local free list is not empty, swap to size class free list
|
| 2389 | block = free_list_pop(list: &span->free_list);
|
| 2390 | heap_size_class->free_list = span->free_list;
|
| 2391 | span->free_list = 0;
|
| 2392 | } else {
|
| 2393 | // If the span did not fully initialize free list, link up another page
|
| 2394 | // worth of blocks
|
| 2395 | void *block_start = pointer_offset(
|
| 2396 | span, SPAN_HEADER_SIZE +
|
| 2397 | ((size_t)span->free_list_limit * span->block_size));
|
| 2398 | span->free_list_limit += free_list_partial_init(
|
| 2399 | list: &heap_size_class->free_list, first_block: &block,
|
| 2400 | page_start: (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)),
|
| 2401 | block_start, block_count: span->block_count - span->free_list_limit,
|
| 2402 | block_size: span->block_size);
|
| 2403 | }
|
| 2404 | rpmalloc_assert(span->free_list_limit <= span->block_count,
|
| 2405 | "Span block count corrupted" );
|
| 2406 | span->used_count = span->free_list_limit;
|
| 2407 |
|
| 2408 | // Swap in deferred free list if present
|
| 2409 | if (atomic_load_ptr(src: &span->free_list_deferred))
|
| 2410 | _rpmalloc_span_extract_free_list_deferred(span);
|
| 2411 |
|
| 2412 | // If span is still not fully utilized keep it in partial list and early
|
| 2413 | // return block
|
| 2414 | if (!_rpmalloc_span_is_fully_utilized(span))
|
| 2415 | return block;
|
| 2416 |
|
| 2417 | // The span is fully utilized, unlink from partial list and add to fully
|
| 2418 | // utilized list
|
| 2419 | _rpmalloc_span_double_link_list_pop_head(head: &heap_size_class->partial_span,
|
| 2420 | span);
|
| 2421 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2422 | _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
|
| 2423 | #endif
|
| 2424 | ++heap->full_span_count;
|
| 2425 | return block;
|
| 2426 | }
|
| 2427 |
|
| 2428 | // Find a span in one of the cache levels
|
| 2429 | span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, span_count: 1, class_idx);
|
| 2430 | if (EXPECTED(span != 0)) {
|
| 2431 | // Mark span as owned by this heap and set base data, return first block
|
| 2432 | return _rpmalloc_span_initialize_new(heap, heap_size_class, span,
|
| 2433 | class_idx);
|
| 2434 | }
|
| 2435 |
|
| 2436 | return 0;
|
| 2437 | }
|
| 2438 |
|
| 2439 | //! Allocate a small sized memory block from the given heap
|
| 2440 | static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) {
|
| 2441 | rpmalloc_assert(heap, "No thread heap" );
|
| 2442 | // Small sizes have unique size classes
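|  | // E.g. with the default 16 byte SMALL_GRANULARITY a 24 byte request maps to
|
|  | // class index (24 + 15) >> 4 = 2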
|
| 2443 | const uint32_t class_idx =
|
| 2444 | (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
|
| 2445 | heap_size_class_t *heap_size_class = heap->size_class + class_idx;
|
| 2446 | _rpmalloc_stat_inc_alloc(heap, class_idx);
|
| 2447 | if (EXPECTED(heap_size_class->free_list != 0))
|
| 2448 | return free_list_pop(list: &heap_size_class->free_list);
|
| 2449 | return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
|
| 2450 | class_idx);
|
| 2451 | }
|
| 2452 |
|
| 2453 | //! Allocate a medium sized memory block from the given heap
|
| 2454 | static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) {
|
| 2455 | rpmalloc_assert(heap, "No thread heap" );
|
| 2456 | // Calculate the size class index and do a dependent lookup of the final class
|
| 2457 | // index (in case of merged classes)
|
| 2458 | const uint32_t base_idx =
|
| 2459 | (uint32_t)(SMALL_CLASS_COUNT +
|
| 2460 | ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
|
| 2461 | const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
|
| 2462 | heap_size_class_t *heap_size_class = heap->size_class + class_idx;
|
| 2463 | _rpmalloc_stat_inc_alloc(heap, class_idx);
|
| 2464 | if (EXPECTED(heap_size_class->free_list != 0))
|
| 2465 | return free_list_pop(list: &heap_size_class->free_list);
|
| 2466 | return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
|
| 2467 | class_idx);
|
| 2468 | }
|
| 2469 |
|
| 2470 | //! Allocate a large sized memory block from the given heap
|
| 2471 | static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) {
|
| 2472 | rpmalloc_assert(heap, "No thread heap" );
|
| 2473 | // Calculate number of needed max sized spans (including header)
|
| 2474 | // Since this function is never called if size > LARGE_SIZE_LIMIT
|
| 2475 | // the span_count is guaranteed to be <= LARGE_CLASS_COUNT
|
| 2476 | size += SPAN_HEADER_SIZE;
|
| 2477 | size_t span_count = size >> _memory_span_size_shift;
|
| 2478 | if (size & (_memory_span_size - 1))
|
| 2479 | ++span_count;
|
| 2480 |
|
| 2481 | // Find a span in one of the cache levels
|
| 2482 | span_t *span =
|
| 2483 | _rpmalloc_heap_extract_new_span(heap, heap_size_class: 0, span_count, SIZE_CLASS_LARGE);
|
| 2484 | if (!span)
|
| 2485 | return span;
|
| 2486 |
|
| 2487 | // Mark span as owned by this heap and set base data
|
| 2488 | rpmalloc_assert(span->span_count >= span_count, "Internal failure" );
|
| 2489 | span->size_class = SIZE_CLASS_LARGE;
|
| 2490 | span->heap = heap;
|
| 2491 |
|
| 2492 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2493 | _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
|
| 2494 | #endif
|
| 2495 | ++heap->full_span_count;
|
| 2496 |
|
| 2497 | return pointer_offset(span, SPAN_HEADER_SIZE);
|
| 2498 | }
|
| 2499 |
|
| 2500 | //! Allocate a huge block by mapping memory pages directly
|
| 2501 | static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) {
|
| 2502 | rpmalloc_assert(heap, "No thread heap" );
|
| 2503 | _rpmalloc_heap_cache_adopt_deferred(heap, single_span: 0);
|
| 2504 | size += SPAN_HEADER_SIZE;
|
| 2505 | size_t num_pages = size >> _memory_page_size_shift;
|
| 2506 | if (size & (_memory_page_size - 1))
|
| 2507 | ++num_pages;
|
| 2508 | size_t align_offset = 0;
|
| 2509 | span_t *span =
|
| 2510 | (span_t *)_rpmalloc_mmap(size: num_pages * _memory_page_size, offset: &align_offset);
|
| 2511 | if (!span)
|
| 2512 | return span;
|
| 2513 |
|
| 2514 | // Store page count in span_count
|
| 2515 | span->size_class = SIZE_CLASS_HUGE;
|
| 2516 | span->span_count = (uint32_t)num_pages;
|
| 2517 | span->align_offset = (uint32_t)align_offset;
|
| 2518 | span->heap = heap;
|
| 2519 | _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
|
| 2520 |
|
| 2521 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2522 | _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
|
| 2523 | #endif
|
| 2524 | ++heap->full_span_count;
|
| 2525 |
|
| 2526 | return pointer_offset(span, SPAN_HEADER_SIZE);
|
| 2527 | }
|
| 2528 |
|
| 2529 | //! Allocate a block of the given size
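|  | //! Dispatch on size: small (unique size classes), medium (merged size classes),
|
|  | //! large (one or more whole spans) or huge (directly mapped pages)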
|
| 2530 | static void *_rpmalloc_allocate(heap_t *heap, size_t size) {
|
| 2531 | _rpmalloc_stat_add64(&_allocation_counter, 1);
|
| 2532 | if (EXPECTED(size <= SMALL_SIZE_LIMIT))
|
| 2533 | return _rpmalloc_allocate_small(heap, size);
|
| 2534 | else if (size <= _memory_medium_size_limit)
|
| 2535 | return _rpmalloc_allocate_medium(heap, size);
|
| 2536 | else if (size <= LARGE_SIZE_LIMIT)
|
| 2537 | return _rpmalloc_allocate_large(heap, size);
|
| 2538 | return _rpmalloc_allocate_huge(heap, size);
|
| 2539 | }
|
| 2540 |
|
| 2541 | static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment,
|
| 2542 | size_t size) {
|
| 2543 | if (alignment <= SMALL_GRANULARITY)
|
| 2544 | return _rpmalloc_allocate(heap, size);
|
| 2545 |
|
| 2546 | #if ENABLE_VALIDATE_ARGS
|
| 2547 | if ((size + alignment) < size) {
|
| 2548 | errno = EINVAL;
|
| 2549 | return 0;
|
| 2550 | }
|
| 2551 | if (alignment & (alignment - 1)) {
|
| 2552 | errno = EINVAL;
|
| 2553 | return 0;
|
| 2554 | }
|
| 2555 | #endif
|
| 2556 |
|
| 2557 | if ((alignment <= SPAN_HEADER_SIZE) &&
|
| 2558 | ((size + SPAN_HEADER_SIZE) < _memory_medium_size_limit)) {
|
| 2559 | // If alignment is less or equal to span header size (which is power of
|
| 2560 | // two), and size aligned to span header size multiples is less than size +
|
| 2561 | // alignment, then use natural alignment of blocks to provide alignment
|
| 2562 | size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) &
|
| 2563 | ~(uintptr_t)(SPAN_HEADER_SIZE - 1)
|
| 2564 | : SPAN_HEADER_SIZE;
|
| 2565 | rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE),
|
| 2566 | "Failed alignment calculation" );
|
| 2567 | if (multiple_size <= (size + alignment))
|
| 2568 | return _rpmalloc_allocate(heap, size: multiple_size);
|
| 2569 | }
|
| 2570 |
|
| 2571 | void *ptr = 0;
|
| 2572 | size_t align_mask = alignment - 1;
|
| 2573 | if (alignment <= _memory_page_size) {
|
| 2574 | ptr = _rpmalloc_allocate(heap, size: size + alignment);
|
| 2575 | if ((uintptr_t)ptr & align_mask) {
|
| 2576 | ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
|
| 2577 | // Mark as having aligned blocks
|
| 2578 | span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
|
| 2579 | span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
|
| 2580 | }
|
| 2581 | return ptr;
|
| 2582 | }
|
| 2583 |
|
| 2584 | // Fallback to mapping new pages for this request. Since pointers passed
|
| 2585 | // to rpfree must be able to reach the start of the span by bitmasking of
|
| 2586 | // the address with the span size, the returned aligned pointer from this
|
| 2587 | // function must be within a span size of the start of the mapped area.
|
| 2588 | // In worst case this requires us to loop and map pages until we get a
|
| 2589 | // suitable memory address. It also means we can never align to span size
|
| 2590 | // or greater, since the span header will push alignment more than one
|
| 2591 | // span size away from span start (thus causing pointer mask to give us
|
| 2592 | // an invalid span start on free)
|
| 2593 | if (alignment & align_mask) {
|
| 2594 | errno = EINVAL;
|
| 2595 | return 0;
|
| 2596 | }
|
| 2597 | if (alignment >= _memory_span_size) {
|
| 2598 | errno = EINVAL;
|
| 2599 | return 0;
|
| 2600 | }
|
| 2601 |
|
| 2602 | size_t extra_pages = alignment / _memory_page_size;
|
| 2603 |
|
| 2604 | // Since each span has a header, we will at least need one extra memory page
|
| 2605 | size_t num_pages = 1 + (size / _memory_page_size);
|
| 2606 | if (size & (_memory_page_size - 1))
|
| 2607 | ++num_pages;
|
| 2608 |
|
| 2609 | if (extra_pages > num_pages)
|
| 2610 | num_pages = 1 + extra_pages;
|
| 2611 |
|
| 2612 | size_t original_pages = num_pages;
|
| 2613 | size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
|
| 2614 | if (limit_pages < (original_pages * 2))
|
| 2615 | limit_pages = original_pages * 2;
|
| 2616 |
|
| 2617 | size_t mapped_size, align_offset;
|
| 2618 | span_t *span;
|
| 2619 |
|
| 2620 | retry:
|
| 2621 | align_offset = 0;
|
| 2622 | mapped_size = num_pages * _memory_page_size;
|
| 2623 |
|
| 2624 | span = (span_t *)_rpmalloc_mmap(size: mapped_size, offset: &align_offset);
|
| 2625 | if (!span) {
|
| 2626 | errno = ENOMEM;
|
| 2627 | return 0;
|
| 2628 | }
|
| 2629 | ptr = pointer_offset(span, SPAN_HEADER_SIZE);
|
| 2630 |
|
| 2631 | if ((uintptr_t)ptr & align_mask)
|
| 2632 | ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
|
| 2633 |
|
| 2634 | if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
|
| 2635 | (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
|
| 2636 | (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
|
| 2637 | _rpmalloc_unmap(address: span, size: mapped_size, offset: align_offset, release: mapped_size);
|
| 2638 | ++num_pages;
|
| 2639 | if (num_pages > limit_pages) {
|
| 2640 | errno = EINVAL;
|
| 2641 | return 0;
|
| 2642 | }
|
| 2643 | goto retry;
|
| 2644 | }
|
| 2645 |
|
| 2646 | // Store page count in span_count
|
| 2647 | span->size_class = SIZE_CLASS_HUGE;
|
| 2648 | span->span_count = (uint32_t)num_pages;
|
| 2649 | span->align_offset = (uint32_t)align_offset;
|
| 2650 | span->heap = heap;
|
| 2651 | _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
|
| 2652 |
|
| 2653 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2654 | _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
|
| 2655 | #endif
|
| 2656 | ++heap->full_span_count;
|
| 2657 |
|
| 2658 | _rpmalloc_stat_add64(&_allocation_counter, 1);
|
| 2659 |
|
| 2660 | return ptr;
|
| 2661 | }
|
| 2662 |
|
| 2663 | ////////////
|
| 2664 | ///
|
| 2665 | /// Deallocation entry points
|
| 2666 | ///
|
| 2667 | //////
|
| 2668 |
|
| 2669 | //! Deallocate the given small/medium memory block in the current thread local
|
| 2670 | //! heap
|
| 2671 | static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span,
|
| 2672 | void *block) {
|
| 2673 | heap_t *heap = span->heap;
|
| 2674 | rpmalloc_assert(heap->owner_thread == get_thread_id() ||
|
| 2675 | !heap->owner_thread || heap->finalize,
|
| 2676 | "Internal failure" );
|
| 2677 | // Add block to free list
|
| 2678 | if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
|
| 2679 | span->used_count = span->block_count;
|
| 2680 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2681 | _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
|
| 2682 | span);
|
| 2683 | #endif
|
| 2684 | _rpmalloc_span_double_link_list_add(
|
| 2685 | head: &heap->size_class[span->size_class].partial_span, span);
|
| 2686 | --heap->full_span_count;
|
| 2687 | }
|
| 2688 | *((void **)block) = span->free_list;
|
| 2689 | --span->used_count;
|
| 2690 | span->free_list = block;
|
| 2691 | if (UNEXPECTED(span->used_count == span->list_size)) {
|
| 2692 | // If there are no used blocks it is guaranteed that no other external
|
| 2693 | // thread is accessing the span
|
| 2694 | if (span->used_count) {
|
| 2695 | // Make sure we have synchronized the deferred list and list size by using
|
| 2696 | // acquire semantics and guarantee that no external thread is accessing
|
| 2697 | // span concurrently
|
| 2698 | void *free_list;
|
| 2699 | do {
|
| 2700 | free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred,
|
| 2701 | INVALID_POINTER);
|
| 2702 | } while (free_list == INVALID_POINTER);
|
| 2703 | atomic_store_ptr_release(&span->free_list_deferred, free_list);
|
| 2704 | }
|
| 2705 | _rpmalloc_span_double_link_list_remove(
|
| 2706 | &heap->size_class[span->size_class].partial_span, span);
|
| 2707 | _rpmalloc_span_release_to_cache(heap, span);
|
| 2708 | }
|
| 2709 | }
|
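|  | // Editor's note: minimal sketch, not part of the original source. The direct
|  | // deallocation above threads freed blocks through their own first word, so
|  | // the per-span free list needs no storage beyond the blocks themselves.
|  | static void example_free_list_push(void **free_list_head, void *block) {
|  |   *(void **)block = *free_list_head; // old head stored inside the freed block
|  |   *free_list_head = block;           // freed block becomes the new head
|  | }
|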
| 2710 |
|
| 2711 | static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
|
| 2712 | if (span->size_class != SIZE_CLASS_HUGE)
|
| 2713 | _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred);
|
| 2714 | // This list does not need ABA protection, no mutable side state
|
| 2715 | do {
|
| 2716 | span->free_list = (void *)atomic_load_ptr(&heap->span_free_deferred);
|
| 2717 | } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
|
| 2718 | }
|
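|  | // Editor's note: standalone sketch, not part of the original source. It shows
|  | // the compare-and-swap push used above, written with C11 <stdatomic.h> rather
|  | // than the allocator's portable wrappers, and is guarded out because that
|  | // header is not unconditionally included in this file.
|  | #if 0
|  | #include <stdatomic.h>
|  | typedef struct example_node { struct example_node *next; } example_node_t;
|  | static void example_lock_free_push(_Atomic(example_node_t *) *head,
|  |                                    example_node_t *node) {
|  |   example_node_t *old_head = atomic_load_explicit(head, memory_order_relaxed);
|  |   do {
|  |     node->next = old_head; // link to the current head before publishing
|  |   } while (!atomic_compare_exchange_weak_explicit(
|  |       head, &old_head, node, memory_order_release, memory_order_relaxed));
|  | }
|  | #endif
|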
| 2719 |
|
| 2720 | //! Put the block in the deferred free list of the owning span
|
| 2721 | static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span,
|
| 2722 | void *block) {
|
| 2723 | // The memory ordering here is a bit tricky. To avoid having to ABA protect
|
| 2724 | // the deferred free list while keeping the list and its size in sync, we
|
| 2725 | // need acquire semantics on a successful CAS of the pointer (guaranteeing
|
| 2726 | // that list_size is valid) and release semantics on the subsequent pointer
|
| 2727 | // store
|
| 2728 | void *free_list;
|
| 2729 | do {
|
| 2730 | free_list =
|
| 2731 | atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
|
| 2732 | } while (free_list == INVALID_POINTER);
|
| 2733 | *((void **)block) = free_list;
|
| 2734 | uint32_t free_count = ++span->list_size;
|
| 2735 | int all_deferred_free = (free_count == span->block_count);
|
| 2736 | atomic_store_ptr_release(&span->free_list_deferred, block);
|
| 2737 | if (all_deferred_free) {
|
| 2738 | // Span was completely freed by this block. Due to the INVALID_POINTER spin
|
| 2739 | // lock no other thread can reach this state simultaneously on this span.
|
| 2740 | // Safe to move to owner heap deferred cache
|
| 2741 | _rpmalloc_deallocate_defer_free_span(span->heap, span);
|
| 2742 | }
|
| 2743 | }
|
| 2744 |
|
| 2745 | static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
|
| 2746 | _rpmalloc_stat_inc_free(span->heap, span->size_class);
|
| 2747 | if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
|
| 2748 | // Realign pointer to block start
|
| 2749 | void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
|
| 2750 | uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
|
| 2751 | p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
|
| 2752 | }
|
| 2753 | // Check if block belongs to this heap or if deallocation should be deferred
|
| 2754 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2755 | int defer =
|
| 2756 | (span->heap->owner_thread &&
|
| 2757 | (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
|
| 2758 | #else
|
| 2759 | int defer =
|
| 2760 | ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
|
| 2761 | #endif
|
| 2762 | if (!defer)
|
| 2763 | _rpmalloc_deallocate_direct_small_or_medium(span, p);
|
| 2764 | else
|
| 2765 | _rpmalloc_deallocate_defer_small_or_medium(span, p);
|
| 2766 | }
|
| 2767 |
|
| 2768 | //! Deallocate the given large memory block to the current heap
|
| 2769 | static void _rpmalloc_deallocate_large(span_t *span) {
|
| 2770 | rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class" );
|
| 2771 | rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
|
| 2772 | !(span->flags & SPAN_FLAG_SUBSPAN),
|
| 2773 | "Span flag corrupted" );
|
| 2774 | rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
|
| 2775 | (span->flags & SPAN_FLAG_SUBSPAN),
|
| 2776 | "Span flag corrupted" );
|
| 2777 | // We must always defer (unless finalizing) if from another heap since we
|
| 2778 | // cannot touch the list or counters of another heap
|
| 2779 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2780 | int defer =
|
| 2781 | (span->heap->owner_thread &&
|
| 2782 | (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
|
| 2783 | #else
|
| 2784 | int defer =
|
| 2785 | ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
|
| 2786 | #endif
|
| 2787 | if (defer) {
|
| 2788 | _rpmalloc_deallocate_defer_free_span(span->heap, span);
|
| 2789 | return;
|
| 2790 | }
|
| 2791 | rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted" );
|
| 2792 | --span->heap->full_span_count;
|
| 2793 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2794 | _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
|
| 2795 | #endif
|
| 2796 | #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
|
| 2797 | // Decrease counter
|
| 2798 | size_t idx = span->span_count - 1;
|
| 2799 | atomic_decr32(&span->heap->span_use[idx].current);
|
| 2800 | #endif
|
| 2801 | heap_t *heap = span->heap;
|
| 2802 | rpmalloc_assert(heap, "No thread heap" );
|
| 2803 | #if ENABLE_THREAD_CACHE
|
| 2804 | const int set_as_reserved =
|
| 2805 | ((span->span_count > 1) && (heap->span_cache.count == 0) &&
|
| 2806 | !heap->finalize && !heap->spans_reserved);
|
| 2807 | #else
|
| 2808 | const int set_as_reserved =
|
| 2809 | ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved);
|
| 2810 | #endif
|
| 2811 | if (set_as_reserved) {
|
| 2812 | heap->span_reserve = span;
|
| 2813 | heap->spans_reserved = span->span_count;
|
| 2814 | if (span->flags & SPAN_FLAG_MASTER) {
|
| 2815 | heap->span_reserve_master = span;
|
| 2816 | } else { // SPAN_FLAG_SUBSPAN
|
| 2817 | span_t *master = (span_t *)pointer_offset(
|
| 2818 | span,
|
| 2819 | -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
|
| 2820 | heap->span_reserve_master = master;
|
| 2821 | rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted" );
|
| 2822 | rpmalloc_assert(atomic_load32(&master->remaining_spans) >=
|
| 2823 | (int32_t)span->span_count,
|
| 2824 | "Master span count corrupted" );
|
| 2825 | }
|
| 2826 | _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
|
| 2827 | } else {
|
| 2828 | // Insert into cache list
|
| 2829 | _rpmalloc_heap_cache_insert(heap, span);
|
| 2830 | }
|
| 2831 | }
|
| 2832 |
|
| 2833 | //! Deallocate the given huge span
|
| 2834 | static void _rpmalloc_deallocate_huge(span_t *span) {
|
| 2835 | rpmalloc_assert(span->heap, "No span heap" );
|
| 2836 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2837 | int defer =
|
| 2838 | (span->heap->owner_thread &&
|
| 2839 | (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
|
| 2840 | #else
|
| 2841 | int defer =
|
| 2842 | ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
|
| 2843 | #endif
|
| 2844 | if (defer) {
|
| 2845 | _rpmalloc_deallocate_defer_free_span(span->heap, span);
|
| 2846 | return;
|
| 2847 | }
|
| 2848 | rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted" );
|
| 2849 | --span->heap->full_span_count;
|
| 2850 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 2851 | _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
|
| 2852 | #endif
|
| 2853 |
|
| 2854 | // Oversized allocation, page count is stored in span_count
|
| 2855 | size_t num_pages = span->span_count;
|
| 2856 | _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset,
|
| 2857 | num_pages * _memory_page_size);
|
| 2858 | _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
|
| 2859 | }
|
| 2860 |
|
| 2861 | //! Deallocate the given block
|
| 2862 | static void _rpmalloc_deallocate(void *p) {
|
| 2863 | _rpmalloc_stat_add64(&_deallocation_counter, 1);
|
| 2864 | // Grab the span (always at start of span, using span alignment)
|
| 2865 | span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
|
| 2866 | if (UNEXPECTED(!span))
|
| 2867 | return;
|
| 2868 | if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
|
| 2869 | _rpmalloc_deallocate_small_or_medium(span, p);
|
| 2870 | else if (span->size_class == SIZE_CLASS_LARGE)
|
| 2871 | _rpmalloc_deallocate_large(span);
|
| 2872 | else
|
| 2873 | _rpmalloc_deallocate_huge(span);
|
| 2874 | }
|
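|  | // Editor's note: illustrative sketch, not part of the original source. Spans
|  | // are aligned to the span size, so the dispatcher above recovers the owning
|  | // span of any block by masking off the low bits of its address, as in this
|  | // generic form of the lookup.
|  | static void *example_owning_span(void *block, uintptr_t span_mask) {
|  |   // span_mask is ~(span_size - 1) for a power-of-two span size
|  |   return (void *)((uintptr_t)block & span_mask);
|  | }
|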
| 2875 |
|
| 2876 | ////////////
|
| 2877 | ///
|
| 2878 | /// Reallocation entry points
|
| 2879 | ///
|
| 2880 | //////
|
| 2881 |
|
| 2882 | static size_t _rpmalloc_usable_size(void *p);
|
| 2883 |
|
| 2884 | //! Reallocate the given block to the given size
|
| 2885 | static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size,
|
| 2886 | size_t oldsize, unsigned int flags) {
|
| 2887 | if (p) {
|
| 2888 | // Grab the span using guaranteed span alignment
|
| 2889 | span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
|
| 2890 | if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
|
| 2891 | // Small/medium sized block
|
| 2892 | rpmalloc_assert(span->span_count == 1, "Span counter corrupted" );
|
| 2893 | void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
|
| 2894 | uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
|
| 2895 | uint32_t block_idx = block_offset / span->block_size;
|
| 2896 | void *block =
|
| 2897 | pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
|
| 2898 | if (!oldsize)
|
| 2899 | oldsize =
|
| 2900 | (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
|
| 2901 | if ((size_t)span->block_size >= size) {
|
| 2902 | // Still fits in block, never mind trying to save memory, but preserve
|
| 2903 | // data if alignment changed
|
| 2904 | if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
|
| 2905 | memmove(block, p, oldsize);
|
| 2906 | return block;
|
| 2907 | }
|
| 2908 | } else if (span->size_class == SIZE_CLASS_LARGE) {
|
| 2909 | // Large block
|
| 2910 | size_t total_size = size + SPAN_HEADER_SIZE;
|
| 2911 | size_t num_spans = total_size >> _memory_span_size_shift;
|
| 2912 | if (total_size & (_memory_span_mask - 1))
|
| 2913 | ++num_spans;
|
| 2914 | size_t current_spans = span->span_count;
|
| 2915 | void *block = pointer_offset(span, SPAN_HEADER_SIZE);
|
| 2916 | if (!oldsize)
|
| 2917 | oldsize = (current_spans * _memory_span_size) -
|
| 2918 | (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
|
| 2919 | if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) {
|
| 2920 | // Still fits in block, never mind trying to save memory, but preserve
|
| 2921 | // data if alignment changed
|
| 2922 | if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
|
| 2923 | memmove(block, p, oldsize);
|
| 2924 | return block;
|
| 2925 | }
|
| 2926 | } else {
|
| 2927 | // Oversized block
|
| 2928 | size_t total_size = size + SPAN_HEADER_SIZE;
|
| 2929 | size_t num_pages = total_size >> _memory_page_size_shift;
|
| 2930 | if (total_size & (_memory_page_size - 1))
|
| 2931 | ++num_pages;
|
| 2932 | // Page count is stored in span_count
|
| 2933 | size_t current_pages = span->span_count;
|
| 2934 | void *block = pointer_offset(span, SPAN_HEADER_SIZE);
|
| 2935 | if (!oldsize)
|
| 2936 | oldsize = (current_pages * _memory_page_size) -
|
| 2937 | (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
|
| 2938 | if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
|
| 2939 | // Still fits in block, never mind trying to save memory, but preserve
|
| 2940 | // data if alignment changed
|
| 2941 | if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
|
| 2942 | memmove(block, p, oldsize);
|
| 2943 | return block;
|
| 2944 | }
|
| 2945 | }
|
| 2946 | } else {
|
| 2947 | oldsize = 0;
|
| 2948 | }
|
| 2949 |
|
| 2950 | if (!!(flags & RPMALLOC_GROW_OR_FAIL))
|
| 2951 | return 0;
|
| 2952 |
|
| 2953 | // Size is greater than block size, need to allocate a new block and
|
| 2954 | // deallocate the old. Avoid hysteresis by overallocating if increase is small
|
| 2955 | // (below 37%)
|
| 2956 | size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
|
| 2957 | size_t new_size =
|
| 2958 | (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
|
| 2959 | void *block = _rpmalloc_allocate(heap, new_size);
|
| 2960 | if (p && block) {
|
| 2961 | if (!(flags & RPMALLOC_NO_PRESERVE))
|
| 2962 | memcpy(block, p, oldsize < new_size ? oldsize : new_size);
|
| 2963 | _rpmalloc_deallocate(p);
|
| 2964 | }
|
| 2965 |
|
| 2966 | return block;
|
| 2967 | }
|
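|  | // Editor's note: illustrative sketch, not part of the original source, of the
|  | // growth heuristic above: a small increase is rounded up to roughly 1.375x
|  | // the old size (oldsize/4 + oldsize/8), so a run of slightly larger requests
|  | // does not force a copy on every call.
|  | static size_t example_grow_size(size_t oldsize, size_t requested) {
|  |   size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
|  |   if (requested > lower_bound)
|  |     return requested; // large jump: use the requested size directly
|  |   return (requested > oldsize) ? lower_bound : requested;
|  | }
|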
| 2968 |
|
| 2969 | static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr,
|
| 2970 | size_t alignment, size_t size,
|
| 2971 | size_t oldsize, unsigned int flags) {
|
| 2972 | if (alignment <= SMALL_GRANULARITY)
|
| 2973 | return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
|
| 2974 |
|
| 2975 | int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
|
| 2976 | size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0);
|
| 2977 | if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
|
| 2978 | if (no_alloc || (size >= (usablesize / 2)))
|
| 2979 | return ptr;
|
| 2980 | }
|
| 2981 | // Aligned alloc marks span as having aligned blocks
|
| 2982 | void *block =
|
| 2983 | (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
|
| 2984 | if (EXPECTED(block != 0)) {
|
| 2985 | if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
|
| 2986 | if (!oldsize)
|
| 2987 | oldsize = usablesize;
|
| 2988 | memcpy(block, ptr, oldsize < size ? oldsize : size);
|
| 2989 | }
|
| 2990 | _rpmalloc_deallocate(ptr);
|
| 2991 | }
|
| 2992 | return block;
|
| 2993 | }
|
| 2994 |
|
| 2995 | ////////////
|
| 2996 | ///
|
| 2997 | /// Initialization, finalization and utility
|
| 2998 | ///
|
| 2999 | //////
|
| 3000 |
|
| 3001 | //! Get the usable size of the given block
|
| 3002 | static size_t _rpmalloc_usable_size(void *p) {
|
| 3003 | // Grab the span using guaranteed span alignment
|
| 3004 | span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
|
| 3005 | if (span->size_class < SIZE_CLASS_COUNT) {
|
| 3006 | // Small/medium block
|
| 3007 | void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
|
| 3008 | return span->block_size -
|
| 3009 | ((size_t)pointer_diff(p, blocks_start) % span->block_size);
|
| 3010 | }
|
| 3011 | if (span->size_class == SIZE_CLASS_LARGE) {
|
| 3012 | // Large block
|
| 3013 | size_t current_spans = span->span_count;
|
| 3014 | return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
|
| 3015 | }
|
| 3016 | // Oversized block, page count is stored in span_count
|
| 3017 | size_t current_pages = span->span_count;
|
| 3018 | return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
|
| 3019 | }
|
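|  | // Editor's note: illustrative usage sketch, not part of the original source.
|  | // Because requests are rounded up to a size class, the usable size of a block
|  | // can be larger than what was asked for; the public wrapper reports it.
|  | static size_t example_usable_size(void) {
|  |   void *p = rpmalloc(100);                 // rounded up to its size class
|  |   size_t usable = rpmalloc_usable_size(p); // >= 100 if the allocation worked
|  |   rpfree(p);
|  |   return usable;
|  | }
|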
| 3020 |
|
| 3021 | //! Adjust and optimize the size class properties for the given class
|
| 3022 | static void _rpmalloc_adjust_size_class(size_t iclass) {
|
| 3023 | size_t block_size = _memory_size_class[iclass].block_size;
|
| 3024 | size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
|
| 3025 |
|
| 3026 | _memory_size_class[iclass].block_count = (uint16_t)block_count;
|
| 3027 | _memory_size_class[iclass].class_idx = (uint16_t)iclass;
|
| 3028 |
|
| 3029 | // Check if previous size classes can be merged
|
| 3030 | if (iclass >= SMALL_CLASS_COUNT) {
|
| 3031 | size_t prevclass = iclass;
|
| 3032 | while (prevclass > 0) {
|
| 3033 | --prevclass;
|
| 3034 | // A class can be merged if number of pages and number of blocks are equal
|
| 3035 | if (_memory_size_class[prevclass].block_count ==
|
| 3036 | _memory_size_class[iclass].block_count)
|
| 3037 | _rpmalloc_memcpy_const(_memory_size_class + prevclass,
|
| 3038 | _memory_size_class + iclass,
|
| 3039 | sizeof(_memory_size_class[iclass]));
|
| 3040 | else
|
| 3041 | break;
|
| 3042 | }
|
| 3043 | }
|
| 3044 | }
|
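|  | // Editor's note: illustrative sketch, not part of the original source, of the
|  | // block count computation above, with the span and header sizes passed in as
|  | // plain parameters instead of the allocator's globals.
|  | static size_t example_blocks_per_span(size_t span_size, size_t header_size,
|  |                                       size_t block_size) {
|  |   // e.g. a 64 KiB span with a 128 byte header holds (65536 - 128) / 1024 = 63
|  |   // blocks of a 1024 byte size class
|  |   return (span_size - header_size) / block_size;
|  | }
|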
| 3045 |
|
| 3046 | //! Initialize the allocator and setup global data
|
| 3047 | extern inline int rpmalloc_initialize(void) {
|
| 3048 | if (_rpmalloc_initialized) {
|
| 3049 | rpmalloc_thread_initialize();
|
| 3050 | return 0;
|
| 3051 | }
|
| 3052 | return rpmalloc_initialize_config(0);
|
| 3053 | }
|
| 3054 |
|
| 3055 | int rpmalloc_initialize_config(const rpmalloc_config_t *config) {
|
| 3056 | if (_rpmalloc_initialized) {
|
| 3057 | rpmalloc_thread_initialize();
|
| 3058 | return 0;
|
| 3059 | }
|
| 3060 | _rpmalloc_initialized = 1;
|
| 3061 |
|
| 3062 | if (config)
|
| 3063 | memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
|
| 3064 | else
|
| 3065 | _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t));
|
| 3066 |
|
| 3067 | if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
|
| 3068 | _memory_config.memory_map = _rpmalloc_mmap_os;
|
| 3069 | _memory_config.memory_unmap = _rpmalloc_unmap_os;
|
| 3070 | }
|
| 3071 |
|
| 3072 | #if PLATFORM_WINDOWS
|
| 3073 | SYSTEM_INFO system_info;
|
| 3074 | memset(&system_info, 0, sizeof(system_info));
|
| 3075 | GetSystemInfo(&system_info);
|
| 3076 | _memory_map_granularity = system_info.dwAllocationGranularity;
|
| 3077 | #else
|
| 3078 | _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
|
| 3079 | #endif
|
| 3080 |
|
| 3081 | #if RPMALLOC_CONFIGURABLE
|
| 3082 | _memory_page_size = _memory_config.page_size;
|
| 3083 | #else
|
| 3084 | _memory_page_size = 0;
|
| 3085 | #endif
|
| 3086 | _memory_huge_pages = 0;
|
| 3087 | if (!_memory_page_size) {
|
| 3088 | #if PLATFORM_WINDOWS
|
| 3089 | _memory_page_size = system_info.dwPageSize;
|
| 3090 | #else
|
| 3091 | _memory_page_size = _memory_map_granularity;
|
| 3092 | if (_memory_config.enable_huge_pages) {
|
| 3093 | #if defined(__linux__)
|
| 3094 | size_t huge_page_size = 0;
|
| 3095 | FILE *meminfo = fopen("/proc/meminfo", "r");
|
| 3096 | if (meminfo) {
|
| 3097 | char line[128];
|
| 3098 | while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
|
| 3099 | line[sizeof(line) - 1] = 0;
|
| 3100 | if (strstr(line, "Hugepagesize:"))
|
| 3101 | huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
|
| 3102 | }
|
| 3103 | fclose(meminfo);
|
| 3104 | }
|
| 3105 | if (huge_page_size) {
|
| 3106 | _memory_huge_pages = 1;
|
| 3107 | _memory_page_size = huge_page_size;
|
| 3108 | _memory_map_granularity = huge_page_size;
|
| 3109 | }
|
| 3110 | #elif defined(__FreeBSD__)
|
| 3111 | int rc;
|
| 3112 | size_t sz = sizeof(rc);
|
| 3113 |
|
| 3114 | if (sysctlbyname("vm.pmap.pg_ps_enabled" , &rc, &sz, NULL, 0) == 0 &&
|
| 3115 | rc == 1) {
|
| 3116 | static size_t defsize = 2 * 1024 * 1024;
|
| 3117 | int nsize = 0;
|
| 3118 | size_t sizes[4] = {0};
|
| 3119 | _memory_huge_pages = 1;
|
| 3120 | _memory_page_size = defsize;
|
| 3121 | if ((nsize = getpagesizes(sizes, 4)) >= 2) {
|
| 3122 | nsize--;
|
| 3123 | for (size_t csize = sizes[nsize]; nsize >= 0 && csize;
|
| 3124 | --nsize, csize = sizes[nsize]) {
|
| 3125 | //! Unlikely, but as a precaution..
|
| 3126 | rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024),
|
| 3127 | "Invalid page size" );
|
| 3128 | if (defsize < csize) {
|
| 3129 | _memory_page_size = csize;
|
| 3130 | break;
|
| 3131 | }
|
| 3132 | }
|
| 3133 | }
|
| 3134 | _memory_map_granularity = _memory_page_size;
|
| 3135 | }
|
| 3136 | #elif defined(__APPLE__) || defined(__NetBSD__)
|
| 3137 | _memory_huge_pages = 1;
|
| 3138 | _memory_page_size = 2 * 1024 * 1024;
|
| 3139 | _memory_map_granularity = _memory_page_size;
|
| 3140 | #endif
|
| 3141 | }
|
| 3142 | #endif
|
| 3143 | } else {
|
| 3144 | if (_memory_config.enable_huge_pages)
|
| 3145 | _memory_huge_pages = 1;
|
| 3146 | }
|
| 3147 |
|
| 3148 | #if PLATFORM_WINDOWS
|
| 3149 | if (_memory_config.enable_huge_pages) {
|
| 3150 | HANDLE token = 0;
|
| 3151 | size_t large_page_minimum = GetLargePageMinimum();
|
| 3152 | if (large_page_minimum)
|
| 3153 | OpenProcessToken(GetCurrentProcess(),
|
| 3154 | TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
|
| 3155 | if (token) {
|
| 3156 | LUID luid;
|
| 3157 | if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
|
| 3158 | TOKEN_PRIVILEGES token_privileges;
|
| 3159 | memset(&token_privileges, 0, sizeof(token_privileges));
|
| 3160 | token_privileges.PrivilegeCount = 1;
|
| 3161 | token_privileges.Privileges[0].Luid = luid;
|
| 3162 | token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
| 3163 | if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
|
| 3164 | if (GetLastError() == ERROR_SUCCESS)
|
| 3165 | _memory_huge_pages = 1;
|
| 3166 | }
|
| 3167 | }
|
| 3168 | CloseHandle(token);
|
| 3169 | }
|
| 3170 | if (_memory_huge_pages) {
|
| 3171 | if (large_page_minimum > _memory_page_size)
|
| 3172 | _memory_page_size = large_page_minimum;
|
| 3173 | if (large_page_minimum > _memory_map_granularity)
|
| 3174 | _memory_map_granularity = large_page_minimum;
|
| 3175 | }
|
| 3176 | }
|
| 3177 | #endif
|
| 3178 |
|
| 3179 | size_t min_span_size = 256;
|
| 3180 | size_t max_page_size;
|
| 3181 | #if UINTPTR_MAX > 0xFFFFFFFF
|
| 3182 | max_page_size = 4096ULL * 1024ULL * 1024ULL;
|
| 3183 | #else
|
| 3184 | max_page_size = 4 * 1024 * 1024;
|
| 3185 | #endif
|
| 3186 | if (_memory_page_size < min_span_size)
|
| 3187 | _memory_page_size = min_span_size;
|
| 3188 | if (_memory_page_size > max_page_size)
|
| 3189 | _memory_page_size = max_page_size;
|
| 3190 | _memory_page_size_shift = 0;
|
| 3191 | size_t page_size_bit = _memory_page_size;
|
| 3192 | while (page_size_bit != 1) {
|
| 3193 | ++_memory_page_size_shift;
|
| 3194 | page_size_bit >>= 1;
|
| 3195 | }
|
| 3196 | _memory_page_size = ((size_t)1 << _memory_page_size_shift);
|
| 3197 |
|
| 3198 | #if RPMALLOC_CONFIGURABLE
|
| 3199 | if (!_memory_config.span_size) {
|
| 3200 | _memory_span_size = _memory_default_span_size;
|
| 3201 | _memory_span_size_shift = _memory_default_span_size_shift;
|
| 3202 | _memory_span_mask = _memory_default_span_mask;
|
| 3203 | } else {
|
| 3204 | size_t span_size = _memory_config.span_size;
|
| 3205 | if (span_size > (256 * 1024))
|
| 3206 | span_size = (256 * 1024);
|
| 3207 | _memory_span_size = 4096;
|
| 3208 | _memory_span_size_shift = 12;
|
| 3209 | while (_memory_span_size < span_size) {
|
| 3210 | _memory_span_size <<= 1;
|
| 3211 | ++_memory_span_size_shift;
|
| 3212 | }
|
| 3213 | _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
|
| 3214 | }
|
| 3215 | #endif
|
| 3216 |
|
| 3217 | _memory_span_map_count =
|
| 3218 | (_memory_config.span_map_count ? _memory_config.span_map_count
|
| 3219 | : DEFAULT_SPAN_MAP_COUNT);
|
| 3220 | if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
|
| 3221 | _memory_span_map_count = (_memory_page_size / _memory_span_size);
|
| 3222 | if ((_memory_page_size >= _memory_span_size) &&
|
| 3223 | ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
|
| 3224 | _memory_span_map_count = (_memory_page_size / _memory_span_size);
|
| 3225 | _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT)
|
| 3226 | ? DEFAULT_SPAN_MAP_COUNT
|
| 3227 | : _memory_span_map_count;
|
| 3228 |
|
| 3229 | _memory_config.page_size = _memory_page_size;
|
| 3230 | _memory_config.span_size = _memory_span_size;
|
| 3231 | _memory_config.span_map_count = _memory_span_map_count;
|
| 3232 | _memory_config.enable_huge_pages = _memory_huge_pages;
|
| 3233 |
|
| 3234 | #if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
|
| 3235 | defined(__TINYC__)
|
| 3236 | if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc))
|
| 3237 | return -1;
|
| 3238 | #endif
|
| 3239 | #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
|
| 3240 | fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
|
| 3241 | #endif
|
| 3242 |
|
| 3243 | // Setup all small and medium size classes
|
| 3244 | size_t iclass = 0;
|
| 3245 | _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
|
| 3246 | _rpmalloc_adjust_size_class(iclass);
|
| 3247 | for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
|
| 3248 | size_t size = iclass * SMALL_GRANULARITY;
|
| 3249 | _memory_size_class[iclass].block_size = (uint32_t)size;
|
| 3250 | _rpmalloc_adjust_size_class(iclass);
|
| 3251 | }
|
| 3252 | // At least two blocks per span, then fall back to large allocations
|
| 3253 | _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
|
| 3254 | if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
|
| 3255 | _memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
|
| 3256 | for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
|
| 3257 | size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
|
| 3258 | if (size > _memory_medium_size_limit) {
|
| 3259 | _memory_medium_size_limit =
|
| 3260 | SMALL_SIZE_LIMIT + (iclass * MEDIUM_GRANULARITY);
|
| 3261 | break;
|
| 3262 | }
|
| 3263 | _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
|
| 3264 | _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
|
| 3265 | }
|
| 3266 |
|
| 3267 | _memory_orphan_heaps = 0;
|
| 3268 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 3269 | _memory_first_class_orphan_heaps = 0;
|
| 3270 | #endif
|
| 3271 | #if ENABLE_STATISTICS
|
| 3272 | atomic_store32(&_memory_active_heaps, 0);
|
| 3273 | atomic_store32(&_mapped_pages, 0);
|
| 3274 | _mapped_pages_peak = 0;
|
| 3275 | atomic_store32(&_master_spans, 0);
|
| 3276 | atomic_store32(&_mapped_total, 0);
|
| 3277 | atomic_store32(&_unmapped_total, 0);
|
| 3278 | atomic_store32(&_mapped_pages_os, 0);
|
| 3279 | atomic_store32(&_huge_pages_current, 0);
|
| 3280 | _huge_pages_peak = 0;
|
| 3281 | #endif
|
| 3282 | memset(_memory_heaps, 0, sizeof(_memory_heaps));
|
| 3283 | atomic_store32_release(&_memory_global_lock, 0);
|
| 3284 |
|
| 3285 | rpmalloc_linker_reference();
|
| 3286 |
|
| 3287 | // Initialize this thread
|
| 3288 | rpmalloc_thread_initialize();
|
| 3289 | return 0;
|
| 3290 | }
|
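|  | // Editor's note: illustrative usage sketch, not part of the original source.
|  | // A caller may seed the allocator with an explicit configuration before the
|  | // first allocation; fields left at zero fall back to the defaults computed
|  | // above. The concrete values here are arbitrary examples.
|  | static int example_initialize_with_config(void) {
|  |   rpmalloc_config_t config;
|  |   memset(&config, 0, sizeof(config));
|  |   config.page_size = 0;         // 0 means query the OS page size
|  |   config.span_map_count = 64;   // map spans in batches of 64
|  |   config.enable_huge_pages = 0; // leave huge page support disabled
|  |   return rpmalloc_initialize_config(&config);
|  | }
|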
| 3291 |
|
| 3292 | //! Finalize the allocator
|
| 3293 | void rpmalloc_finalize(void) {
|
| 3294 | rpmalloc_thread_finalize(1);
|
| 3295 | // rpmalloc_dump_statistics(stdout);
|
| 3296 |
|
| 3297 | if (_memory_global_reserve) {
|
| 3298 | atomic_add32(&_memory_global_reserve_master->remaining_spans,
|
| 3299 | -(int32_t)_memory_global_reserve_count);
|
| 3300 | _memory_global_reserve_master = 0;
|
| 3301 | _memory_global_reserve_count = 0;
|
| 3302 | _memory_global_reserve = 0;
|
| 3303 | }
|
| 3304 | atomic_store32_release(&_memory_global_lock, 0);
|
| 3305 |
|
| 3306 | // Free all thread caches and fully free spans
|
| 3307 | for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
|
| 3308 | heap_t *heap = _memory_heaps[list_idx];
|
| 3309 | while (heap) {
|
| 3310 | heap_t *next_heap = heap->next_heap;
|
| 3311 | heap->finalize = 1;
|
| 3312 | _rpmalloc_heap_global_finalize(heap);
|
| 3313 | heap = next_heap;
|
| 3314 | }
|
| 3315 | }
|
| 3316 |
|
| 3317 | #if ENABLE_GLOBAL_CACHE
|
| 3318 | // Free global caches
|
| 3319 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
|
| 3320 | _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
|
| 3321 | #endif
|
| 3322 |
|
| 3323 | #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
|
| 3324 | pthread_key_delete(_memory_thread_heap);
|
| 3325 | #endif
|
| 3326 | #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
|
| 3327 | FlsFree(fls_key);
|
| 3328 | fls_key = 0;
|
| 3329 | #endif
|
| 3330 | #if ENABLE_STATISTICS
|
| 3331 | // If you hit these asserts you probably have memory leaks (perhaps global
|
| 3332 | // scope data doing dynamic allocations) or double frees in your code
|
| 3333 | rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected" );
|
| 3334 | rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0,
|
| 3335 | "Memory leak detected" );
|
| 3336 | #endif
|
| 3337 |
|
| 3338 | _rpmalloc_initialized = 0;
|
| 3339 | }
|
| 3340 |
|
| 3341 | //! Initialize thread, assign heap
|
| 3342 | extern inline void rpmalloc_thread_initialize(void) {
|
| 3343 | if (!get_thread_heap_raw()) {
|
| 3344 | heap_t *heap = _rpmalloc_heap_allocate(0);
|
| 3345 | if (heap) {
|
| 3346 | _rpmalloc_stat_inc(&_memory_active_heaps);
|
| 3347 | set_thread_heap(heap);
|
| 3348 | #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
|
| 3349 | FlsSetValue(fls_key, heap);
|
| 3350 | #endif
|
| 3351 | }
|
| 3352 | }
|
| 3353 | }
|
| 3354 |
|
| 3355 | //! Finalize thread, orphan heap
|
| 3356 | void rpmalloc_thread_finalize(int release_caches) {
|
| 3357 | heap_t *heap = get_thread_heap_raw();
|
| 3358 | if (heap)
|
| 3359 | _rpmalloc_heap_release_raw(heap, release_caches);
|
| 3360 | set_thread_heap(0);
|
| 3361 | #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
|
| 3362 | FlsSetValue(fls_key, 0);
|
| 3363 | #endif
|
| 3364 | }
|
| 3365 |
|
| 3366 | int rpmalloc_is_thread_initialized(void) {
|
| 3367 | return (get_thread_heap_raw() != 0) ? 1 : 0;
|
| 3368 | }
|
| 3369 |
|
| 3370 | const rpmalloc_config_t *rpmalloc_config(void) { return &_memory_config; }
|
| 3371 |
|
| 3372 | // Extern interface
|
| 3373 |
|
| 3374 | extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) {
|
| 3375 | #if ENABLE_VALIDATE_ARGS
|
| 3376 | if (size >= MAX_ALLOC_SIZE) {
|
| 3377 | errno = EINVAL;
|
| 3378 | return 0;
|
| 3379 | }
|
| 3380 | #endif
|
| 3381 | heap_t *heap = get_thread_heap();
|
| 3382 | return _rpmalloc_allocate(heap, size);
|
| 3383 | }
|
| 3384 |
|
| 3385 | extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); }
|
| 3386 |
|
| 3387 | extern inline RPMALLOC_ALLOCATOR void *rpcalloc(size_t num, size_t size) {
|
| 3388 | size_t total;
|
| 3389 | #if ENABLE_VALIDATE_ARGS
|
| 3390 | #if PLATFORM_WINDOWS
|
| 3391 | int err = SizeTMult(num, size, &total);
|
| 3392 | if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
|
| 3393 | errno = EINVAL;
|
| 3394 | return 0;
|
| 3395 | }
|
| 3396 | #else
|
| 3397 | int err = __builtin_umull_overflow(num, size, &total);
|
| 3398 | if (err || (total >= MAX_ALLOC_SIZE)) {
|
| 3399 | errno = EINVAL;
|
| 3400 | return 0;
|
| 3401 | }
|
| 3402 | #endif
|
| 3403 | #else
|
| 3404 | total = num * size;
|
| 3405 | #endif
|
| 3406 | heap_t *heap = get_thread_heap();
|
| 3407 | void *block = _rpmalloc_allocate(heap, total);
|
| 3408 | if (block)
|
| 3409 | memset(block, 0, total);
|
| 3410 | return block;
|
| 3411 | }
|
| 3412 |
|
| 3413 | extern inline RPMALLOC_ALLOCATOR void *rprealloc(void *ptr, size_t size) {
|
| 3414 | #if ENABLE_VALIDATE_ARGS
|
| 3415 | if (size >= MAX_ALLOC_SIZE) {
|
| 3416 | errno = EINVAL;
|
| 3417 | return ptr;
|
| 3418 | }
|
| 3419 | #endif
|
| 3420 | heap_t *heap = get_thread_heap();
|
| 3421 | return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
|
| 3422 | }
|
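|  | // Editor's note: illustrative usage sketch, not part of the original source.
|  | // The extern entry points above mirror the standard malloc interface; a
|  | // typical call sequence looks like this (sizes are arbitrary examples).
|  | static void *example_basic_usage(void) {
|  |   void *p = rpmalloc(64); // served from the calling thread's heap
|  |   if (!p)
|  |     return 0;
|  |   void *q = rprealloc(p, 256); // grows the block, copying the contents
|  |   if (!q) {
|  |     rpfree(p); // reallocation failed, the original block is still valid
|  |     return 0;
|  |   }
|  |   return q; // caller eventually releases it with rpfree()
|  | }
|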
| 3423 |
|
| 3424 | extern RPMALLOC_ALLOCATOR void *rpaligned_realloc(void *ptr, size_t alignment,
|
| 3425 | size_t size, size_t oldsize,
|
| 3426 | unsigned int flags) {
|
| 3427 | #if ENABLE_VALIDATE_ARGS
|
| 3428 | if ((size + alignment < size) || (alignment > _memory_page_size)) {
|
| 3429 | errno = EINVAL;
|
| 3430 | return 0;
|
| 3431 | }
|
| 3432 | #endif
|
| 3433 | heap_t *heap = get_thread_heap();
|
| 3434 | return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize,
|
| 3435 | flags);
|
| 3436 | }
|
| 3437 |
|
| 3438 | extern RPMALLOC_ALLOCATOR void *rpaligned_alloc(size_t alignment, size_t size) {
|
| 3439 | heap_t *heap = get_thread_heap();
|
| 3440 | return _rpmalloc_aligned_allocate(heap, alignment, size);
|
| 3441 | }
|
| 3442 |
|
| 3443 | extern inline RPMALLOC_ALLOCATOR void *
|
| 3444 | rpaligned_calloc(size_t alignment, size_t num, size_t size) {
|
| 3445 | size_t total;
|
| 3446 | #if ENABLE_VALIDATE_ARGS
|
| 3447 | #if PLATFORM_WINDOWS
|
| 3448 | int err = SizeTMult(num, size, &total);
|
| 3449 | if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
|
| 3450 | errno = EINVAL;
|
| 3451 | return 0;
|
| 3452 | }
|
| 3453 | #else
|
| 3454 | int err = __builtin_umull_overflow(num, size, &total);
|
| 3455 | if (err || (total >= MAX_ALLOC_SIZE)) {
|
| 3456 | errno = EINVAL;
|
| 3457 | return 0;
|
| 3458 | }
|
| 3459 | #endif
|
| 3460 | #else
|
| 3461 | total = num * size;
|
| 3462 | #endif
|
| 3463 | void *block = rpaligned_alloc(alignment, total);
|
| 3464 | if (block)
|
| 3465 | memset(block, 0, total);
|
| 3466 | return block;
|
| 3467 | }
|
| 3468 |
|
| 3469 | extern inline RPMALLOC_ALLOCATOR void *rpmemalign(size_t alignment,
|
| 3470 | size_t size) {
|
| 3471 | return rpaligned_alloc(alignment, size);
|
| 3472 | }
|
| 3473 |
|
| 3474 | extern inline int rpposix_memalign(void **memptr, size_t alignment,
|
| 3475 | size_t size) {
|
| 3476 | if (memptr)
|
| 3477 | *memptr = rpaligned_alloc(alignment, size);
|
| 3478 | else
|
| 3479 | return EINVAL;
|
| 3480 | return *memptr ? 0 : ENOMEM;
|
| 3481 | }
|
| 3482 |
|
| 3483 | extern inline size_t rpmalloc_usable_size(void *ptr) {
|
| 3484 | return (ptr ? _rpmalloc_usable_size(ptr) : 0);
|
| 3485 | }
|
| 3486 |
|
| 3487 | extern inline void rpmalloc_thread_collect(void) {}
|
| 3488 |
|
| 3489 | void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
|
| 3490 | memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
|
| 3491 | heap_t *heap = get_thread_heap_raw();
|
| 3492 | if (!heap)
|
| 3493 | return;
|
| 3494 |
|
| 3495 | for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
|
| 3496 | size_class_t *size_class = _memory_size_class + iclass;
|
| 3497 | span_t *span = heap->size_class[iclass].partial_span;
|
| 3498 | while (span) {
|
| 3499 | size_t free_count = span->list_size;
|
| 3500 | size_t block_count = size_class->block_count;
|
| 3501 | if (span->free_list_limit < block_count)
|
| 3502 | block_count = span->free_list_limit;
|
| 3503 | free_count += (block_count - span->used_count);
|
| 3504 | stats->sizecache += free_count * size_class->block_size;
|
| 3505 | span = span->next;
|
| 3506 | }
|
| 3507 | }
|
| 3508 |
|
| 3509 | #if ENABLE_THREAD_CACHE
|
| 3510 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 3511 | span_cache_t *span_cache;
|
| 3512 | if (!iclass)
|
| 3513 | span_cache = &heap->span_cache;
|
| 3514 | else
|
| 3515 | span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
|
| 3516 | stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size;
|
| 3517 | }
|
| 3518 | #endif
|
| 3519 |
|
| 3520 | span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
|
| 3521 | while (deferred) {
|
| 3522 | if (deferred->size_class != SIZE_CLASS_HUGE)
|
| 3523 | stats->spancache += (size_t)deferred->span_count * _memory_span_size;
|
| 3524 | deferred = (span_t *)deferred->free_list;
|
| 3525 | }
|
| 3526 |
|
| 3527 | #if ENABLE_STATISTICS
|
| 3528 | stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
|
| 3529 | stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
|
| 3530 |
|
| 3531 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 3532 | stats->span_use[iclass].current =
|
| 3533 | (size_t)atomic_load32(&heap->span_use[iclass].current);
|
| 3534 | stats->span_use[iclass].peak =
|
| 3535 | (size_t)atomic_load32(&heap->span_use[iclass].high);
|
| 3536 | stats->span_use[iclass].to_global =
|
| 3537 | (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
|
| 3538 | stats->span_use[iclass].from_global =
|
| 3539 | (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
|
| 3540 | stats->span_use[iclass].to_cache =
|
| 3541 | (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
|
| 3542 | stats->span_use[iclass].from_cache =
|
| 3543 | (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
|
| 3544 | stats->span_use[iclass].to_reserved =
|
| 3545 | (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
|
| 3546 | stats->span_use[iclass].from_reserved =
|
| 3547 | (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
|
| 3548 | stats->span_use[iclass].map_calls =
|
| 3549 | (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
|
| 3550 | }
|
| 3551 | for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
|
| 3552 | stats->size_use[iclass].alloc_current =
|
| 3553 | (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
|
| 3554 | stats->size_use[iclass].alloc_peak =
|
| 3555 | (size_t)heap->size_class_use[iclass].alloc_peak;
|
| 3556 | stats->size_use[iclass].alloc_total =
|
| 3557 | (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
|
| 3558 | stats->size_use[iclass].free_total =
|
| 3559 | (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
|
| 3560 | stats->size_use[iclass].spans_to_cache =
|
| 3561 | (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
|
| 3562 | stats->size_use[iclass].spans_from_cache =
|
| 3563 | (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
|
| 3564 | stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(
|
| 3565 | &heap->size_class_use[iclass].spans_from_reserved);
|
| 3566 | stats->size_use[iclass].map_calls =
|
| 3567 | (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
|
| 3568 | }
|
| 3569 | #endif
|
| 3570 | }
|
| 3571 |
|
| 3572 | void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
|
| 3573 | memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
|
| 3574 | #if ENABLE_STATISTICS
|
| 3575 | stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
|
| 3576 | stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
|
| 3577 | stats->mapped_total =
|
| 3578 | (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
|
| 3579 | stats->unmapped_total =
|
| 3580 | (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
|
| 3581 | stats->huge_alloc =
|
| 3582 | (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
|
| 3583 | stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
|
| 3584 | #endif
|
| 3585 | #if ENABLE_GLOBAL_CACHE
|
| 3586 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 3587 | global_cache_t *cache = &_memory_span_cache[iclass];
|
| 3588 | while (!atomic_cas32_acquire(&cache->lock, 1, 0))
|
| 3589 | _rpmalloc_spin();
|
| 3590 | uint32_t count = cache->count;
|
| 3591 | #if ENABLE_UNLIMITED_CACHE
|
| 3592 | span_t *current_span = cache->overflow;
|
| 3593 | while (current_span) {
|
| 3594 | ++count;
|
| 3595 | current_span = current_span->next;
|
| 3596 | }
|
| 3597 | #endif
|
| 3598 | atomic_store32_release(&cache->lock, 0);
|
| 3599 | stats->cached += count * (iclass + 1) * _memory_span_size;
|
| 3600 | }
|
| 3601 | #endif
|
| 3602 | }
|
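|  | // Editor's note: illustrative usage sketch, not part of the original source.
|  | // Global statistics can be sampled at any time; the mapped counters are only
|  | // populated when the library is built with ENABLE_STATISTICS, while the
|  | // cached counter requires ENABLE_GLOBAL_CACHE.
|  | static size_t example_query_memory_usage(void) {
|  |   rpmalloc_global_statistics_t stats;
|  |   rpmalloc_global_statistics(&stats);
|  |   return stats.mapped + stats.cached; // mapped pages plus global cache bytes
|  | }
|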
| 3603 |
|
| 3604 | #if ENABLE_STATISTICS
|
| 3605 |
|
| 3606 | static void _memory_heap_dump_statistics(heap_t *heap, void *file) {
|
| 3607 | fprintf(file, "Heap %d stats:\n" , heap->id);
|
| 3608 | fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize "
|
| 3609 | "BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB "
|
| 3610 | "FromCacheMiB FromReserveMiB MmapCalls\n" );
|
| 3611 | for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
|
| 3612 | if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
|
| 3613 | continue;
|
| 3614 | fprintf(
|
| 3615 | file,
|
| 3616 | "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu "
|
| 3617 | "%9u\n" ,
|
| 3618 | (uint32_t)iclass,
|
| 3619 | atomic_load32(&heap->size_class_use[iclass].alloc_current),
|
| 3620 | heap->size_class_use[iclass].alloc_peak,
|
| 3621 | atomic_load32(&heap->size_class_use[iclass].alloc_total),
|
| 3622 | atomic_load32(&heap->size_class_use[iclass].free_total),
|
| 3623 | _memory_size_class[iclass].block_size,
|
| 3624 | _memory_size_class[iclass].block_count,
|
| 3625 | atomic_load32(&heap->size_class_use[iclass].spans_current),
|
| 3626 | heap->size_class_use[iclass].spans_peak,
|
| 3627 | ((size_t)heap->size_class_use[iclass].alloc_peak *
|
| 3628 | (size_t)_memory_size_class[iclass].block_size) /
|
| 3629 | (size_t)(1024 * 1024),
|
| 3630 | ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) *
|
| 3631 | _memory_span_size) /
|
| 3632 | (size_t)(1024 * 1024),
|
| 3633 | ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) *
|
| 3634 | _memory_span_size) /
|
| 3635 | (size_t)(1024 * 1024),
|
| 3636 | ((size_t)atomic_load32(
|
| 3637 | &heap->size_class_use[iclass].spans_from_reserved) *
|
| 3638 | _memory_span_size) /
|
| 3639 | (size_t)(1024 * 1024),
|
| 3640 | atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
|
| 3641 | }
|
| 3642 | fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB "
|
| 3643 | "FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB "
|
| 3644 | "FromGlobalMiB MmapCalls\n" );
|
| 3645 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 3646 | if (!atomic_load32(&heap->span_use[iclass].high) &&
|
| 3647 | !atomic_load32(&heap->span_use[iclass].spans_map_calls))
|
| 3648 | continue;
|
| 3649 | fprintf(
|
| 3650 | file,
|
| 3651 | "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n" ,
|
| 3652 | (uint32_t)(iclass + 1), atomic_load32(&heap->span_use[iclass].current),
|
| 3653 | atomic_load32(&heap->span_use[iclass].high),
|
| 3654 | atomic_load32(&heap->span_use[iclass].spans_deferred),
|
| 3655 | ((size_t)atomic_load32(&heap->span_use[iclass].high) *
|
| 3656 | (size_t)_memory_span_size * (iclass + 1)) /
|
| 3657 | (size_t)(1024 * 1024),
|
| 3658 | #if ENABLE_THREAD_CACHE
|
| 3659 | (unsigned int)(!iclass ? heap->span_cache.count
|
| 3660 | : heap->span_large_cache[iclass - 1].count),
|
| 3661 | ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) *
|
| 3662 | (iclass + 1) * _memory_span_size) /
|
| 3663 | (size_t)(1024 * 1024),
|
| 3664 | ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) *
|
| 3665 | (iclass + 1) * _memory_span_size) /
|
| 3666 | (size_t)(1024 * 1024),
|
| 3667 | #else
|
| 3668 | 0, (size_t)0, (size_t)0,
|
| 3669 | #endif
|
| 3670 | ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) *
|
| 3671 | (iclass + 1) * _memory_span_size) /
|
| 3672 | (size_t)(1024 * 1024),
|
| 3673 | ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) *
|
| 3674 | (iclass + 1) * _memory_span_size) /
|
| 3675 | (size_t)(1024 * 1024),
|
| 3676 | ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) *
|
| 3677 | (size_t)_memory_span_size * (iclass + 1)) /
|
| 3678 | (size_t)(1024 * 1024),
|
| 3679 | ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) *
|
| 3680 | (size_t)_memory_span_size * (iclass + 1)) /
|
| 3681 | (size_t)(1024 * 1024),
|
| 3682 | atomic_load32(&heap->span_use[iclass].spans_map_calls));
|
| 3683 | }
|
| 3684 | fprintf(file, "Full spans: %zu\n" , heap->full_span_count);
|
| 3685 | fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n" );
|
| 3686 | fprintf(
|
| 3687 | file, "%17zu %17zu\n" ,
|
| 3688 | (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
|
| 3689 | (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
|
| 3690 | }
|
| 3691 |
|
| 3692 | #endif
|
| 3693 |
|
| 3694 | void rpmalloc_dump_statistics(void *file) {
|
| 3695 | #if ENABLE_STATISTICS
|
| 3696 | for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
|
| 3697 | heap_t *heap = _memory_heaps[list_idx];
|
| 3698 | while (heap) {
|
| 3699 | int need_dump = 0;
|
| 3700 | for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT);
|
| 3701 | ++iclass) {
|
| 3702 | if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
|
| 3703 | rpmalloc_assert(
|
| 3704 | !atomic_load32(&heap->size_class_use[iclass].free_total),
|
| 3705 | "Heap statistics counter mismatch" );
|
| 3706 | rpmalloc_assert(
|
| 3707 | !atomic_load32(&heap->size_class_use[iclass].spans_map_calls),
|
| 3708 | "Heap statistics counter mismatch" );
|
| 3709 | continue;
|
| 3710 | }
|
| 3711 | need_dump = 1;
|
| 3712 | }
|
| 3713 | for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT);
|
| 3714 | ++iclass) {
|
| 3715 | if (!atomic_load32(&heap->span_use[iclass].high) &&
|
| 3716 | !atomic_load32(&heap->span_use[iclass].spans_map_calls))
|
| 3717 | continue;
|
| 3718 | need_dump = 1;
|
| 3719 | }
|
| 3720 | if (need_dump)
|
| 3721 | _memory_heap_dump_statistics(heap, file);
|
| 3722 | heap = heap->next_heap;
|
| 3723 | }
|
| 3724 | }
|
| 3725 | fprintf(file, "Global stats:\n" );
|
| 3726 | size_t huge_current =
|
| 3727 | (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
|
| 3728 | size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
|
| 3729 | fprintf(file, "HugeCurrentMiB HugePeakMiB\n" );
|
| 3730 | fprintf(file, "%14zu %11zu\n" , huge_current / (size_t)(1024 * 1024),
|
| 3731 | huge_peak / (size_t)(1024 * 1024));
|
| 3732 |
|
| 3733 | #if ENABLE_GLOBAL_CACHE
|
| 3734 | fprintf(file, "GlobalCacheMiB\n" );
|
| 3735 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 3736 | global_cache_t *cache = _memory_span_cache + iclass;
|
| 3737 | size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;
|
| 3738 |
|
| 3739 | size_t global_overflow_cache = 0;
|
| 3740 | span_t *span = cache->overflow;
|
| 3741 | while (span) {
|
| 3742 | global_overflow_cache += iclass * _memory_span_size;
|
| 3743 | span = span->next;
|
| 3744 | }
|
| 3745 | if (global_cache || global_overflow_cache || cache->insert_count ||
|
| 3746 | cache->extract_count)
|
| 3747 | fprintf(file,
|
| 3748 | "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n" ,
|
| 3749 | iclass + 1, global_cache / (size_t)(1024 * 1024),
|
| 3750 | global_overflow_cache / (size_t)(1024 * 1024),
|
| 3751 | cache->insert_count, cache->extract_count);
|
| 3752 | }
|
| 3753 | #endif
|
| 3754 |
|
| 3755 | size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
|
| 3756 | size_t mapped_os =
|
| 3757 | (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
|
| 3758 | size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
|
| 3759 | size_t mapped_total =
|
| 3760 | (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
|
| 3761 | size_t unmapped_total =
|
| 3762 | (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
|
| 3763 | fprintf(
|
| 3764 | file,
|
| 3765 | "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n" );
|
| 3766 | fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n" ,
|
| 3767 | mapped / (size_t)(1024 * 1024), mapped_os / (size_t)(1024 * 1024),
|
| 3768 | mapped_peak / (size_t)(1024 * 1024),
|
| 3769 | mapped_total / (size_t)(1024 * 1024),
|
| 3770 | unmapped_total / (size_t)(1024 * 1024));
|
| 3771 |
|
| 3772 | fprintf(file, "\n" );
|
| 3773 | #if 0
|
| 3774 | int64_t allocated = atomic_load64(&_allocation_counter);
|
| 3775 | int64_t deallocated = atomic_load64(&_deallocation_counter);
|
| 3776 | fprintf(file, "Allocation count: %lli\n" , allocated);
|
| 3777 | fprintf(file, "Deallocation count: %lli\n" , deallocated);
|
| 3778 | fprintf(file, "Current allocations: %lli\n" , (allocated - deallocated));
|
| 3779 | fprintf(file, "Master spans: %d\n" , atomic_load32(&_master_spans));
|
| 3780 | fprintf(file, "Dangling master spans: %d\n" , atomic_load32(&_unmapped_master_spans));
|
| 3781 | #endif
|
| 3782 | #endif
|
| 3783 | (void)sizeof(file);
|
| 3784 | }
|
| 3785 |
|
| 3786 | #if RPMALLOC_FIRST_CLASS_HEAPS
|
| 3787 |
|
| 3788 | extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) {
|
| 3789 | // Must be a pristine heap from newly mapped memory pages, or else memory
|
| 3790 | // blocks could already be allocated from the heap which would (wrongly) be
|
| 3791 | // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps
|
| 3792 | // guaranteed to be pristine from the dedicated orphan list can be used.
|
| 3793 | heap_t *heap = _rpmalloc_heap_allocate(1);
|
| 3794 | rpmalloc_assume(heap != NULL);
|
| 3795 | heap->owner_thread = 0;
|
| 3796 | _rpmalloc_stat_inc(&_memory_active_heaps);
|
| 3797 | return heap;
|
| 3798 | }
|
| 3799 |
|
| 3800 | extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) {
|
| 3801 | if (heap)
|
| 3802 | _rpmalloc_heap_release(heap, 1, 1);
|
| 3803 | }
|
| 3804 |
|
| 3805 | extern inline RPMALLOC_ALLOCATOR void *
|
| 3806 | rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
|
| 3807 | #if ENABLE_VALIDATE_ARGS
|
| 3808 | if (size >= MAX_ALLOC_SIZE) {
|
| 3809 | errno = EINVAL;
|
| 3810 | return 0;
|
| 3811 | }
|
| 3812 | #endif
|
| 3813 | return _rpmalloc_allocate(heap, size);
|
| 3814 | }
|
| 3815 |
|
| 3816 | extern inline RPMALLOC_ALLOCATOR void *
|
| 3817 | rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
|
| 3818 | size_t size) {
|
| 3819 | #if ENABLE_VALIDATE_ARGS
|
| 3820 | if (size >= MAX_ALLOC_SIZE) {
|
| 3821 | errno = EINVAL;
|
| 3822 | return 0;
|
| 3823 | }
|
| 3824 | #endif
|
| 3825 | return _rpmalloc_aligned_allocate(heap, alignment, size);
|
| 3826 | }
|
| 3827 |
|
| 3828 | extern inline RPMALLOC_ALLOCATOR void *
|
| 3829 | rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
|
| 3830 | return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
|
| 3831 | }
|
| 3832 |
|
| 3833 | extern inline RPMALLOC_ALLOCATOR void *
|
| 3834 | rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
|
| 3835 | size_t num, size_t size) {
|
| 3836 | size_t total;
|
| 3837 | #if ENABLE_VALIDATE_ARGS
|
| 3838 | #if PLATFORM_WINDOWS
|
| 3839 | int err = SizeTMult(num, size, &total);
|
| 3840 | if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
|
| 3841 | errno = EINVAL;
|
| 3842 | return 0;
|
| 3843 | }
|
| 3844 | #else
|
| 3845 | int err = __builtin_umull_overflow(num, size, &total);
|
| 3846 | if (err || (total >= MAX_ALLOC_SIZE)) {
|
| 3847 | errno = EINVAL;
|
| 3848 | return 0;
|
| 3849 | }
|
| 3850 | #endif
|
| 3851 | #else
|
| 3852 | total = num * size;
|
| 3853 | #endif
|
| 3854 | void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
|
| 3855 | if (block)
|
| 3856 | memset(block, 0, total);
|
| 3857 | return block;
|
| 3858 | }
|
| 3859 |
|
| 3860 | extern inline RPMALLOC_ALLOCATOR void *
|
| 3861 | rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
|
| 3862 | unsigned int flags) {
|
| 3863 | #if ENABLE_VALIDATE_ARGS
|
| 3864 | if (size >= MAX_ALLOC_SIZE) {
|
| 3865 | errno = EINVAL;
|
| 3866 | return ptr;
|
| 3867 | }
|
| 3868 | #endif
|
| 3869 | return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
|
| 3870 | }
|
| 3871 |
|
| 3872 | extern inline RPMALLOC_ALLOCATOR void *
|
| 3873 | rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr,
|
| 3874 | size_t alignment, size_t size,
|
| 3875 | unsigned int flags) {
|
| 3876 | #if ENABLE_VALIDATE_ARGS
|
| 3877 | if ((size + alignment < size) || (alignment > _memory_page_size)) {
|
| 3878 | errno = EINVAL;
|
| 3879 | return 0;
|
| 3880 | }
|
| 3881 | #endif
|
| 3882 | return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
|
| 3883 | }
|
| 3884 |
|
| 3885 | extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
|
| 3886 | (void)sizeof(heap);
|
| 3887 | _rpmalloc_deallocate(ptr);
|
| 3888 | }
|
| 3889 |
|
| 3890 | extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
|
| 3891 | span_t *span;
|
| 3892 | span_t *next_span;
|
| 3893 |
|
| 3894 | _rpmalloc_heap_cache_adopt_deferred(heap, 0);
|
| 3895 |
|
| 3896 | for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
|
| 3897 | span = heap->size_class[iclass].partial_span;
|
| 3898 | while (span) {
|
| 3899 | next_span = span->next;
|
| 3900 | _rpmalloc_heap_cache_insert(heap, span);
|
| 3901 | span = next_span;
|
| 3902 | }
|
| 3903 | heap->size_class[iclass].partial_span = 0;
|
| 3904 | span = heap->full_span[iclass];
|
| 3905 | while (span) {
|
| 3906 | next_span = span->next;
|
| 3907 | _rpmalloc_heap_cache_insert(heap, span);
|
| 3908 | span = next_span;
|
| 3909 | }
|
| 3910 |
|
| 3911 | span = heap->size_class[iclass].cache;
|
| 3912 | if (span)
|
| 3913 | _rpmalloc_heap_cache_insert(heap, span);
|
| 3914 | heap->size_class[iclass].cache = 0;
|
| 3915 | }
|
| 3916 | memset(heap->size_class, 0, sizeof(heap->size_class));
|
| 3917 | memset(heap->full_span, 0, sizeof(heap->full_span));
|
| 3918 |
|
| 3919 | span = heap->large_huge_span;
|
| 3920 | while (span) {
|
| 3921 | next_span = span->next;
|
| 3922 | if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
|
| 3923 | _rpmalloc_deallocate_huge(span);
|
| 3924 | else
|
| 3925 | _rpmalloc_heap_cache_insert(heap, span);
|
| 3926 | span = next_span;
|
| 3927 | }
|
| 3928 | heap->large_huge_span = 0;
|
| 3929 | heap->full_span_count = 0;
|
| 3930 |
|
| 3931 | #if ENABLE_THREAD_CACHE
|
| 3932 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 3933 | span_cache_t *span_cache;
|
| 3934 | if (!iclass)
|
| 3935 | span_cache = &heap->span_cache;
|
| 3936 | else
|
| 3937 | span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
|
| 3938 | if (!span_cache->count)
|
| 3939 | continue;
|
| 3940 | #if ENABLE_GLOBAL_CACHE
|
| 3941 | _rpmalloc_stat_add64(&heap->thread_to_global,
|
| 3942 | span_cache->count * (iclass + 1) * _memory_span_size);
|
| 3943 | _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
|
| 3944 | span_cache->count);
|
| 3945 | _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
|
| 3946 | span_cache->count);
|
| 3947 | #else
|
| 3948 | for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
|
| 3949 | _rpmalloc_span_unmap(span_cache->span[ispan]);
|
| 3950 | #endif
|
| 3951 | span_cache->count = 0;
|
| 3952 | }
|
| 3953 | #endif
|
| 3954 |
|
| 3955 | #if ENABLE_STATISTICS
|
| 3956 | for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
|
| 3957 | atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
|
| 3958 | atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
|
| 3959 | }
|
| 3960 | for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
|
| 3961 | atomic_store32(&heap->span_use[iclass].current, 0);
|
| 3962 | }
|
| 3963 | #endif
|
| 3964 | }
|
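|  | // Editor's note: illustrative usage sketch, not part of the original source.
|  | // A first-class heap owns its allocations explicitly, so everything it has
|  | // handed out can be released in one call before the heap itself is returned.
|  | static void example_first_class_heap(void) {
|  |   rpmalloc_heap_t *heap = rpmalloc_heap_acquire();
|  |   void *a = rpmalloc_heap_alloc(heap, 128);
|  |   void *b = rpmalloc_heap_aligned_alloc(heap, 64, 4096);
|  |   (void)a;
|  |   (void)b;
|  |   rpmalloc_heap_free_all(heap); // releases every block owned by this heap
|  |   rpmalloc_heap_release(heap);  // the heap itself goes back to the allocator
|  | }
|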
| 3965 |
|
| 3966 | extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
|
| 3967 | heap_t *prev_heap = get_thread_heap_raw();
|
| 3968 | if (prev_heap != heap) {
|
| 3969 | set_thread_heap(heap);
|
| 3970 | if (prev_heap)
|
| 3971 | rpmalloc_heap_release(prev_heap);
|
| 3972 | }
|
| 3973 | }
|
| 3974 |
|
| 3975 | extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) {
|
| 3976 | // Grab the span, and then the heap from the span
|
| 3977 | span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
|
| 3978 | if (span) {
|
| 3979 | return span->heap;
|
| 3980 | }
|
| 3981 | return 0;
|
| 3982 | }
|
| 3983 |
|
| 3984 | #endif
|
| 3985 |
|
| 3986 | #if ENABLE_PRELOAD || ENABLE_OVERRIDE
|
| 3987 |
|
| 3988 | #include "malloc.c"
|
| 3989 |
|
| 3990 | #endif
|
| 3991 |
|
| 3992 | void rpmalloc_linker_reference(void) { (void)sizeof(_rpmalloc_initialized); }
|
| 3993 | |