Last Updated on April 21, 2025 by chase
This weekend I thought I would brush up a little bit on some C++, I thought back to some of the more challenging assignments I had back when I was in school. This one isn’t as wide in scope as that assignment, but it was a great exercise. I’m not one to jump into heavy template usage where it isn’t needed, but this sort of system depends on it for extensibility of multiple data types.
This turned out great for an initial pass like this. It is a node based memory manager, you define a class that specifies how much memory you want each node to hold, as well as how many nodes. Each node can store up to that amount of memory. Each node is tracked in an unordered_map that allows for O(1) retrieval during deletion. A linked list is used for the free list to allow O(1) time during allocation for finding an available node, as well as during adding the deallocated node back to the free list.
What I’ve always enjoyed about memory managers is just how the memory is contiguous, minimizing paging, performance gains and really just keeping data organized.

Here is the implementation:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
#pragma once namespace Chazix { template <size_t S> class CDataAllocatorBase; template <size_t S> class CNode; using byte = unsigned char; template <size_t S> using InUseMap = std::unordered_map<std::uintptr_t, CNode<S>*>; constexpr const byte g_validPad = 0xAA; constexpr const byte g_emptyMem = 0xEE; constexpr const byte g_allocatedMask = 0x1; //----------------------------------------------------------------------------------------------- #pragma pack(push, 1) struct SHeader { byte m_metaData{ 0 }; size_t m_size{ 0 }; const void MarkAllocated(bool allocated) { if (allocated) { m_metaData |= g_allocatedMask; return; } m_metaData &= ~g_allocatedMask; } const bool Allocated() const { return (m_metaData & g_allocatedMask) != 0; } }; //----------------------------------------------------------------------------------------------- template <size_t T> struct SPadding { constexpr SPadding() : m_pad{ 0 } { std::fill(std::begin(m_pad), std::end(m_pad), g_validPad); } byte m_pad[T]{ 0 }; }; constexpr const size_t g_padByteCount = 4; constexpr const SPadding<g_padByteCount> g_validPadding{}; //----------------------------------------------------------------------------------------------- template <size_t S> class CNode : public CListNode<CNode<S>> { public: void Init() { memset(&m_header, 0x00, sizeof(m_header)); m_header.m_size = DataSize(); memset(&m_padInit, g_validPad, sizeof(m_padInit)); memset(m_data, g_emptyMem, sizeof(m_data)); memset(&m_padEnd, g_validPad, sizeof(m_padEnd)); } constexpr size_t DataSize() { return sizeof(m_data) / sizeof(byte); } void* DataPtr() { return m_data; } const bool Allocated() const { return m_header.Allocated(); } void MarkAllocated(bool allocated) { m_header.MarkAllocated(allocated); } private: SHeader m_header; SPadding<g_padByteCount> m_padInit; byte m_data[S]{ 0 }; SPadding<g_padByteCount> m_padEnd; friend class CDataAllocatorBase<S>; }; //----------------------------------------------------------------------------------------------- template <size_t S> class CDataAllocatorBase { public: CDataAllocatorBase(const size_t nodeCount) : m_segments(nodeCount), m_totalSize(nodeCount * sizeof(CNode<S>)) { m_buffer = std::make_unique<byte[]>(m_totalSize); m_inUseMap = std::make_unique<InUseMap<S>>(); m_freeList = std::make_unique<CLinkedList<CNode<S>>>(); std::span<byte> bufferSpan(m_buffer.get(), m_totalSize); for (auto i = 0; i < m_segments; ++i) { CNode<S>* node = recast_<CNode<S>*>(&bufferSpan[i * sizeof(CNode<S>)]); node->Init(); m_freeList->Append(node); } } virtual ~CDataAllocatorBase() { m_buffer.reset(nullptr); m_inUseMap.reset(nullptr); m_freeList.reset(nullptr); } template <typename T, typename ... C> T* Alloc(C&& ... args) { assert(sizeof(T) <= DataSize()); return AllocHelper(args ...); } template <typename ... C> void* Alloc(size_t bytes, C&& ... args) { assert(bytes <= DataSize()); return AllocHelper<byte>(args ...); } template <typename T> const bool Allocated(T* data) { assert(DataInBounds(data)); CNode<S>* node = CastToNode(data); return node->Allocated(); } template <typename T> void Delete(T* data) { assert(DataInBounds(data)); CNode<S>* node = CastToNode(data); std::uintptr_t nodeAddr = recast_<std::uintptr_t>(node); std::lock_guard lock(m_mutex); InUseMap<S>& inUseMap = *m_inUseMap; assert(inUseMap.contains(nodeAddr)); // reset to empty node data inUseMap.erase(nodeAddr); node->Init(); m_freeList->Append(node); } constexpr size_t DataSize() { return m_dataSize; } private: template <typename T, typename ... C> T* AllocHelper(C&& ... args) { std::lock_guard lock(m_mutex); CNode<S>* node = m_freeList->PopFront(); assert(node != nullptr); // out of usable memory assert(IsValidPadding(node)); __assume(sizeof(T) <= sizeof(node->m_data)); T* data = new (node->m_data) T(std::forward<C>(args)...); node->MarkAllocated(true); std::uintptr_t nodeAddr = recast_<std::uintptr_t>(node); InUseMap<S>& inUseMap = *m_inUseMap; inUseMap[nodeAddr] = node; return recast_<T*>(data); } bool IsValidPadding(CNode<S>* node) { return memcmp(node->m_padInit.m_pad, g_validPadding.m_pad, sizeof(g_validPadding.m_pad)) == 0 && memcmp(node->m_padEnd.m_pad, g_validPadding.m_pad, sizeof(g_validPadding.m_pad)) == 0; } template <typename T> CNode<S>* CastToNode(T* data) { CNode<S>* node = recast_<CNode<S>*>(recast_<std::uintptr_t>(data) - sizeof(SPadding<g_padByteCount>) - sizeof(SHeader) - sizeof(CListNode<CNode<S>>)); assert(DataInBounds(node)); return node; } template <typename T> bool DataInBounds(T* data) { assert(data != nullptr); std::uintptr_t dataPtr = recast_<std::uintptr_t>(data); std::uintptr_t bufferPtr = recast_<std::uintptr_t>(m_buffer.get()); return dataPtr >= bufferPtr && dataPtr < bufferPtr + m_totalSize; } protected: size_t m_dataSize{ S }; size_t m_segments{ 0 }; size_t m_totalSize{ 0 }; std::unique_ptr<InUseMap<S>> m_inUseMap{ nullptr }; std::unique_ptr<CLinkedList<CNode<S>>> m_freeList{ nullptr }; std::unique_ptr<byte[]> m_buffer{ nullptr }; std::mutex m_mutex; }; #pragma pack(pop) //----------------------------------------------------------------------------------------------- // store up to 64 bytes per node class CDataAllocator64 : public CDataAllocatorBase<64> { public: CDataAllocator64(size_t nodeCount) : CDataAllocatorBase<64>(nodeCount) {}; }; } |
This is a simple example for how it can be used:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
static void TestDataAllocator64() { size_t nodeCount = 1 << 8; // 256 nodes CDataAllocator64 allocator(nodeCount); char* data = allocator.Alloc<char>(); strcpy_s(data, allocator.DataSize(), "this is a test"); std::vector<std::string*> stringData; for (int i = 0; i < nodeCount - 1; ++i) { stringData.push_back(allocator.Alloc<std::string>("this is a test")); } allocator.Delete(data); for (auto data : stringData) { allocator.Delete(data); } stringData.clear(); for (int i = 0; i < nodeCount; ++i) { stringData.push_back(allocator.Alloc<std::string>("this is a test")); } for (auto data : stringData) { allocator.Delete(data); } } |
Some other additional improvements:
- Information on how many nodes are free and in use.
- Allow packing more data into nodes that still have memory bytes available.
- Sorted free list.
Here is a test use case through my CAsyncTaskManager
:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
static CDataAllocator64 s_allocator(1 << 16); // 2^16 nodes static std::function<void(void*)> DeleteVoidFunc = std::function<void(void*)>( [] (void* ptr)->void { s_allocator.Delete(ptr); }); static void ExecuteTimedAsyncTask (unsigned msTimeout, unsigned index) { CAsyncTaskManager& taskManager = CAsyncTaskManager::GetInstance(); FuncArgs args; taskManager.AddFuncArg(args, s_allocator.Alloc<std::string>("ExecuteTimeAsyncTask"), DeleteVoidFunc); taskManager.AddFuncArg(args, s_allocator.Alloc<unsigned>(msTimeout), DeleteVoidFunc); taskManager.AddFuncArg(args, s_allocator.Alloc<unsigned>(index), DeleteVoidFunc); taskManager.AddAsyncTask<CTimedAsyncTask>( taskManager.CreateAsyncTask<CTimedAsyncTask>( args, SAsyncTaskPriorityInfo(stcast_<EAsyncTaskPriority>(std::rand() % stcast_<int>(EAsyncTaskPriority::e_count))), [] (const CAsyncTask& task)->void { const std::string& strArg = task.GetArgument<std::string>(0); const unsigned& timeoutArg = task.GetArgument<unsigned>(1); const unsigned& indexArg = task.GetArgument<unsigned>(2); std::cout << "[" << &strArg << "] " << "Completed Task: " << strArg << std::endl; std::cout << "[" << &timeoutArg << "] " << " Timeout Requested: " << timeoutArg << std::endl; std::cout << "[" << &indexArg << "] " << " Index: " << indexArg << std::endl; } ) ); } |

As you can see by using the CDataAllocator64
, the memory addresses are all within the same range, and consistent size. For example if we were to calculate an address offset: 0x600C - 0x5FAC
or any other consecutive offset, we consistently get 60 bytes. Which aligns with our CDataAllocator64
.
However, if we were to just use new allocations here:
1 2 3 |
taskManager.AddFuncArg(args, new std::string("ExecuteTimeAsyncTask")); taskManager.AddFuncArg(args, new unsigned(msTimeout)); taskManager.AddFuncArg(args, new unsigned(index)); |
We would get something that looks like this:

Which has inconsistent byte offsets between allocated data.