/*
 * Copyright 2012 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package io.netty.buffer;

/**
 * Description of algorithm for PageRun/PoolSubpage allocation from PoolChunk
 *
 * Notation: The following terms are important to understand the code
 * > page  - a page is the smallest unit of a chunk that can be allocated
 * > chunk - a chunk is a collection of pages
 * > in this code chunkSize = 2^{maxOrder} * pageSize
 *
 * To begin we allocate a byte array of size = chunkSize
 * Whenever a ByteBuf of a given size needs to be created we search for the first position
 * in the byte array that has enough empty space to accommodate the requested size and
 * return a (long) handle that encodes this offset information (the memory segment is then
 * marked as reserved, so it is always used by exactly one ByteBuf and no more)
 *
 * For simplicity all sizes are normalized according to the PoolArena#normalizeCapacity method
 * This ensures that when we request memory segments of size >= pageSize the normalizedCapacity
 * equals the next power of 2 (or the requested size itself if it is already a power of 2)
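 *
 * For example, assuming the common defaults pageSize = 8192 and maxOrder = 11 (so chunkSize = 16 MiB),
 * a request for 9000 bytes is normalized to 16384 bytes and is therefore served by a run of two pages.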
 *
 * To search for the first offset in the chunk that has at least the requested size available we construct a
 * complete balanced binary tree and store it in an array (just like heaps) - memoryMap
 *
 * The tree looks like this (the size of each node is given in parentheses)
 *
 * depth=0        1 node (chunkSize)
 * depth=1        2 nodes (chunkSize/2)
 * ..
 * ..
 * depth=d        2^d nodes (chunkSize/2^d)
 * ..
 * depth=maxOrder 2^maxOrder nodes (chunkSize/2^{maxOrder} = pageSize)
 *
 * depth=maxOrder is the last level and its leaves consist of pages
 *
 * With this tree available, searching in the chunk translates to:
 * To allocate a memory segment of size chunkSize/2^k we search for the first node (from left) at depth k
 * which is unused
 *
 * Algorithm:
 * ----------
 * Encode the tree in memoryMap with the notation
 *   memoryMap[id] = x => in the subtree rooted at id, the first node that is free to be allocated
 *   is at depth x (counted from depth=0) i.e., at depths [depth_of_id, x), there is no node that is free
 *
 * As we allocate & free nodes, we update values stored in memoryMap so that the property is maintained
 *
 * Initialization -
 *   In the beginning we construct the memoryMap array by storing the depth of a node at each node
 *     i.e., memoryMap[id] = depth_of_id
 *
 * Observations:
 * -------------
 * 1) memoryMap[id] = depth_of_id  => it is free / unallocated
 * 2) memoryMap[id] > depth_of_id  => at least one of its child nodes is allocated, so we cannot allocate it, but
 *                                    some of its children can still be allocated based on their availability
 * 3) memoryMap[id] = maxOrder + 1 => the node is fully allocated & thus none of its children can be allocated, it
 *                                    is thus marked as unusable
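 *
 * A small worked example (maxOrder = 2 is chosen here only for brevity; 4 pages, 7 tree nodes,
 * index 0 of memoryMap is unused):
 *
 *   initial state:              memoryMap = [_, 0, 1, 1, 2, 2, 2, 2]  (every node holds its own depth)
 *   after allocating one page:  memoryMap = [_, 1, 2, 1, 3, 2, 2, 2]  (leaf 4 is marked unusable = maxOrder + 1 = 3;
 *                               node 2 can now only serve page-sized runs; the root can serve at most chunkSize/2)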
 *
 * Algorithm: [allocateNode(d) => we want to find the first node (from left) at depth d that can be allocated]
 * ----------
 * 1) start at root (i.e., depth = 0 or id = 1)
 * 2) if memoryMap[1] > d => cannot be allocated from this chunk
 * 3) if left node value <= d; we can allocate from left subtree so move to left and repeat until found
 * 4) else try in right subtree
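 *
 * Continuing the worked example above, allocateNode(1) (a request for chunkSize/2) proceeds as follows:
 * the root has value 1 <= 1 so the chunk can serve the request; the left child (id = 2) has value 2 > 1,
 * so we move to the right child (id = 3) whose value is 1; id = 3 is at the target depth, so it is
 * returned, marked unusable, and the root is updated to min(value(2), value(3)) = 2.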
 *
 * Algorithm: [allocateRun(size)]
 * ----------
 * 1) Compute d = log_2(chunkSize/size)
 * 2) Return allocateNode(d)
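 *
 * For example, again assuming pageSize = 8192 (pageShifts = 13) and maxOrder = 11: a normalized request of
 * 32768 bytes gives d = maxOrder - (log2(normCapacity) - pageShifts) = 11 - (15 - 13) = 9, and a node at
 * depth 9 spans chunkSize / 2^9 = 32768 bytes, i.e., a run of 4 pages.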
 *
 * Algorithm: [allocateSubpage(size)]
 * ----------
 * 1) use allocateNode(maxOrder) to find an empty (i.e., unused) leaf (i.e., page)
 * 2) use this handle to construct the PoolSubpage object or if it already exists just call init(normCapacity)
 *    note that this PoolSubpage object is added to subpagesPool in the PoolArena when we init() it
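 *
 * For example, a normalized request of 1024 bytes (again assuming pageSize = 8192) allocates one leaf and the
 * PoolSubpage divides that page into 8 elements of 1024 bytes each. The returned long handle then encodes the
 * memoryMap index of the leaf in its low 32 bits and the bitmap index of the element (together with a marker
 * bit set by PoolSubpage, so that bitmapIdx is non-zero even for element 0) in its high 32 bits; see
 * memoryMapIdx(long) and bitmapIdx(long) below.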
 *
 * Note:
 * -----
 * In the implementation, to keep the tree compact and cache friendly, the two pieces of information are
 * stored as byte values in two separate arrays:
 *
 * depthMap[id]  = depth_of_id, which never changes once the tree has been constructed
 * memoryMap[id] = x, where, as per the convention defined above, x is the depth (from the root) of the first
 *                 node in the subtree rooted at id that is free to be allocated; it is initialized to
 *                 depth_of_id and updated as nodes are allocated and freed
 */
final class PoolChunk<T> implements PoolChunkMetric {

    private static final int INTEGER_SIZE_MINUS_ONE = Integer.SIZE - 1;

    final PoolArena<T> arena;
    final T memory;
    final boolean unpooled;
    final int offset;

    private final byte[] memoryMap;
    private final byte[] depthMap;
    private final PoolSubpage<T>[] subpages;
    /** Used to determine if the requested capacity is equal to or greater than pageSize. */
    private final int subpageOverflowMask;
    private final int pageSize;
    private final int pageShifts;
    private final int maxOrder;
    private final int chunkSize;
    private final int log2ChunkSize;
    private final int maxSubpageAllocs;
    /** Used to mark memory as unusable */
    private final byte unusable;

    private int freeBytes;

    PoolChunkList<T> parent;
    PoolChunk<T> prev;
    PoolChunk<T> next;

    // TODO: Test if adding padding helps under contention
    //private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;

    PoolChunk(PoolArena<T> arena, T memory, int pageSize, int maxOrder, int pageShifts, int chunkSize, int offset) {
        unpooled = false;
        this.arena = arena;
        this.memory = memory;
        this.pageSize = pageSize;
        this.pageShifts = pageShifts;
        this.maxOrder = maxOrder;
        this.chunkSize = chunkSize;
        this.offset = offset;
        unusable = (byte) (maxOrder + 1);
        log2ChunkSize = log2(chunkSize);
        subpageOverflowMask = ~(pageSize - 1);
        freeBytes = chunkSize;

        assert maxOrder < 30 : "maxOrder should be < 30, but is: " + maxOrder;
        maxSubpageAllocs = 1 << maxOrder;

        // Generate the memory map.
        memoryMap = new byte[maxSubpageAllocs << 1];
        depthMap = new byte[memoryMap.length];
        int memoryMapIndex = 1;
        for (int d = 0; d <= maxOrder; ++ d) { // move down the tree one level at a time
            int depth = 1 << d;
            for (int p = 0; p < depth; ++ p) {
                // in each level traverse left to right and set value to the depth of subtree
                memoryMap[memoryMapIndex] = (byte) d;
                depthMap[memoryMapIndex] = (byte) d;
                memoryMapIndex ++;
            }
        }

        subpages = newSubpageArray(maxSubpageAllocs);
    }

    /** Creates a special chunk that is not pooled. */
    PoolChunk(PoolArena<T> arena, T memory, int size, int offset) {
        unpooled = true;
        this.arena = arena;
        this.memory = memory;
        this.offset = offset;
        memoryMap = null;
        depthMap = null;
        subpages = null;
        subpageOverflowMask = 0;
        pageSize = 0;
        pageShifts = 0;
        maxOrder = 0;
        unusable = (byte) (maxOrder + 1);
        chunkSize = size;
        log2ChunkSize = log2(chunkSize);
        maxSubpageAllocs = 0;
    }

    @SuppressWarnings("unchecked")
    private PoolSubpage<T>[] newSubpageArray(int size) {
        return new PoolSubpage[size];
    }

    @Override
    public int usage() {
        final int freeBytes;
        synchronized (arena) {
            freeBytes = this.freeBytes;
        }
        return usage(freeBytes);
    }

    private int usage(int freeBytes) {
        if (freeBytes == 0) {
            return 100;
        }

        int freePercentage = (int) (freeBytes * 100L / chunkSize);
        if (freePercentage == 0) {
            return 99;
        }
        return 100 - freePercentage;
    }

    long allocate(int normCapacity) {
        if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize
            return allocateRun(normCapacity);
        } else {
            return allocateSubpage(normCapacity);
        }
    }

    /**
     * Update method used by allocate.
     * This is triggered only when a successor is allocated and all its predecessors
     * need to update their state.
     * The value stored at each predecessor becomes the minimal depth at which the subtree rooted at it
     * still has some free space.
     *
     * @param id id
     */
    private void updateParentsAlloc(int id) {
        while (id > 1) {
            int parentId = id >>> 1;
            byte val1 = value(id);
            byte val2 = value(id ^ 1);
            byte val = val1 < val2 ? val1 : val2;
            setValue(parentId, val);
            id = parentId;
        }
    }

    /**
     * Update method used by free.
     * This needs to handle the special case when both children are completely free,
     * in which case the parent can be directly allocated for a request of size = child-size * 2.
     *
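     * For example, when a leaf at depth maxOrder is freed its value is reset to maxOrder; if its sibling's
     * value is also maxOrder (both pages free), the parent is set back to maxOrder - 1, so a two-page run can
     * again be allocated directly at the parent; otherwise the parent simply takes the minimum of the two
     * child values, as in updateParentsAlloc.
     *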
     * @param id id
     */
    private void updateParentsFree(int id) {
        int logChild = depth(id) + 1;
        while (id > 1) {
            int parentId = id >>> 1;
            byte val1 = value(id);
            byte val2 = value(id ^ 1);
            logChild -= 1; // in the first iteration this equals depth(id); it is reduced by 1 each level we move up

            if (val1 == logChild && val2 == logChild) {
                setValue(parentId, (byte) (logChild - 1));
            } else {
                byte val = val1 < val2 ? val1 : val2;
                setValue(parentId, val);
            }

            id = parentId;
        }
    }

    /**
     * Algorithm to allocate an index in memoryMap when we query for a free node
     * at depth d
     *
     * @param d depth
     * @return index in memoryMap
     */
    private int allocateNode(int d) {
        int id = 1;
        int initial = - (1 << d); // has last d bits = 0 and rest all = 1
        byte val = value(id);
        if (val > d) { // unusable
            return -1;
        }
        while (val < d || (id & initial) == 0) { // id & initial == 1 << d for all ids at depth d, for < d it is 0
            id <<= 1;
            val = value(id);
            if (val > d) {
                id ^= 1;
                val = value(id);
            }
        }
        byte value = value(id);
        assert value == d && (id & initial) == 1 << d : String.format("val = %d, id & initial = %d, d = %d",
                value, id & initial, d);
        setValue(id, unusable); // mark as unusable
        updateParentsAlloc(id);
        return id;
    }

    /**
     * Allocate a run of pages (>=1)
     *
     * @param normCapacity normalized capacity
     * @return index in memoryMap
     */
    private long allocateRun(int normCapacity) {
        int d = maxOrder - (log2(normCapacity) - pageShifts);
        int id = allocateNode(d);
        if (id < 0) {
            return id;
        }
        freeBytes -= runLength(id);
        return id;
    }

    /**
     * Create / initialize a new PoolSubpage of normCapacity.
     * Any PoolSubpage created / initialized here is added to the subpage pool in the PoolArena that owns
     * this PoolChunk.
     *
     * @param normCapacity normalized capacity
     * @return handle that encodes the memoryMap index of the page and the bitmap index of the allocated element
     */
    private long allocateSubpage(int normCapacity) {
        // Obtain the head of the PoolSubPage pool that is owned by the PoolArena and synchronize on it.
        // This is needed as we may add it back and so alter the linked-list structure.
        PoolSubpage<T> head = arena.findSubpagePoolHead(normCapacity);
        synchronized (head) {
            int d = maxOrder; // subpages can only be allocated from pages, i.e., leaves
            int id = allocateNode(d);
            if (id < 0) {
                return id;
            }

            final PoolSubpage<T>[] subpages = this.subpages;
            final int pageSize = this.pageSize;

            freeBytes -= pageSize;

            int subpageIdx = subpageIdx(id);
            PoolSubpage<T> subpage = subpages[subpageIdx];
            if (subpage == null) {
                subpage = new PoolSubpage<T>(head, this, id, runOffset(id), pageSize, normCapacity);
                subpages[subpageIdx] = subpage;
            } else {
                subpage.init(head, normCapacity);
            }
            return subpage.allocate();
        }
    }

    /**
     * Free a subpage or a run of pages.
     * When a subpage is freed from PoolSubpage, it might be added back to the subpage pool of the owning PoolArena.
     * If the subpage pool in the PoolArena has at least one other PoolSubpage of the given elemSize, we can
     * completely free the owning page so that it is available for subsequent allocations.
     *
     * @param handle handle to free
     */
    void free(long handle) {
        int memoryMapIdx = memoryMapIdx(handle);
        int bitmapIdx = bitmapIdx(handle);

        if (bitmapIdx != 0) { // free a subpage
            PoolSubpage<T> subpage = subpages[subpageIdx(memoryMapIdx)];
            assert subpage != null && subpage.doNotDestroy;

            // Obtain the head of the PoolSubPage pool that is owned by the PoolArena and synchronize on it.
            // This is needed as we may add it back and so alter the linked-list structure.
            PoolSubpage<T> head = arena.findSubpagePoolHead(subpage.elemSize);
            synchronized (head) {
                if (subpage.free(head, bitmapIdx & 0x3FFFFFFF)) {
                    return;
                }
            }
        }
        freeBytes += runLength(memoryMapIdx);
        setValue(memoryMapIdx, depth(memoryMapIdx));
        updateParentsFree(memoryMapIdx);
    }

    void initBuf(PooledByteBuf<T> buf, long handle, int reqCapacity) {
        int memoryMapIdx = memoryMapIdx(handle);
        int bitmapIdx = bitmapIdx(handle);
        if (bitmapIdx == 0) {
            byte val = value(memoryMapIdx);
            assert val == unusable : String.valueOf(val);
            buf.init(this, handle, runOffset(memoryMapIdx) + offset, reqCapacity, runLength(memoryMapIdx),
                     arena.parent.threadCache());
        } else {
            initBufWithSubpage(buf, handle, bitmapIdx, reqCapacity);
        }
    }

    void initBufWithSubpage(PooledByteBuf<T> buf, long handle, int reqCapacity) {
        initBufWithSubpage(buf, handle, bitmapIdx(handle), reqCapacity);
    }

    private void initBufWithSubpage(PooledByteBuf<T> buf, long handle, int bitmapIdx, int reqCapacity) {
        assert bitmapIdx != 0;

        int memoryMapIdx = memoryMapIdx(handle);

        PoolSubpage<T> subpage = subpages[subpageIdx(memoryMapIdx)];
        assert subpage.doNotDestroy;
        assert reqCapacity <= subpage.elemSize;

        buf.init(
            this, handle,
            runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset,
                reqCapacity, subpage.elemSize, arena.parent.threadCache());
    }

    private byte value(int id) {
        return memoryMap[id];
    }

    private void setValue(int id, byte val) {
        memoryMap[id] = val;
    }

    private byte depth(int id) {
        return depthMap[id];
    }

    private static int log2(int val) {
        // compute the (0-based, with lsb = 0) position of highest set bit i.e., log2
        return INTEGER_SIZE_MINUS_ONE - Integer.numberOfLeadingZeros(val);
    }

    private int runLength(int id) {
        // represents the size in #bytes supported by node 'id' in the tree
        return 1 << log2ChunkSize - depth(id);
    }

    private int runOffset(int id) {
        // represents the 0-based offset in #bytes from start of the byte-array chunk
        int shift = id ^ 1 << depth(id);
        return shift * runLength(id);
    }

    private int subpageIdx(int memoryMapIdx) {
        return memoryMapIdx ^ maxSubpageAllocs; // remove highest set bit, to get offset
    }

    private static int memoryMapIdx(long handle) {
        // the low 32 bits of a handle hold the index of the allocated node in memoryMap
        return (int) handle;
    }

    private static int bitmapIdx(long handle) {
        // the high 32 bits hold the subpage bitmap index; they are non-zero only for subpage allocations
        return (int) (handle >>> Integer.SIZE);
    }

    @Override
    public int chunkSize() {
        return chunkSize;
    }

    @Override
    public int freeBytes() {
        synchronized (arena) {
            return freeBytes;
        }
    }

    @Override
    public String toString() {
        final int freeBytes;
        synchronized (arena) {
            freeBytes = this.freeBytes;
        }

        return new StringBuilder()
                .append("Chunk(")
                .append(Integer.toHexString(System.identityHashCode(this)))
                .append(": ")
                .append(usage(freeBytes))
                .append("%, ")
                .append(chunkSize - freeBytes)
                .append('/')
                .append(chunkSize)
                .append(')')
                .toString();
    }

    void destroy() {
        arena.destroyChunk(this);
    }
}