Rev 6936 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5270 | serge | 1 | #ifndef __LINUX_GFP_H |
2 | #define __LINUX_GFP_H |
||
3 | |||
4 | #include |
||
6936 | serge | 5 | #include |
5270 | serge | 6 | #include |
7 | #include |
||
8 | |||
9 | struct vm_area_struct; |
||
10 | |||
7143 | serge | 11 | /* |
12 | * In case of changes, please don't forget to update |
||
13 | * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c |
||
14 | */ |
||
15 | |||
5270 | serge | 16 | /* Plain integer GFP bitmasks. Do not use this directly. */ |
17 | #define ___GFP_DMA 0x01u |
||
18 | #define ___GFP_HIGHMEM 0x02u |
||
19 | #define ___GFP_DMA32 0x04u |
||
20 | #define ___GFP_MOVABLE 0x08u |
||
6082 | serge | 21 | #define ___GFP_RECLAIMABLE 0x10u |
5270 | serge | 22 | #define ___GFP_HIGH 0x20u |
23 | #define ___GFP_IO 0x40u |
||
24 | #define ___GFP_FS 0x80u |
||
25 | #define ___GFP_COLD 0x100u |
||
26 | #define ___GFP_NOWARN 0x200u |
||
27 | #define ___GFP_REPEAT 0x400u |
||
28 | #define ___GFP_NOFAIL 0x800u |
||
29 | #define ___GFP_NORETRY 0x1000u |
||
30 | #define ___GFP_MEMALLOC 0x2000u |
||
31 | #define ___GFP_COMP 0x4000u |
||
32 | #define ___GFP_ZERO 0x8000u |
||
33 | #define ___GFP_NOMEMALLOC 0x10000u |
||
34 | #define ___GFP_HARDWALL 0x20000u |
||
35 | #define ___GFP_THISNODE 0x40000u |
||
6082 | serge | 36 | #define ___GFP_ATOMIC 0x80000u |
6936 | serge | 37 | #define ___GFP_ACCOUNT 0x100000u |
5270 | serge | 38 | #define ___GFP_NOTRACK 0x200000u |
6082 | serge | 39 | #define ___GFP_DIRECT_RECLAIM 0x400000u |
5270 | serge | 40 | #define ___GFP_OTHER_NODE 0x800000u |
41 | #define ___GFP_WRITE 0x1000000u |
||
6082 | serge | 42 | #define ___GFP_KSWAPD_RECLAIM 0x2000000u |
5270 | serge | 43 | /* If the above are modified, __GFP_BITS_SHIFT may need updating */ |
44 | |||
45 | /* |
||
6082 | serge | 46 | * Physical address zone modifiers (see linux/mmzone.h - low four bits) |
5270 | serge | 47 | * |
48 | * Do not put any conditional on these. If necessary modify the definitions |
||
49 | * without the underscores and use them consistently. The definitions here may |
||
50 | * be used in bit comparisons. |
||
51 | */ |
||
52 | #define __GFP_DMA ((__force gfp_t)___GFP_DMA) |
||
53 | #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) |
||
54 | #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) |
||
6082 | serge | 55 | #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ |
5270 | serge | 56 | #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) |
6082 | serge | 57 | |
5270 | serge | 58 | /* |
6082 | serge | 59 | * Page mobility and placement hints |
5270 | serge | 60 | * |
6082 | serge | 61 | * These flags provide hints about how mobile the page is. Pages with similar |
62 | * mobility are placed within the same pageblocks to minimise problems due |
||
63 | * to external fragmentation. |
||
5270 | serge | 64 | * |
6082 | serge | 65 | * __GFP_MOVABLE (also a zone modifier) indicates that the page can be |
66 | * moved by page migration during memory compaction or can be reclaimed. |
||
5270 | serge | 67 | * |
6082 | serge | 68 | * __GFP_RECLAIMABLE is used for slab allocations that specify |
69 | * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. |
||
5270 | serge | 70 | * |
6082 | serge | 71 | * __GFP_WRITE indicates the caller intends to dirty the page. Where possible, |
72 | * these pages will be spread between local zones to avoid all the dirty |
||
73 | * pages being in one zone (fair zone allocation policy). |
||
74 | * |
||
75 | * __GFP_HARDWALL enforces the cpuset memory allocation policy. |
||
76 | * |
||
77 | * __GFP_THISNODE forces the allocation to be satisfied from the requested |
||
78 | * node with no fallbacks or placement policy enforcements. |
||
6936 | serge | 79 | * |
80 | * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant |
||
81 | * to kmem allocations). |
||
5270 | serge | 82 | */ |
6082 | serge | 83 | #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) |
84 | #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) |
||
85 | #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) |
||
86 | #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) |
||
6936 | serge | 87 | #define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) |
5270 | serge | 88 | |
6082 | serge | 89 | /* |
90 | * Watermark modifiers -- controls access to emergency reserves |
||
91 | * |
||
92 | * __GFP_HIGH indicates that the caller is high-priority and that granting |
||
93 | * the request is necessary before the system can make forward progress. |
||
94 | * For example, creating an IO context to clean pages. |
||
95 | * |
||
96 | * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is |
||
97 | * high priority. Users are typically interrupt handlers. This may be |
||
98 | * used in conjunction with __GFP_HIGH |
||
99 | * |
||
100 | * __GFP_MEMALLOC allows access to all memory. This should only be used when |
||
101 | * the caller guarantees the allocation will allow more memory to be freed |
||
102 | * very shortly e.g. process exiting or swapping. Users either should |
||
103 | * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). |
||
104 | * |
||
105 | * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. |
||
106 | * This takes precedence over the __GFP_MEMALLOC flag if both are set. |
||
107 | */ |
||
108 | #define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) |
||
109 | #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) |
||
110 | #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) |
||
111 | #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) |
||
5270 | serge | 112 | |
113 | /* |
||
6082 | serge | 114 | * Reclaim modifiers |
115 | * |
||
116 | * __GFP_IO can start physical IO. |
||
117 | * |
||
118 | * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the |
||
119 | * allocator recursing into the filesystem which might already be holding |
||
120 | * locks. |
||
121 | * |
||
122 | * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. |
||
123 | * This flag can be cleared to avoid unnecessary delays when a fallback |
||
124 | * option is available. |
||
125 | * |
||
126 | * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when |
||
127 | * the low watermark is reached and have it reclaim pages until the high |
||
128 | * watermark is reached. A caller may wish to clear this flag when fallback |
||
129 | * options are available and the reclaim is likely to disrupt the system. The |
||
130 | * canonical example is THP allocation where a fallback is cheap but |
||
131 | * reclaim/compaction may cause indirect stalls. |
||
132 | * |
||
133 | * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. |
||
134 | * |
||
135 | * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt |
||
136 | * _might_ fail. This depends upon the particular VM implementation. |
||
137 | * |
||
138 | * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller |
||
139 | * cannot handle allocation failures. New users should be evaluated carefully |
||
140 | * (and the flag should be used only when there is no reasonable failure |
||
141 | * policy) but it is definitely preferable to use the flag rather than |
||
142 | * open-code an endless loop around the allocator. |
||
143 | * |
||
144 | * __GFP_NORETRY: The VM implementation must not retry indefinitely and will |
||
145 | * return NULL when direct reclaim and memory compaction have failed to allow |
||
146 | * the allocation to succeed. The OOM killer is not called with the current |
||
147 | * implementation. |
||
5270 | serge | 148 | */ |
6082 | serge | 149 | #define __GFP_IO ((__force gfp_t)___GFP_IO) |
150 | #define __GFP_FS ((__force gfp_t)___GFP_FS) |
||
151 | #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ |
||
152 | #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ |
||
153 | #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) |
||
154 | #define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) |
||
155 | #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) |
||
156 | #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) |
||
157 | |||
158 | /* |
||
159 | * Action modifiers |
||
160 | * |
||
161 | * __GFP_COLD indicates that the caller does not expect the page to be used in the near |
||
162 | * future. Where possible, a cache-cold page will be returned. |
||
163 | * |
||
164 | * __GFP_NOWARN suppresses allocation failure reports. |
||
165 | * |
||
166 | * __GFP_COMP addresses compound page metadata. |
||
167 | * |
||
168 | * __GFP_ZERO returns a zeroed page on success. |
||
169 | * |
||
170 | * __GFP_NOTRACK avoids tracking with kmemcheck. |
||
171 | * |
||
172 | * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of |
||
173 | * distinguishing in the source between false positives and allocations that |
||
174 | * cannot be supported (e.g. page tables). |
||
175 | * |
||
176 | * __GFP_OTHER_NODE is for allocations that are on a remote node but that |
||
177 | * should not be accounted for as a remote allocation in vmstat. A |
||
178 | * typical user would be khugepaged collapsing a huge page on a remote |
||
179 | * node. |
||
180 | */ |
||
181 | #define __GFP_COLD ((__force gfp_t)___GFP_COLD) |
||
182 | #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) |
||
183 | #define __GFP_COMP ((__force gfp_t)___GFP_COMP) |
||
184 | #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) |
||
185 | #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) |
||
5270 | serge | 186 | #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) |
6082 | serge | 187 | #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) |
5270 | serge | 188 | |
6082 | serge | 189 | /* Room for N __GFP_FOO bits */ |
190 | #define __GFP_BITS_SHIFT 26 |
||
5270 | serge | 191 | #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) |
192 | |||
6082 | serge | 193 | /* |
194 | * Useful GFP flag combinations that are commonly used. It is recommended |
||
195 | * that subsystems start with one of these combinations and then set/clear |
||
196 | * __GFP_FOO flags as necessary. |
||
197 | * |
||
198 | * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower |
||
199 | * watermark is applied to allow access to "atomic reserves" |
||
200 | * |
||
201 | * GFP_KERNEL is typical for kernel-internal allocations. The caller requires |
||
202 | * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. |
||
203 | * |
||
6936 | serge | 204 | * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is |
205 | * accounted to kmemcg. |
||
206 | * |
||
6082 | serge | 207 | * GFP_NOWAIT is for kernel allocations that should not stall for direct |
208 | * reclaim, start physical IO or use any filesystem callback. |
||
209 | * |
||
210 | * GFP_NOIO will use direct reclaim to discard clean pages or slab pages |
||
211 | * that do not require the starting of any physical IO. |
||
212 | * |
||
213 | * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. |
||
214 | * |
||
215 | * GFP_USER is for userspace allocations that also need to be directly |
||
216 | * accessible by the kernel or hardware. It is typically used by hardware |
||
217 | * for buffers that are mapped to userspace (e.g. graphics) that hardware |
||
218 | * still must DMA to. cpuset limits are enforced for these allocations. |
||
219 | * |
||
220 | * GFP_DMA exists for historical reasons and should be avoided where possible. |
||
221 | * The flag indicates that the caller requires that the lowest zone be |
||
222 | * used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but |
||
223 | * it would require careful auditing as some users really require it and |
||
224 | * others use the flag to avoid lowmem reserves in ZONE_DMA and treat the |
||
225 | * lowest zone as a type of emergency reserve. |
||
226 | * |
||
227 | * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit |
||
228 | * address. |
||
229 | * |
||
230 | * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, |
||
231 | * do not need to be directly accessible by the kernel but that cannot |
||
232 | * move once in use. An example may be a hardware allocation that maps |
||
233 | * data directly into userspace but has no addressing limitations. |
||
234 | * |
||
235 | * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not |
||
236 | * need direct access to but can use kmap() when access is required. They |
||
237 | * are expected to be movable via page reclaim or page migration. Typically, |
||
238 | * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE. |
||
239 | * |
||
240 | * GFP_TRANSHUGE is used for THP allocations. They are compound allocations |
||
241 | * that will fail quickly if memory is not available and will not wake |
||
242 | * kswapd on failure. |
||
243 | */ |
||
244 | #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) |
||
245 | #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) |
||
6936 | serge | 246 | #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) |
6082 | serge | 247 | #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) |
248 | #define GFP_NOIO (__GFP_RECLAIM) |
||
249 | #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) |
||
250 | #define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ |
||
5270 | serge | 251 | __GFP_RECLAIMABLE) |
6082 | serge | 252 | #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) |
253 | #define GFP_DMA __GFP_DMA |
||
254 | #define GFP_DMA32 __GFP_DMA32 |
||
5270 | serge | 255 | #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) |
256 | #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) |
||
6082 | serge | 257 | #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ |
258 | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ |
||
7143 | serge | 259 | ~__GFP_RECLAIM) |
5270 | serge | 260 | |
7143 | serge | 261 | |
/*
 * gfpflags_allow_blocking() reports whether __GFP_DIRECT_RECLAIM is set in
 * gfp_flags, i.e. whether the caller may enter direct reclaim. The "!!"
 * collapses the masked bit to a plain true/false value.
 */
6082 | serge | 262 | static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) |
263 | { |
||
6936 | serge | 264 | return !!(gfp_flags & __GFP_DIRECT_RECLAIM); |
6082 | serge | 265 | } |
5270 | serge | 266 | |
267 | #ifdef CONFIG_HIGHMEM |
||
268 | #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM |
||
269 | #else |
||
270 | #define OPT_ZONE_HIGHMEM ZONE_NORMAL |
||
271 | #endif |
||
272 | |||
273 | #ifdef CONFIG_ZONE_DMA |
||
274 | #define OPT_ZONE_DMA ZONE_DMA |
||
275 | #else |
||
276 | #define OPT_ZONE_DMA ZONE_NORMAL |
||
277 | #endif |
||
278 | |||
279 | #ifdef CONFIG_ZONE_DMA32 |
||
280 | #define OPT_ZONE_DMA32 ZONE_DMA32 |
||
281 | #else |
||
282 | #define OPT_ZONE_DMA32 ZONE_NORMAL |
||
283 | #endif |
||
284 | |||
285 | /* |
||
286 | * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the |
||
287 | * zone to use given the lowest 4 bits of gfp_t. Entries are ZONES_SHIFT bits long |
||
288 | * and there are 16 of them to cover all possible combinations of |
||
289 | * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. |
||
290 | * |
||
291 | * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. |
||
292 | * But GFP_MOVABLE is not only a zone specifier but also an allocation |
||
293 | * policy. Therefore __GFP_MOVABLE plus another zone selector is valid. |
||
294 | * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1". |
||
295 | * |
||
296 | * bit result |
||
297 | * ================= |
||
298 | * 0x0 => NORMAL |
||
299 | * 0x1 => DMA or NORMAL |
||
300 | * 0x2 => HIGHMEM or NORMAL |
||
301 | * 0x3 => BAD (DMA+HIGHMEM) |
||
302 | * 0x4 => DMA32 or NORMAL |
||
303 | * 0x5 => BAD (DMA+DMA32) |
||
304 | * 0x6 => BAD (HIGHMEM+DMA32) |
||
305 | * 0x7 => BAD (HIGHMEM+DMA32+DMA) |
||
306 | * 0x8 => NORMAL (MOVABLE+0) |
||
307 | * 0x9 => DMA or NORMAL (MOVABLE+DMA) |
||
308 | * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) |
||
309 | * 0xb => BAD (MOVABLE+HIGHMEM+DMA) |
||
310 | * 0xc => DMA32 or NORMAL (MOVABLE+DMA32) |
||
311 | * 0xd => BAD (MOVABLE+DMA32+DMA) |
||
312 | * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) |
||
313 | * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) |
||
314 | * |
||
7143 | serge | 315 | * ZONES_SHIFT must be <= 2 on 32 bit platforms. |
5270 | serge | 316 | */ |
317 | |||
318 | #if 16 * ZONES_SHIFT > BITS_PER_LONG |
||
319 | #error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer |
||
320 | #endif |
||
321 | |||
322 | #define GFP_ZONE_TABLE ( \ |
||
323 | (ZONE_NORMAL << 0 * ZONES_SHIFT) \ |
||
324 | | (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT) \ |
||
325 | | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT) \ |
||
326 | | (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT) \ |
||
327 | | (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT) \ |
||
328 | | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT) \ |
||
329 | | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT) \ |
||
330 | | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT) \ |
||
331 | ) |
||
332 | |||
333 | /* |
||
334 | * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32 |
||
335 | * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per |
||
336 | * entry starting with bit 0. Bit is set if the combination is not |
||
337 | * allowed. |
||
338 | */ |
||
339 | #define GFP_ZONE_BAD ( \ |
||
340 | 1 << (___GFP_DMA | ___GFP_HIGHMEM) \ |
||
341 | | 1 << (___GFP_DMA | ___GFP_DMA32) \ |
||
342 | | 1 << (___GFP_DMA32 | ___GFP_HIGHMEM) \ |
||
343 | | 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM) \ |
||
344 | | 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA) \ |
||
345 | | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA) \ |
||
346 | | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM) \ |
||
347 | | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \ |
||
348 | ) |
||
349 | |||
350 | |||
351 | #endif /* __LINUX_GFP_H */ |