Merge branch 'akpm' (incoming from Andrew)
Merge first patch-bomb from Andrew Morton:

 - a couple of misc things
 - inotify/fsnotify work from Jan
 - ocfs2 updates (partial)
 - about half of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
  mm/migrate: remove unused function, fail_migrate_page()
  mm/migrate: remove putback_lru_pages, fix comment on putback_movable_pages
  mm/migrate: correct failure handling if !hugepage_migration_support()
  mm/migrate: add comment about permanent failure path
  mm, page_alloc: warn for non-blockable __GFP_NOFAIL allocation failure
  mm: compaction: reset scanner positions immediately when they meet
  mm: compaction: do not mark unmovable pageblocks as skipped in async compaction
  mm: compaction: detect when scanners meet in isolate_freepages
  mm: compaction: reset cached scanner pfn's before reading them
  mm: compaction: encapsulate defer reset logic
  mm: compaction: trace compaction begin and end
  memcg, oom: lock mem_cgroup_print_oom_info
  sched: add tracepoints related to NUMA task migration
  mm: numa: do not automatically migrate KSM pages
  mm: numa: trace tasks that fail migration due to rate limiting
  mm: numa: limit scope of lock for NUMA migrate rate limiting
  mm: numa: make NUMA-migrate related functions static
  lib/show_mem.c: show num_poisoned_pages when oom
  mm/hwpoison: add '#' to hwpoison_inject
  mm/memblock: use WARN_ONCE when MAX_NUMNODES passed as input parameter
  ...
commit df32e43a54
@@ -767,6 +767,7 @@ The "Locked" indicates whether the mapping is locked in memory or not.
 
 MemTotal:       16344972 kB
 MemFree:        13634064 kB
+MemAvailable:   14836172 kB
 Buffers:            3656 kB
 Cached:          1195708 kB
 SwapCached:            0 kB
@@ -799,6 +800,14 @@ AnonHugePages:     49152 kB
 MemTotal: Total usable ram (i.e. physical ram minus a few reserved
           bits and the kernel binary code)
  MemFree: The sum of LowFree+HighFree
+MemAvailable: An estimate of how much memory is available for starting new
+              applications, without swapping. Calculated from MemFree,
+              SReclaimable, the size of the file LRU lists, and the low
+              watermarks in each zone.
+              The estimate takes into account that the system needs some
+              page cache to function well, and that not all reclaimable
+              slab will be reclaimable, due to items being in use. The
+              impact of those factors will vary from system to system.
  Buffers: Relatively temporary storage for raw disk blocks
           shouldn't get tremendously large (20MB or so)
   Cached: in-memory cache for files read from the disk (the
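For readers who want the mechanics rather than the prose: the documented inputs (MemFree, reclaimable slab, file LRU sizes, per-zone low watermarks) combine roughly as sketched below. This is only an illustration of the estimate as described above, with made-up helper names and every value expressed in the same unit (pages); it is not the kernel's implementation.

	/* Sketch only: the MemAvailable estimate as documented above.
	 * All parameters are hypothetical inputs, in pages. */
	static long estimate_mem_available(long mem_free, long reclaimable_slab,
					   long file_lru_pages, long low_watermarks)
	{
		long avail = mem_free - low_watermarks;

		/* the system needs some page cache to function well, so only
		 * part of the file LRU counts as available */
		avail += file_lru_pages - (file_lru_pages / 2 < low_watermarks ?
					   file_lru_pages / 2 : low_watermarks);

		/* not all reclaimable slab can really be reclaimed (items in use) */
		avail += reclaimable_slab - (reclaimable_slab / 2 < low_watermarks ?
					     reclaimable_slab / 2 : low_watermarks);

		return avail > 0 ? avail : 0;
	}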
@@ -47,6 +47,7 @@ Currently, these files are in /proc/sys/vm:
 - numa_zonelist_order
 - oom_dump_tasks
 - oom_kill_allocating_task
+- overcommit_kbytes
 - overcommit_memory
 - overcommit_ratio
 - page-cluster
@@ -574,6 +575,17 @@ The default value is 0.
 
 ==============================================================
 
+overcommit_kbytes:
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+==============================================================
+
 overcommit_memory:
 
 This value contains a flag that enables memory overcommitment.
@@ -14,8 +14,8 @@ The Linux kernel supports the following overcommit handling modes
 
 2	-	Don't overcommit. The total address space commit
 		for the system is not permitted to exceed swap + a
-		configurable percentage (default is 50) of physical RAM.
-		Depending on the percentage you use, in most situations
+		configurable amount (default is 50%) of physical RAM.
+		Depending on the amount you use, in most situations
 		this means a process will not be killed while accessing
 		pages but will receive errors on memory allocation as
 		appropriate.
@@ -26,7 +26,8 @@ The Linux kernel supports the following overcommit handling modes
 
 The overcommit policy is set via the sysctl `vm.overcommit_memory'.
 
-The overcommit percentage is set via `vm.overcommit_ratio'.
+The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
+or `vm.overcommit_kbytes' (absolute value).
 
 The current overcommit limit and amount committed are viewable in
 /proc/meminfo as CommitLimit and Committed_AS respectively.
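Taken together, these two documentation hunks describe one limit with two knobs: a percentage (overcommit_ratio) or an absolute amount (overcommit_kbytes), plus swap. A minimal sketch of that arithmetic, assuming kilobyte units throughout and hypothetical parameter names (the kernel computes the real value internally when overcommit_memory is 2):

	/* Illustrative sketch, not kernel code: how CommitLimit is formed
	 * from the two sysctls described above (values in kilobytes). */
	static unsigned long commit_limit_kb(unsigned long total_ram_kb,
					     unsigned long total_swap_kb,
					     unsigned long overcommit_kbytes,
					     unsigned long overcommit_ratio)
	{
		unsigned long allowed;

		if (overcommit_kbytes)		/* the absolute knob wins ... */
			allowed = overcommit_kbytes;
		else				/* ... otherwise the percentage */
			allowed = total_ram_kb * overcommit_ratio / 100;

		return allowed + total_swap_kb;	/* CommitLimit = RAM share + swap */
	}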
@@ -8,8 +8,8 @@
 #define MAX_DMA_ADDRESS	0xffffffffUL
 #else
 #define MAX_DMA_ADDRESS	({ \
-	extern unsigned long arm_dma_zone_size; \
-	arm_dma_zone_size ? \
+	extern phys_addr_t arm_dma_zone_size; \
+	arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
 	(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
 #endif
 
@@ -33,7 +33,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 
 void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return alloc_bootmem_align(size, align);
+	return memblock_virt_alloc(size, align);
 }
 
 void __init arm_dt_memblock_reserve(void)
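Most of the architecture hunks in this merge repeat the conversion shown here: a boot-time bootmem allocation becomes a memblock-backed one. Judging purely from the call sites in this diff (and from the omap_hwmod hunks further down, which drop their memset() calls after the switch), the replacement behaves roughly as sketched below; the prototype and the align == 0 convention are assumptions, not verified against include/linux/bootmem.h.

	/* Assumed shape of the new allocator, inferred from the call sites in
	 * this diff: returns zeroed, directly mapped memory; align == 0 appears
	 * to mean "use the default alignment". */
	void * __init memblock_virt_alloc(phys_addr_t size, phys_addr_t align);

	/* typical conversion seen throughout this series */
	res = alloc_bootmem_low(sizeof(*res));		/* old: bootmem        */
	res = memblock_virt_alloc(sizeof(*res), 0);	/* new: memblock, zeroed */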
@@ -717,7 +717,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 	kernel_data.end     = virt_to_phys(_end - 1);
 
 	for_each_memblock(memory, region) {
-		res = alloc_bootmem_low(sizeof(*res));
+		res = memblock_virt_alloc(sizeof(*res), 0);
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
@ -2791,9 +2791,7 @@ static int __init _alloc_links(struct omap_hwmod_link **ml,
|
||||
sz = sizeof(struct omap_hwmod_link) * LINKS_PER_OCP_IF;
|
||||
|
||||
*sl = NULL;
|
||||
*ml = alloc_bootmem(sz);
|
||||
|
||||
memset(*ml, 0, sz);
|
||||
*ml = memblock_virt_alloc(sz, 0);
|
||||
|
||||
*sl = (void *)(*ml) + sizeof(struct omap_hwmod_link);
|
||||
|
||||
@ -2912,9 +2910,7 @@ static int __init _alloc_linkspace(struct omap_hwmod_ocp_if **ois)
|
||||
pr_debug("omap_hwmod: %s: allocating %d byte linkspace (%d links)\n",
|
||||
__func__, sz, max_ls);
|
||||
|
||||
linkspace = alloc_bootmem(sz);
|
||||
|
||||
memset(linkspace, 0, sz);
|
||||
linkspace = memblock_virt_alloc(sz, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -92,9 +92,6 @@ void show_mem(unsigned int filter)
|
||||
printk("Mem-info:\n");
|
||||
show_free_areas(filter);
|
||||
|
||||
if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
|
||||
return;
|
||||
|
||||
for_each_bank (i, mi) {
|
||||
struct membank *bank = &mi->bank[i];
|
||||
unsigned int pfn1, pfn2;
|
||||
@ -461,7 +458,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
|
||||
* free the section of the memmap array.
|
||||
*/
|
||||
if (pg < pgend)
|
||||
free_bootmem(pg, pgend - pg);
|
||||
memblock_free_early(pg, pgend - pg);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -31,74 +31,6 @@
|
||||
static unsigned long max_gap;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* show_mem - give short summary of memory stats
|
||||
*
|
||||
* Shows a simple page count of reserved and used pages in the system.
|
||||
* For discontig machines, it does this on a per-pgdat basis.
|
||||
*/
|
||||
void show_mem(unsigned int filter)
|
||||
{
|
||||
int i, total_reserved = 0;
|
||||
int total_shared = 0, total_cached = 0;
|
||||
unsigned long total_present = 0;
|
||||
pg_data_t *pgdat;
|
||||
|
||||
printk(KERN_INFO "Mem-info:\n");
|
||||
show_free_areas(filter);
|
||||
printk(KERN_INFO "Node memory in pages:\n");
|
||||
if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
|
||||
return;
|
||||
for_each_online_pgdat(pgdat) {
|
||||
unsigned long present;
|
||||
unsigned long flags;
|
||||
int shared = 0, cached = 0, reserved = 0;
|
||||
int nid = pgdat->node_id;
|
||||
|
||||
if (skip_free_areas_node(filter, nid))
|
||||
continue;
|
||||
pgdat_resize_lock(pgdat, &flags);
|
||||
present = pgdat->node_present_pages;
|
||||
for(i = 0; i < pgdat->node_spanned_pages; i++) {
|
||||
struct page *page;
|
||||
if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
|
||||
touch_nmi_watchdog();
|
||||
if (pfn_valid(pgdat->node_start_pfn + i))
|
||||
page = pfn_to_page(pgdat->node_start_pfn + i);
|
||||
else {
|
||||
#ifdef CONFIG_VIRTUAL_MEM_MAP
|
||||
if (max_gap < LARGE_GAP)
|
||||
continue;
|
||||
#endif
|
||||
i = vmemmap_find_next_valid_pfn(nid, i) - 1;
|
||||
continue;
|
||||
}
|
||||
if (PageReserved(page))
|
||||
reserved++;
|
||||
else if (PageSwapCache(page))
|
||||
cached++;
|
||||
else if (page_count(page))
|
||||
shared += page_count(page)-1;
|
||||
}
|
||||
pgdat_resize_unlock(pgdat, &flags);
|
||||
total_present += present;
|
||||
total_reserved += reserved;
|
||||
total_cached += cached;
|
||||
total_shared += shared;
|
||||
printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, "
|
||||
"shrd: %10d, swpd: %10d\n", nid,
|
||||
present, reserved, shared, cached);
|
||||
}
|
||||
printk(KERN_INFO "%ld pages of RAM\n", total_present);
|
||||
printk(KERN_INFO "%d reserved pages\n", total_reserved);
|
||||
printk(KERN_INFO "%d pages shared\n", total_shared);
|
||||
printk(KERN_INFO "%d pages swap cached\n", total_cached);
|
||||
printk(KERN_INFO "Total of %ld pages in page table cache\n",
|
||||
quicklist_total_size());
|
||||
printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
|
||||
}
|
||||
|
||||
|
||||
/* physical address where the bootmem map is located */
|
||||
unsigned long bootmap_start;
|
||||
|
||||
|
@ -607,69 +607,6 @@ void *per_cpu_init(void)
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/**
|
||||
* show_mem - give short summary of memory stats
|
||||
*
|
||||
* Shows a simple page count of reserved and used pages in the system.
|
||||
* For discontig machines, it does this on a per-pgdat basis.
|
||||
*/
|
||||
void show_mem(unsigned int filter)
|
||||
{
|
||||
int i, total_reserved = 0;
|
||||
int total_shared = 0, total_cached = 0;
|
||||
unsigned long total_present = 0;
|
||||
pg_data_t *pgdat;
|
||||
|
||||
printk(KERN_INFO "Mem-info:\n");
|
||||
show_free_areas(filter);
|
||||
if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
|
||||
return;
|
||||
printk(KERN_INFO "Node memory in pages:\n");
|
||||
for_each_online_pgdat(pgdat) {
|
||||
unsigned long present;
|
||||
unsigned long flags;
|
||||
int shared = 0, cached = 0, reserved = 0;
|
||||
int nid = pgdat->node_id;
|
||||
|
||||
if (skip_free_areas_node(filter, nid))
|
||||
continue;
|
||||
pgdat_resize_lock(pgdat, &flags);
|
||||
present = pgdat->node_present_pages;
|
||||
for(i = 0; i < pgdat->node_spanned_pages; i++) {
|
||||
struct page *page;
|
||||
if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
|
||||
touch_nmi_watchdog();
|
||||
if (pfn_valid(pgdat->node_start_pfn + i))
|
||||
page = pfn_to_page(pgdat->node_start_pfn + i);
|
||||
else {
|
||||
i = vmemmap_find_next_valid_pfn(nid, i) - 1;
|
||||
continue;
|
||||
}
|
||||
if (PageReserved(page))
|
||||
reserved++;
|
||||
else if (PageSwapCache(page))
|
||||
cached++;
|
||||
else if (page_count(page))
|
||||
shared += page_count(page)-1;
|
||||
}
|
||||
pgdat_resize_unlock(pgdat, &flags);
|
||||
total_present += present;
|
||||
total_reserved += reserved;
|
||||
total_cached += cached;
|
||||
total_shared += shared;
|
||||
printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, "
|
||||
"shrd: %10d, swpd: %10d\n", nid,
|
||||
present, reserved, shared, cached);
|
||||
}
|
||||
printk(KERN_INFO "%ld pages of RAM\n", total_present);
|
||||
printk(KERN_INFO "%d reserved pages\n", total_reserved);
|
||||
printk(KERN_INFO "%d pages shared\n", total_shared);
|
||||
printk(KERN_INFO "%d pages swap cached\n", total_cached);
|
||||
printk(KERN_INFO "Total of %ld pages in page table cache\n",
|
||||
quicklist_total_size());
|
||||
printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
|
||||
}
|
||||
|
||||
/**
|
||||
* call_pernode_memory - use SRAT to call callback functions with node info
|
||||
* @start: physical start of range
|
||||
|
@ -684,3 +684,51 @@ per_linux32_init(void)
|
||||
}
|
||||
|
||||
__initcall(per_linux32_init);
|
||||
|
||||
/**
|
||||
* show_mem - give short summary of memory stats
|
||||
*
|
||||
* Shows a simple page count of reserved and used pages in the system.
|
||||
* For discontig machines, it does this on a per-pgdat basis.
|
||||
*/
|
||||
void show_mem(unsigned int filter)
|
||||
{
|
||||
int total_reserved = 0;
|
||||
unsigned long total_present = 0;
|
||||
pg_data_t *pgdat;
|
||||
|
||||
printk(KERN_INFO "Mem-info:\n");
|
||||
show_free_areas(filter);
|
||||
printk(KERN_INFO "Node memory in pages:\n");
|
||||
for_each_online_pgdat(pgdat) {
|
||||
unsigned long present;
|
||||
unsigned long flags;
|
||||
int reserved = 0;
|
||||
int nid = pgdat->node_id;
|
||||
int zoneid;
|
||||
|
||||
if (skip_free_areas_node(filter, nid))
|
||||
continue;
|
||||
pgdat_resize_lock(pgdat, &flags);
|
||||
|
||||
for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
|
||||
struct zone *zone = &pgdat->node_zones[zoneid];
|
||||
if (!populated_zone(zone))
|
||||
continue;
|
||||
|
||||
reserved += zone->present_pages - zone->managed_pages;
|
||||
}
|
||||
present = pgdat->node_present_pages;
|
||||
|
||||
pgdat_resize_unlock(pgdat, &flags);
|
||||
total_present += present;
|
||||
total_reserved += reserved;
|
||||
printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, ",
|
||||
nid, present, reserved);
|
||||
}
|
||||
printk(KERN_INFO "%ld pages of RAM\n", total_present);
|
||||
printk(KERN_INFO "%d reserved pages\n", total_reserved);
|
||||
printk(KERN_INFO "Total of %ld pages in page table cache\n",
|
||||
quicklist_total_size());
|
||||
printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
|
||||
}
|
||||
|
@ -204,7 +204,8 @@ static void __init do_init_bootmem(void)
|
||||
start_pfn = memblock_region_memory_base_pfn(reg);
|
||||
end_pfn = memblock_region_memory_end_pfn(reg);
|
||||
memblock_set_node(PFN_PHYS(start_pfn),
|
||||
PFN_PHYS(end_pfn - start_pfn), 0);
|
||||
PFN_PHYS(end_pfn - start_pfn),
|
||||
&memblock.memory, 0);
|
||||
}
|
||||
|
||||
/* All of system RAM sits in node 0 for the non-NUMA case */
|
||||
|
@ -42,7 +42,8 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
|
||||
memblock_add(start, end - start);
|
||||
|
||||
memblock_set_node(PFN_PHYS(start_pfn),
|
||||
PFN_PHYS(end_pfn - start_pfn), nid);
|
||||
PFN_PHYS(end_pfn - start_pfn),
|
||||
&memblock.memory, nid);
|
||||
|
||||
/* Node-local pgdat */
|
||||
pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data),
|
||||
|
@ -192,7 +192,8 @@ void __init setup_memory(void)
|
||||
start_pfn = memblock_region_memory_base_pfn(reg);
|
||||
end_pfn = memblock_region_memory_end_pfn(reg);
|
||||
memblock_set_node(start_pfn << PAGE_SHIFT,
|
||||
(end_pfn - start_pfn) << PAGE_SHIFT, 0);
|
||||
(end_pfn - start_pfn) << PAGE_SHIFT,
|
||||
&memblock.memory, 0);
|
||||
}
|
||||
|
||||
/* free bootmem is whole main memory */
|
||||
|
@ -645,55 +645,30 @@ EXPORT_SYMBOL(empty_zero_page);
|
||||
|
||||
void show_mem(unsigned int filter)
|
||||
{
|
||||
int i,free = 0,total = 0,reserved = 0;
|
||||
int shared = 0, cached = 0;
|
||||
int total = 0,reserved = 0;
|
||||
pg_data_t *pgdat;
|
||||
|
||||
printk(KERN_INFO "Mem-info:\n");
|
||||
show_free_areas(filter);
|
||||
if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
|
||||
return;
|
||||
#ifndef CONFIG_DISCONTIGMEM
|
||||
i = max_mapnr;
|
||||
while (i-- > 0) {
|
||||
total++;
|
||||
if (PageReserved(mem_map+i))
|
||||
reserved++;
|
||||
else if (PageSwapCache(mem_map+i))
|
||||
cached++;
|
||||
else if (!page_count(&mem_map[i]))
|
||||
free++;
|
||||
else
|
||||
shared += page_count(&mem_map[i]) - 1;
|
||||
|
||||
for_each_online_pgdat(pgdat) {
|
||||
unsigned long flags;
|
||||
int zoneid;
|
||||
|
||||
pgdat_resize_lock(pgdat, &flags);
|
||||
for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
|
||||
struct zone *zone = &pgdat->node_zones[zoneid];
|
||||
if (!populated_zone(zone))
|
||||
continue;
|
||||
|
||||
total += zone->present_pages;
|
||||
reserved = zone->present_pages - zone->managed_pages;
|
||||
}
|
||||
pgdat_resize_unlock(pgdat, &flags);
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < npmem_ranges; i++) {
|
||||
int j;
|
||||
|
||||
for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
|
||||
struct page *p;
|
||||
unsigned long flags;
|
||||
|
||||
pgdat_resize_lock(NODE_DATA(i), &flags);
|
||||
p = nid_page_nr(i, j) - node_start_pfn(i);
|
||||
|
||||
total++;
|
||||
if (PageReserved(p))
|
||||
reserved++;
|
||||
else if (PageSwapCache(p))
|
||||
cached++;
|
||||
else if (!page_count(p))
|
||||
free++;
|
||||
else
|
||||
shared += page_count(p) - 1;
|
||||
pgdat_resize_unlock(NODE_DATA(i), &flags);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
printk(KERN_INFO "%d pages of RAM\n", total);
|
||||
printk(KERN_INFO "%d reserved pages\n", reserved);
|
||||
printk(KERN_INFO "%d pages shared\n", shared);
|
||||
printk(KERN_INFO "%d pages swap cached\n", cached);
|
||||
|
||||
|
||||
#ifdef CONFIG_DISCONTIGMEM
|
||||
{
|
||||
|
@ -209,7 +209,7 @@ void __init do_init_bootmem(void)
|
||||
/* Place all memblock_regions in the same node and merge contiguous
|
||||
* memblock_regions
|
||||
*/
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
|
||||
|
||||
/* Add all physical memory to the bootmem map, mark each area
|
||||
* present.
|
||||
|
@ -670,7 +670,8 @@ static void __init parse_drconf_memory(struct device_node *memory)
|
||||
node_set_online(nid);
|
||||
sz = numa_enforce_memory_limit(base, size);
|
||||
if (sz)
|
||||
memblock_set_node(base, sz, nid);
|
||||
memblock_set_node(base, sz,
|
||||
&memblock.memory, nid);
|
||||
} while (--ranges);
|
||||
}
|
||||
}
|
||||
@ -760,7 +761,7 @@ new_range:
|
||||
continue;
|
||||
}
|
||||
|
||||
memblock_set_node(start, size, nid);
|
||||
memblock_set_node(start, size, &memblock.memory, nid);
|
||||
|
||||
if (--ranges)
|
||||
goto new_range;
|
||||
@ -797,7 +798,8 @@ static void __init setup_nonnuma(void)
|
||||
|
||||
fake_numa_create_new_node(end_pfn, &nid);
|
||||
memblock_set_node(PFN_PHYS(start_pfn),
|
||||
PFN_PHYS(end_pfn - start_pfn), nid);
|
||||
PFN_PHYS(end_pfn - start_pfn),
|
||||
&memblock.memory, nid);
|
||||
node_set_online(nid);
|
||||
}
|
||||
}
|
||||
|
@@ -2,7 +2,6 @@ menu "Machine selection"
 
 config SCORE
 	def_bool y
-	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
 	select GENERIC_ATOMIC64
@@ -13,6 +13,7 @@
 #include <linux/kdebug.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/sched.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
 
@ -230,8 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
|
||||
pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
|
||||
PAGE_KERNEL);
|
||||
|
||||
memblock_set_node(PFN_PHYS(start_pfn),
|
||||
PFN_PHYS(end_pfn - start_pfn), nid);
|
||||
memblock_set_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
|
||||
&memblock.memory, nid);
|
||||
}
|
||||
|
||||
void __init __weak plat_early_device_setup(void)
|
||||
|
@ -1021,7 +1021,8 @@ static void __init add_node_ranges(void)
|
||||
"start[%lx] end[%lx]\n",
|
||||
nid, start, this_end);
|
||||
|
||||
memblock_set_node(start, this_end - start, nid);
|
||||
memblock_set_node(start, this_end - start,
|
||||
&memblock.memory, nid);
|
||||
start = this_end;
|
||||
}
|
||||
}
|
||||
@ -1325,7 +1326,7 @@ static void __init bootmem_init_nonnuma(void)
|
||||
(top_of_ram - total_ram) >> 20);
|
||||
|
||||
init_node_masks_nonnuma();
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
|
||||
allocate_node_data(0);
|
||||
node_set_online(0);
|
||||
}
|
||||
|
@ -66,9 +66,6 @@ void show_mem(unsigned int filter)
|
||||
printk(KERN_DEFAULT "Mem-info:\n");
|
||||
show_free_areas(filter);
|
||||
|
||||
if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
|
||||
return;
|
||||
|
||||
for_each_bank(i, mi) {
|
||||
struct membank *bank = &mi->bank[i];
|
||||
unsigned int pfn1, pfn2;
|
||||
|
@@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
-static inline phys_addr_t get_max_mapped(void)
+static inline phys_addr_t get_max_low_mapped(void)
 {
-	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
+	return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT;
 }
 
 bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void)
|
||||
|
||||
corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
|
||||
|
||||
for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
|
||||
for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) {
|
||||
start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
|
||||
PAGE_SIZE, corruption_check_size);
|
||||
end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
|
||||
|
@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void)
|
||||
nr_pages += end_pfn - start_pfn;
|
||||
}
|
||||
|
||||
for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
|
||||
for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) {
|
||||
start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
|
||||
end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
|
||||
if (start_pfn < end_pfn)
|
||||
|
@@ -1119,7 +1119,7 @@ void __init setup_arch(char **cmdline_p)
 
 	setup_real_mode();
 
-	memblock_set_current_limit(get_max_mapped());
+	memblock_set_current_limit(get_max_low_mapped());
 	dma_contiguous_reserve(0);
 
 	/*
@ -665,7 +665,7 @@ void __init initmem_init(void)
|
||||
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
|
||||
#endif
|
||||
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
|
||||
sparse_memory_present_with_active_regions(0);
|
||||
|
||||
#ifdef CONFIG_FLATMEM
|
||||
|
@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start,
|
||||
#ifndef CONFIG_NUMA
|
||||
void __init initmem_init(void)
|
||||
{
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end)
|
||||
u64 i;
|
||||
phys_addr_t this_start, this_end;
|
||||
|
||||
for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
|
||||
for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
|
||||
this_start = clamp_t(phys_addr_t, this_start, start, end);
|
||||
this_end = clamp_t(phys_addr_t, this_end, start, end);
|
||||
if (this_start < this_end) {
|
||||
|
@ -491,7 +491,16 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
|
||||
|
||||
for (i = 0; i < mi->nr_blks; i++) {
|
||||
struct numa_memblk *mb = &mi->blk[i];
|
||||
memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
|
||||
memblock_set_node(mb->start, mb->end - mb->start,
|
||||
&memblock.memory, mb->nid);
|
||||
|
||||
/*
|
||||
* At this time, all memory regions reserved by memblock are
|
||||
* used by the kernel. Set the nid in memblock.reserved will
|
||||
* mark out all the nodes the kernel resides in.
|
||||
*/
|
||||
memblock_set_node(mb->start, mb->end - mb->start,
|
||||
&memblock.reserved, mb->nid);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -553,6 +562,30 @@ static void __init numa_init_array(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void __init numa_clear_kernel_node_hotplug(void)
|
||||
{
|
||||
int i, nid;
|
||||
nodemask_t numa_kernel_nodes;
|
||||
unsigned long start, end;
|
||||
struct memblock_type *type = &memblock.reserved;
|
||||
|
||||
/* Mark all kernel nodes. */
|
||||
for (i = 0; i < type->cnt; i++)
|
||||
node_set(type->regions[i].nid, numa_kernel_nodes);
|
||||
|
||||
/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
|
||||
for (i = 0; i < numa_meminfo.nr_blks; i++) {
|
||||
nid = numa_meminfo.blk[i].nid;
|
||||
if (!node_isset(nid, numa_kernel_nodes))
|
||||
continue;
|
||||
|
||||
start = numa_meminfo.blk[i].start;
|
||||
end = numa_meminfo.blk[i].end;
|
||||
|
||||
memblock_clear_hotplug(start, end - start);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init numa_init(int (*init_func)(void))
|
||||
{
|
||||
int i;
|
||||
@ -565,7 +598,12 @@ static int __init numa_init(int (*init_func)(void))
|
||||
nodes_clear(node_possible_map);
|
||||
nodes_clear(node_online_map);
|
||||
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
|
||||
WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
|
||||
WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
|
||||
MAX_NUMNODES));
|
||||
WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
|
||||
MAX_NUMNODES));
|
||||
/* In case that parsing SRAT failed. */
|
||||
WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
|
||||
numa_reset_distance();
|
||||
|
||||
ret = init_func();
|
||||
@ -601,6 +639,16 @@ static int __init numa_init(int (*init_func)(void))
|
||||
numa_clear_node(i);
|
||||
}
|
||||
numa_init_array();
|
||||
|
||||
/*
|
||||
* At very early time, the kernel have to use some memory such as
|
||||
* loading the kernel image. We cannot prevent this anyway. So any
|
||||
* node the kernel resides in should be un-hotpluggable.
|
||||
*
|
||||
* And when we come here, numa_init() won't fail.
|
||||
*/
|
||||
numa_clear_kernel_node_hotplug();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -181,6 +181,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
|
||||
(unsigned long long) start, (unsigned long long) end - 1,
|
||||
hotpluggable ? " hotplug" : "");
|
||||
|
||||
/* Mark hotplug range in memblock. */
|
||||
if (hotpluggable && memblock_mark_hotplug(start, ma->length))
|
||||
pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
|
||||
(unsigned long long)start, (unsigned long long)end - 1);
|
||||
|
||||
return 0;
|
||||
out_err_bad_srat:
|
||||
bad_srat();
|
||||
|
@@ -22,7 +22,6 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/backing-dev.h>
-#include <linux/bootmem.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
 #include <linux/export.h>
@@ -324,7 +324,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = alloc_bootmem(sizeof(struct firmware_map_entry));
+	entry = memblock_virt_alloc(sizeof(struct firmware_map_entry), 0);
 	if (WARN_ON(!entry))
 		return -ENOMEM;
 
@@ -917,7 +917,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
 
 		/* If range covers entire pagetable, free it */
 		if (!(start_pfn > level_pfn ||
-		      last_pfn < level_pfn + level_size(level))) {
+		      last_pfn < level_pfn + level_size(level) - 1)) {
 			dma_clear_pte(pte);
 			domain_flush_cache(domain, pte, sizeof(*pte));
 			free_pgtable_page(level_pte);
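The one-character change above is easiest to see with concrete numbers. The values below are purely illustrative (a page-table level spanning 512 pfns, with level_size(level) stood in by a constant); they show that a range covering a table exactly was missed by the old test:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical values, not taken from the patch: a level that
		 * spans 512 pfns, and a range that covers it exactly. */
		unsigned long level_pfn = 0, level_size = 512;
		unsigned long start_pfn = 0, last_pfn = 511;

		/* Old test: 511 < 512 is true, so the "covers entire pagetable"
		 * branch was skipped and the table was never freed. */
		int old_covers = !(start_pfn > level_pfn ||
				   last_pfn < level_pfn + level_size);
		/* New test: 511 < 511 is false, so the fully covered table is freed. */
		int new_covers = !(start_pfn > level_pfn ||
				   last_pfn < level_pfn + level_size - 1);

		printf("old=%d new=%d\n", old_covers, new_covers);	/* old=0 new=1 */
		return 0;
	}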
@@ -680,7 +680,8 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
 	struct i2c_msg __user *tmsgs;
 	struct i2c_msg32 __user *umsgs;
 	compat_caddr_t datap;
-	int nmsgs, i;
+	u32 nmsgs;
+	int i;
 
 	if (get_user(nmsgs, &udata->nmsgs))
 		return -EFAULT;
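The declaration change above matters because nmsgs comes straight from userspace and is compared against a small limit a few lines later; read into a signed int, a huge value appears negative and slips past that check. A small stand-alone illustration (the I2C_RDWR_IOCTL_MAX_MSGS value matches the i2c-dev limit as far as I know; everything else here is hypothetical):

	#include <stdio.h>

	#define I2C_RDWR_IOCTL_MAX_MSGS 42	/* i2c-dev ioctl limit, if memory serves */

	int main(void)
	{
		unsigned int from_user = 0x80000000u;	/* hostile nmsgs value */

		int s = (int)from_user;		/* old: signed copy, negative on the
						 * usual two's-complement targets */
		unsigned int u = from_user;	/* new: kept unsigned */

		/* The bounds check that follows in do_i2c_rdwr_ioctl(): the signed
		 * copy compares as negative and sneaks past the limit. */
		printf("signed rejected:   %d\n", s > I2C_RDWR_IOCTL_MAX_MSGS);	/* 0 */
		printf("unsigned rejected: %d\n", u > I2C_RDWR_IOCTL_MAX_MSGS);	/* 1 */
		return 0;
	}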
@ -82,21 +82,24 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
|
||||
* events.
|
||||
*/
|
||||
static int dnotify_handle_event(struct fsnotify_group *group,
|
||||
struct inode *inode,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *vfsmount_mark,
|
||||
struct fsnotify_event *event)
|
||||
u32 mask, void *data, int data_type,
|
||||
const unsigned char *file_name)
|
||||
{
|
||||
struct dnotify_mark *dn_mark;
|
||||
struct inode *to_tell;
|
||||
struct dnotify_struct *dn;
|
||||
struct dnotify_struct **prev;
|
||||
struct fown_struct *fown;
|
||||
__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
|
||||
__u32 test_mask = mask & ~FS_EVENT_ON_CHILD;
|
||||
|
||||
/* not a dir, dnotify doesn't care */
|
||||
if (!S_ISDIR(inode->i_mode))
|
||||
return 0;
|
||||
|
||||
BUG_ON(vfsmount_mark);
|
||||
|
||||
to_tell = event->to_tell;
|
||||
|
||||
dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
|
||||
|
||||
spin_lock(&inode_mark->lock);
|
||||
@ -122,23 +125,6 @@ static int dnotify_handle_event(struct fsnotify_group *group,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given an inode and mask determine if dnotify would be interested in sending
|
||||
* userspace notification for that pair.
|
||||
*/
|
||||
static bool dnotify_should_send_event(struct fsnotify_group *group,
|
||||
struct inode *inode,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *vfsmount_mark,
|
||||
__u32 mask, void *data, int data_type)
|
||||
{
|
||||
/* not a dir, dnotify doesn't care */
|
||||
if (!S_ISDIR(inode->i_mode))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
|
||||
{
|
||||
struct dnotify_mark *dn_mark = container_of(fsn_mark,
|
||||
@ -152,10 +138,6 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
|
||||
|
||||
static struct fsnotify_ops dnotify_fsnotify_ops = {
|
||||
.handle_event = dnotify_handle_event,
|
||||
.should_send_event = dnotify_should_send_event,
|
||||
.free_group_priv = NULL,
|
||||
.freeing_mark = NULL,
|
||||
.free_event_priv = NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -9,31 +9,27 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
|
||||
{
|
||||
pr_debug("%s: old=%p new=%p\n", __func__, old, new);
|
||||
#include "fanotify.h"
|
||||
|
||||
static bool should_merge(struct fsnotify_event *old_fsn,
|
||||
struct fsnotify_event *new_fsn)
|
||||
{
|
||||
struct fanotify_event_info *old, *new;
|
||||
|
||||
if (old->to_tell == new->to_tell &&
|
||||
old->data_type == new->data_type &&
|
||||
old->tgid == new->tgid) {
|
||||
switch (old->data_type) {
|
||||
case (FSNOTIFY_EVENT_PATH):
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
/* dont merge two permission events */
|
||||
if ((old->mask & FAN_ALL_PERM_EVENTS) &&
|
||||
(new->mask & FAN_ALL_PERM_EVENTS))
|
||||
return false;
|
||||
/* dont merge two permission events */
|
||||
if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) &&
|
||||
(new_fsn->mask & FAN_ALL_PERM_EVENTS))
|
||||
return false;
|
||||
#endif
|
||||
if ((old->path.mnt == new->path.mnt) &&
|
||||
(old->path.dentry == new->path.dentry))
|
||||
return true;
|
||||
break;
|
||||
case (FSNOTIFY_EVENT_NONE):
|
||||
return true;
|
||||
default:
|
||||
BUG();
|
||||
};
|
||||
}
|
||||
pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
|
||||
old = FANOTIFY_E(old_fsn);
|
||||
new = FANOTIFY_E(new_fsn);
|
||||
|
||||
if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
|
||||
old->path.mnt == new->path.mnt &&
|
||||
old->path.dentry == new->path.dentry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -41,59 +37,28 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
|
||||
static struct fsnotify_event *fanotify_merge(struct list_head *list,
|
||||
struct fsnotify_event *event)
|
||||
{
|
||||
struct fsnotify_event_holder *test_holder;
|
||||
struct fsnotify_event *test_event = NULL;
|
||||
struct fsnotify_event *new_event;
|
||||
struct fsnotify_event *test_event;
|
||||
bool do_merge = false;
|
||||
|
||||
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
|
||||
|
||||
|
||||
list_for_each_entry_reverse(test_holder, list, event_list) {
|
||||
if (should_merge(test_holder->event, event)) {
|
||||
test_event = test_holder->event;
|
||||
list_for_each_entry_reverse(test_event, list, list) {
|
||||
if (should_merge(test_event, event)) {
|
||||
do_merge = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!test_event)
|
||||
if (!do_merge)
|
||||
return NULL;
|
||||
|
||||
fsnotify_get_event(test_event);
|
||||
|
||||
/* if they are exactly the same we are done */
|
||||
if (test_event->mask == event->mask)
|
||||
return test_event;
|
||||
|
||||
/*
|
||||
* if the refcnt == 2 this is the only queue
|
||||
* for this event and so we can update the mask
|
||||
* in place.
|
||||
*/
|
||||
if (atomic_read(&test_event->refcnt) == 2) {
|
||||
test_event->mask |= event->mask;
|
||||
return test_event;
|
||||
}
|
||||
|
||||
new_event = fsnotify_clone_event(test_event);
|
||||
|
||||
/* done with test_event */
|
||||
fsnotify_put_event(test_event);
|
||||
|
||||
/* couldn't allocate memory, merge was not possible */
|
||||
if (unlikely(!new_event))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* build new event and replace it on the list */
|
||||
new_event->mask = (test_event->mask | event->mask);
|
||||
fsnotify_replace_event(test_holder, new_event);
|
||||
|
||||
/* we hold a reference on new_event from clone_event */
|
||||
return new_event;
|
||||
test_event->mask |= event->mask;
|
||||
return test_event;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
static int fanotify_get_response_from_access(struct fsnotify_group *group,
|
||||
struct fsnotify_event *event)
|
||||
struct fanotify_event_info *event)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -106,7 +71,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
|
||||
return 0;
|
||||
|
||||
/* userspace responded, convert to something usable */
|
||||
spin_lock(&event->lock);
|
||||
switch (event->response) {
|
||||
case FAN_ALLOW:
|
||||
ret = 0;
|
||||
@ -116,7 +80,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
|
||||
ret = -EPERM;
|
||||
}
|
||||
event->response = 0;
|
||||
spin_unlock(&event->lock);
|
||||
|
||||
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
|
||||
group, event, ret);
|
||||
@ -125,58 +88,17 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
|
||||
}
|
||||
#endif
|
||||
|
||||
static int fanotify_handle_event(struct fsnotify_group *group,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *fanotify_mark,
|
||||
struct fsnotify_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
struct fsnotify_event *notify_event = NULL;
|
||||
|
||||
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
|
||||
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
|
||||
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
|
||||
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
|
||||
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
|
||||
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
|
||||
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
|
||||
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
|
||||
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
|
||||
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
|
||||
|
||||
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
|
||||
|
||||
notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
|
||||
if (IS_ERR(notify_event))
|
||||
return PTR_ERR(notify_event);
|
||||
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
if (event->mask & FAN_ALL_PERM_EVENTS) {
|
||||
/* if we merged we need to wait on the new event */
|
||||
if (notify_event)
|
||||
event = notify_event;
|
||||
ret = fanotify_get_response_from_access(group, event);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (notify_event)
|
||||
fsnotify_put_event(notify_event);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool fanotify_should_send_event(struct fsnotify_group *group,
|
||||
struct inode *to_tell,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *vfsmnt_mark,
|
||||
__u32 event_mask, void *data, int data_type)
|
||||
u32 event_mask,
|
||||
void *data, int data_type)
|
||||
{
|
||||
__u32 marks_mask, marks_ignored_mask;
|
||||
struct path *path = data;
|
||||
|
||||
pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
|
||||
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
|
||||
inode_mark, vfsmnt_mark, event_mask, data, data_type);
|
||||
pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
|
||||
" data_type=%d\n", __func__, inode_mark, vfsmnt_mark,
|
||||
event_mask, data, data_type);
|
||||
|
||||
/* if we don't have enough info to send an event to userspace say no */
|
||||
if (data_type != FSNOTIFY_EVENT_PATH)
|
||||
@ -217,6 +139,74 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
|
||||
return false;
|
||||
}
|
||||
|
||||
static int fanotify_handle_event(struct fsnotify_group *group,
|
||||
struct inode *inode,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *fanotify_mark,
|
||||
u32 mask, void *data, int data_type,
|
||||
const unsigned char *file_name)
|
||||
{
|
||||
int ret = 0;
|
||||
struct fanotify_event_info *event;
|
||||
struct fsnotify_event *fsn_event;
|
||||
struct fsnotify_event *notify_fsn_event;
|
||||
|
||||
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
|
||||
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
|
||||
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
|
||||
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
|
||||
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
|
||||
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
|
||||
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
|
||||
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
|
||||
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
|
||||
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
|
||||
|
||||
if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data,
|
||||
data_type))
|
||||
return 0;
|
||||
|
||||
pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
|
||||
mask);
|
||||
|
||||
event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
|
||||
if (unlikely(!event))
|
||||
return -ENOMEM;
|
||||
|
||||
fsn_event = &event->fse;
|
||||
fsnotify_init_event(fsn_event, inode, mask);
|
||||
event->tgid = get_pid(task_tgid(current));
|
||||
if (data_type == FSNOTIFY_EVENT_PATH) {
|
||||
struct path *path = data;
|
||||
event->path = *path;
|
||||
path_get(&event->path);
|
||||
} else {
|
||||
event->path.mnt = NULL;
|
||||
event->path.dentry = NULL;
|
||||
}
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
event->response = 0;
|
||||
#endif
|
||||
|
||||
notify_fsn_event = fsnotify_add_notify_event(group, fsn_event,
|
||||
fanotify_merge);
|
||||
if (notify_fsn_event) {
|
||||
/* Our event wasn't used in the end. Free it. */
|
||||
fsnotify_destroy_event(group, fsn_event);
|
||||
if (IS_ERR(notify_fsn_event))
|
||||
return PTR_ERR(notify_fsn_event);
|
||||
/* We need to ask about a different events after a merge... */
|
||||
event = FANOTIFY_E(notify_fsn_event);
|
||||
fsn_event = notify_fsn_event;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
if (fsn_event->mask & FAN_ALL_PERM_EVENTS)
|
||||
ret = fanotify_get_response_from_access(group, event);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void fanotify_free_group_priv(struct fsnotify_group *group)
|
||||
{
|
||||
struct user_struct *user;
|
||||
@ -226,10 +216,18 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
|
||||
free_uid(user);
|
||||
}
|
||||
|
||||
static void fanotify_free_event(struct fsnotify_event *fsn_event)
|
||||
{
|
||||
struct fanotify_event_info *event;
|
||||
|
||||
event = FANOTIFY_E(fsn_event);
|
||||
path_put(&event->path);
|
||||
put_pid(event->tgid);
|
||||
kmem_cache_free(fanotify_event_cachep, event);
|
||||
}
|
||||
|
||||
const struct fsnotify_ops fanotify_fsnotify_ops = {
|
||||
.handle_event = fanotify_handle_event,
|
||||
.should_send_event = fanotify_should_send_event,
|
||||
.free_group_priv = fanotify_free_group_priv,
|
||||
.free_event_priv = NULL,
|
||||
.freeing_mark = NULL,
|
||||
.free_event = fanotify_free_event,
|
||||
};
|
||||
|
fs/notify/fanotify/fanotify.h (new file, 23 lines)
@@ -0,0 +1,23 @@
|
||||
#include <linux/fsnotify_backend.h>
|
||||
#include <linux/path.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
extern struct kmem_cache *fanotify_event_cachep;
|
||||
|
||||
struct fanotify_event_info {
|
||||
struct fsnotify_event fse;
|
||||
/*
|
||||
* We hold ref to this path so it may be dereferenced at any point
|
||||
* during this object's lifetime
|
||||
*/
|
||||
struct path path;
|
||||
struct pid *tgid;
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
u32 response; /* userspace answer to question */
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
|
||||
{
|
||||
return container_of(fse, struct fanotify_event_info, fse);
|
||||
}
|
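The new fanotify.h above leans on the standard embedded-struct idiom: struct fanotify_event_info embeds the generic struct fsnotify_event as its fse member, and FANOTIFY_E() walks back to the container. A stand-alone, user-space illustration of that idiom (the struct contents are stand-ins, not the kernel definitions):

	#include <stdio.h>
	#include <stddef.h>

	/* Stand-ins for the kernel types; only the embedding pattern is real. */
	struct fsnotify_event { unsigned int mask; };
	struct fanotify_event_info { struct fsnotify_event fse; int tgid; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	int main(void)
	{
		struct fanotify_event_info info = { .fse = { .mask = 1 }, .tgid = 42 };
		struct fsnotify_event *fsn = &info.fse;	/* generic handle passed around */

		/* FANOTIFY_E(): recover the containing fanotify event from the
		 * embedded fsnotify_event via container_of(). */
		struct fanotify_event_info *back =
			container_of(fsn, struct fanotify_event_info, fse);

		printf("tgid=%d\n", back->tgid);	/* prints 42 */
		return 0;
	}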
@ -19,6 +19,7 @@
|
||||
|
||||
#include "../../mount.h"
|
||||
#include "../fdinfo.h"
|
||||
#include "fanotify.h"
|
||||
|
||||
#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
|
||||
#define FANOTIFY_DEFAULT_MAX_MARKS 8192
|
||||
@ -28,11 +29,12 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops;
|
||||
|
||||
static struct kmem_cache *fanotify_mark_cache __read_mostly;
|
||||
static struct kmem_cache *fanotify_response_event_cache __read_mostly;
|
||||
struct kmem_cache *fanotify_event_cachep __read_mostly;
|
||||
|
||||
struct fanotify_response_event {
|
||||
struct list_head list;
|
||||
__s32 fd;
|
||||
struct fsnotify_event *event;
|
||||
struct fanotify_event_info *event;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -61,8 +63,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
|
||||
}
|
||||
|
||||
static int create_fd(struct fsnotify_group *group,
|
||||
struct fsnotify_event *event,
|
||||
struct file **file)
|
||||
struct fanotify_event_info *event,
|
||||
struct file **file)
|
||||
{
|
||||
int client_fd;
|
||||
struct file *new_file;
|
||||
@ -73,12 +75,6 @@ static int create_fd(struct fsnotify_group *group,
|
||||
if (client_fd < 0)
|
||||
return client_fd;
|
||||
|
||||
if (event->data_type != FSNOTIFY_EVENT_PATH) {
|
||||
WARN_ON(1);
|
||||
put_unused_fd(client_fd);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* we need a new file handle for the userspace program so it can read even if it was
|
||||
* originally opened O_WRONLY.
|
||||
@ -109,23 +105,25 @@ static int create_fd(struct fsnotify_group *group,
|
||||
}
|
||||
|
||||
static int fill_event_metadata(struct fsnotify_group *group,
|
||||
struct fanotify_event_metadata *metadata,
|
||||
struct fsnotify_event *event,
|
||||
struct file **file)
|
||||
struct fanotify_event_metadata *metadata,
|
||||
struct fsnotify_event *fsn_event,
|
||||
struct file **file)
|
||||
{
|
||||
int ret = 0;
|
||||
struct fanotify_event_info *event;
|
||||
|
||||
pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
|
||||
group, metadata, event);
|
||||
group, metadata, fsn_event);
|
||||
|
||||
*file = NULL;
|
||||
event = container_of(fsn_event, struct fanotify_event_info, fse);
|
||||
metadata->event_len = FAN_EVENT_METADATA_LEN;
|
||||
metadata->metadata_len = FAN_EVENT_METADATA_LEN;
|
||||
metadata->vers = FANOTIFY_METADATA_VERSION;
|
||||
metadata->reserved = 0;
|
||||
metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
|
||||
metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
|
||||
metadata->pid = pid_vnr(event->tgid);
|
||||
if (unlikely(event->mask & FAN_Q_OVERFLOW))
|
||||
if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
|
||||
metadata->fd = FAN_NOFD;
|
||||
else {
|
||||
metadata->fd = create_fd(group, event, file);
|
||||
@ -209,7 +207,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
|
||||
if (!re)
|
||||
return -ENOMEM;
|
||||
|
||||
re->event = event;
|
||||
re->event = FANOTIFY_E(event);
|
||||
re->fd = fd;
|
||||
|
||||
mutex_lock(&group->fanotify_data.access_mutex);
|
||||
@ -217,7 +215,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
|
||||
if (atomic_read(&group->fanotify_data.bypass_perm)) {
|
||||
mutex_unlock(&group->fanotify_data.access_mutex);
|
||||
kmem_cache_free(fanotify_response_event_cache, re);
|
||||
event->response = FAN_ALLOW;
|
||||
FANOTIFY_E(event)->response = FAN_ALLOW;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -273,7 +271,7 @@ out_close_fd:
|
||||
out:
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
if (event->mask & FAN_ALL_PERM_EVENTS) {
|
||||
event->response = FAN_DENY;
|
||||
FANOTIFY_E(event)->response = FAN_DENY;
|
||||
wake_up(&group->fanotify_data.access_waitq);
|
||||
}
|
||||
#endif
|
||||
@ -321,7 +319,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
|
||||
if (IS_ERR(kevent))
|
||||
break;
|
||||
ret = copy_event_to_user(group, kevent, buf);
|
||||
fsnotify_put_event(kevent);
|
||||
fsnotify_destroy_event(group, kevent);
|
||||
if (ret < 0)
|
||||
break;
|
||||
buf += ret;
|
||||
@ -409,7 +407,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
|
||||
static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct fsnotify_group *group;
|
||||
struct fsnotify_event_holder *holder;
|
||||
struct fsnotify_event *fsn_event;
|
||||
void __user *p;
|
||||
int ret = -ENOTTY;
|
||||
size_t send_len = 0;
|
||||
@ -421,7 +419,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
|
||||
switch (cmd) {
|
||||
case FIONREAD:
|
||||
mutex_lock(&group->notification_mutex);
|
||||
list_for_each_entry(holder, &group->notification_list, event_list)
|
||||
list_for_each_entry(fsn_event, &group->notification_list, list)
|
||||
send_len += FAN_EVENT_METADATA_LEN;
|
||||
mutex_unlock(&group->notification_mutex);
|
||||
ret = put_user(send_len, (int __user *) p);
|
||||
@ -906,6 +904,7 @@ static int __init fanotify_user_setup(void)
|
||||
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
|
||||
fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
|
||||
SLAB_PANIC);
|
||||
fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -128,8 +128,7 @@ static int send_to_group(struct inode *to_tell,
|
||||
struct fsnotify_mark *vfsmount_mark,
|
||||
__u32 mask, void *data,
|
||||
int data_is, u32 cookie,
|
||||
const unsigned char *file_name,
|
||||
struct fsnotify_event **event)
|
||||
const unsigned char *file_name)
|
||||
{
|
||||
struct fsnotify_group *group = NULL;
|
||||
__u32 inode_test_mask = 0;
|
||||
@ -170,27 +169,17 @@ static int send_to_group(struct inode *to_tell,
|
||||
|
||||
pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
|
||||
" inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
|
||||
" data=%p data_is=%d cookie=%d event=%p\n",
|
||||
" data=%p data_is=%d cookie=%d\n",
|
||||
__func__, group, to_tell, mask, inode_mark,
|
||||
inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
|
||||
data_is, cookie, *event);
|
||||
data_is, cookie);
|
||||
|
||||
if (!inode_test_mask && !vfsmount_test_mask)
|
||||
return 0;
|
||||
|
||||
if (group->ops->should_send_event(group, to_tell, inode_mark,
|
||||
vfsmount_mark, mask, data,
|
||||
data_is) == false)
|
||||
return 0;
|
||||
|
||||
if (!*event) {
|
||||
*event = fsnotify_create_event(to_tell, mask, data,
|
||||
data_is, file_name,
|
||||
cookie, GFP_KERNEL);
|
||||
if (!*event)
|
||||
return -ENOMEM;
|
||||
}
|
||||
return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
|
||||
return group->ops->handle_event(group, to_tell, inode_mark,
|
||||
vfsmount_mark, mask, data, data_is,
|
||||
file_name);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -205,7 +194,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
|
||||
struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
|
||||
struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
|
||||
struct fsnotify_group *inode_group, *vfsmount_group;
|
||||
struct fsnotify_event *event = NULL;
|
||||
struct mount *mnt;
|
||||
int idx, ret = 0;
|
||||
/* global tests shouldn't care about events on child only the specific event */
|
||||
@ -258,18 +246,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
|
||||
|
||||
if (inode_group > vfsmount_group) {
|
||||
/* handle inode */
|
||||
ret = send_to_group(to_tell, inode_mark, NULL, mask, data,
|
||||
data_is, cookie, file_name, &event);
|
||||
ret = send_to_group(to_tell, inode_mark, NULL, mask,
|
||||
data, data_is, cookie, file_name);
|
||||
/* we didn't use the vfsmount_mark */
|
||||
vfsmount_group = NULL;
|
||||
} else if (vfsmount_group > inode_group) {
|
||||
ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data,
|
||||
data_is, cookie, file_name, &event);
|
||||
ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
|
||||
data, data_is, cookie, file_name);
|
||||
inode_group = NULL;
|
||||
} else {
|
||||
ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
|
||||
mask, data, data_is, cookie, file_name,
|
||||
&event);
|
||||
mask, data, data_is, cookie,
|
||||
file_name);
|
||||
}
|
||||
|
||||
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
|
||||
@ -285,12 +273,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
|
||||
ret = 0;
|
||||
out:
|
||||
srcu_read_unlock(&fsnotify_mark_srcu, idx);
|
||||
/*
|
||||
* fsnotify_create_event() took a reference so the event can't be cleaned
|
||||
* up while we are still trying to add it to lists, drop that one.
|
||||
*/
|
||||
if (event)
|
||||
fsnotify_put_event(event);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -99,6 +99,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
|
||||
INIT_LIST_HEAD(&group->marks_list);
|
||||
|
||||
group->ops = ops;
|
||||
fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW);
|
||||
|
||||
return group;
|
||||
}
|
||||
|
@ -2,11 +2,12 @@
|
||||
#include <linux/inotify.h>
|
||||
#include <linux/slab.h> /* struct kmem_cache */
|
||||
|
||||
extern struct kmem_cache *event_priv_cachep;
|
||||
|
||||
struct inotify_event_private_data {
|
||||
struct fsnotify_event_private_data fsnotify_event_priv_data;
|
||||
struct inotify_event_info {
|
||||
struct fsnotify_event fse;
|
||||
int wd;
|
||||
u32 sync_cookie;
|
||||
int name_len;
|
||||
char name[];
|
||||
};
|
||||
|
||||
struct inotify_inode_mark {
|
||||
@ -14,8 +15,18 @@ struct inotify_inode_mark {
|
||||
int wd;
|
||||
};
|
||||
|
||||
static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
|
||||
{
|
||||
return container_of(fse, struct inotify_event_info, fse);
|
||||
}
|
||||
|
||||
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
|
||||
struct fsnotify_group *group);
|
||||
extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
|
||||
extern int inotify_handle_event(struct fsnotify_group *group,
|
||||
struct inode *inode,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *vfsmount_mark,
|
||||
u32 mask, void *data, int data_type,
|
||||
const unsigned char *file_name);
|
||||
|
||||
extern const struct fsnotify_ops inotify_fsnotify_ops;
|
||||
|
@ -34,100 +34,87 @@
|
||||
#include "inotify.h"
|
||||
|
||||
/*
|
||||
* Check if 2 events contain the same information. We do not compare private data
|
||||
* but at this moment that isn't a problem for any know fsnotify listeners.
|
||||
* Check if 2 events contain the same information.
|
||||
*/
|
||||
static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
|
||||
static bool event_compare(struct fsnotify_event *old_fsn,
|
||||
struct fsnotify_event *new_fsn)
|
||||
{
|
||||
if ((old->mask == new->mask) &&
|
||||
(old->to_tell == new->to_tell) &&
|
||||
(old->data_type == new->data_type) &&
|
||||
(old->name_len == new->name_len)) {
|
||||
switch (old->data_type) {
|
||||
case (FSNOTIFY_EVENT_INODE):
|
||||
/* remember, after old was put on the wait_q we aren't
|
||||
* allowed to look at the inode any more, only thing
|
||||
* left to check was if the file_name is the same */
|
||||
if (!old->name_len ||
|
||||
!strcmp(old->file_name, new->file_name))
|
||||
return true;
|
||||
break;
|
||||
case (FSNOTIFY_EVENT_PATH):
|
||||
if ((old->path.mnt == new->path.mnt) &&
|
||||
(old->path.dentry == new->path.dentry))
|
||||
return true;
|
||||
break;
|
||||
case (FSNOTIFY_EVENT_NONE):
|
||||
if (old->mask & FS_Q_OVERFLOW)
|
||||
return true;
|
||||
else if (old->mask & FS_IN_IGNORED)
|
||||
return false;
|
||||
return true;
|
||||
};
|
||||
}
|
||||
struct inotify_event_info *old, *new;
|
||||
|
||||
if (old_fsn->mask & FS_IN_IGNORED)
|
||||
return false;
|
||||
old = INOTIFY_E(old_fsn);
|
||||
new = INOTIFY_E(new_fsn);
|
||||
if ((old_fsn->mask == new_fsn->mask) &&
|
||||
(old_fsn->inode == new_fsn->inode) &&
|
||||
(old->name_len == new->name_len) &&
|
||||
(!old->name_len || !strcmp(old->name, new->name)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct fsnotify_event *inotify_merge(struct list_head *list,
|
||||
struct fsnotify_event *event)
|
||||
{
|
||||
struct fsnotify_event_holder *last_holder;
|
||||
struct fsnotify_event *last_event;
|
||||
|
||||
/* and the list better be locked by something too */
|
||||
spin_lock(&event->lock);
|
||||
|
||||
last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
|
||||
last_event = last_holder->event;
|
||||
if (event_compare(last_event, event))
|
||||
fsnotify_get_event(last_event);
|
||||
else
|
||||
last_event = NULL;
|
||||
|
||||
spin_unlock(&event->lock);
|
||||
|
||||
last_event = list_entry(list->prev, struct fsnotify_event, list);
|
||||
if (!event_compare(last_event, event))
|
||||
return NULL;
|
||||
return last_event;
|
||||
}
|
||||
|
||||
static int inotify_handle_event(struct fsnotify_group *group,
				struct fsnotify_mark *inode_mark,
				struct fsnotify_mark *vfsmount_mark,
				struct fsnotify_event *event)
int inotify_handle_event(struct fsnotify_group *group,
			 struct inode *inode,
			 struct fsnotify_mark *inode_mark,
			 struct fsnotify_mark *vfsmount_mark,
			 u32 mask, void *data, int data_type,
			 const unsigned char *file_name)
{
	struct inotify_inode_mark *i_mark;
	struct inode *to_tell;
	struct inotify_event_private_data *event_priv;
	struct fsnotify_event_private_data *fsn_event_priv;
	struct inotify_event_info *event;
	struct fsnotify_event *added_event;
	int wd, ret = 0;
	struct fsnotify_event *fsn_event;
	int ret = 0;
	int len = 0;
	int alloc_len = sizeof(struct inotify_event_info);

	BUG_ON(vfsmount_mark);

	pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
		 event, event->to_tell, event->mask);
	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
	    (data_type == FSNOTIFY_EVENT_PATH)) {
		struct path *path = data;

		to_tell = event->to_tell;
		if (d_unlinked(path->dentry))
			return 0;
	}
	if (file_name) {
		len = strlen(file_name);
		alloc_len += len + 1;
	}

	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
		 mask);

	i_mark = container_of(inode_mark, struct inotify_inode_mark,
			      fsn_mark);
	wd = i_mark->wd;

	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
	if (unlikely(!event_priv))
	event = kmalloc(alloc_len, GFP_KERNEL);
	if (unlikely(!event))
		return -ENOMEM;

	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
	fsn_event = &event->fse;
	fsnotify_init_event(fsn_event, inode, mask);
	event->wd = i_mark->wd;
	event->name_len = len;
	if (len)
		strcpy(event->name, file_name);

	fsnotify_get_group(group);
	fsn_event_priv->group = group;
	event_priv->wd = wd;

	added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
	added_event = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
	if (added_event) {
		inotify_free_event_priv(fsn_event_priv);
		if (!IS_ERR(added_event))
			fsnotify_put_event(added_event);
		else
			/* Our event wasn't used in the end. Free it. */
			fsnotify_destroy_event(group, fsn_event);
		if (IS_ERR(added_event))
			ret = PTR_ERR(added_event);
	}

@@ -142,22 +129,6 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify
	inotify_ignored_and_remove_idr(fsn_mark, group);
}

static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
				      struct fsnotify_mark *inode_mark,
				      struct fsnotify_mark *vfsmount_mark,
				      __u32 mask, void *data, int data_type)
{
	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
	    (data_type == FSNOTIFY_EVENT_PATH)) {
		struct path *path = data;

		if (d_unlinked(path->dentry))
			return false;
	}

	return true;
}

/*
 * This is NEVER supposed to be called.  Inotify marks should either have been
 * removed from the idr when the watch was removed or in the
@@ -202,22 +173,14 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
	free_uid(group->inotify_data.user);
}

void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
static void inotify_free_event(struct fsnotify_event *fsn_event)
{
	struct inotify_event_private_data *event_priv;


	event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
				  fsnotify_event_priv_data);

	fsnotify_put_group(fsn_event_priv->group);
	kmem_cache_free(event_priv_cachep, event_priv);
	kfree(INOTIFY_E(fsn_event));
}

const struct fsnotify_ops inotify_fsnotify_ops = {
	.handle_event = inotify_handle_event,
	.should_send_event = inotify_should_send_event,
	.free_group_priv = inotify_free_group_priv,
	.free_event_priv = inotify_free_event_priv,
	.free_event = inotify_free_event,
	.freeing_mark = inotify_freeing_mark,
};

@@ -50,7 +50,6 @@ static int inotify_max_queued_events __read_mostly;
static int inotify_max_user_watches __read_mostly;

static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
struct kmem_cache *event_priv_cachep __read_mostly;

#ifdef CONFIG_SYSCTL

@@ -124,6 +123,16 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
	return ret;
}

static int round_event_name_len(struct fsnotify_event *fsn_event)
{
	struct inotify_event_info *event;

	event = INOTIFY_E(fsn_event);
	if (!event->name_len)
		return 0;
	return roundup(event->name_len + 1, sizeof(struct inotify_event));
}

/*
 * Get an inotify_kernel_event if one exists and is small
 * enough to fit in "count". Return an error pointer if
@@ -144,9 +153,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	if (event->name_len)
		event_size += roundup(event->name_len + 1, event_size);

	event_size += round_event_name_len(event);
	if (event_size > count)
		return ERR_PTR(-EINVAL);

@@ -164,40 +171,27 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 * buffer we had in "get_one_event()" above.
 */
static ssize_t copy_event_to_user(struct fsnotify_group *group,
				  struct fsnotify_event *event,
				  struct fsnotify_event *fsn_event,
				  char __user *buf)
{
	struct inotify_event inotify_event;
	struct fsnotify_event_private_data *fsn_priv;
	struct inotify_event_private_data *priv;
	struct inotify_event_info *event;
	size_t event_size = sizeof(struct inotify_event);
	size_t name_len = 0;
	size_t name_len;
	size_t pad_name_len;

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	/* we get the inotify watch descriptor from the event private data */
	spin_lock(&event->lock);
	fsn_priv = fsnotify_remove_priv_from_event(group, event);
	spin_unlock(&event->lock);

	if (!fsn_priv)
		inotify_event.wd = -1;
	else {
		priv = container_of(fsn_priv, struct inotify_event_private_data,
				    fsnotify_event_priv_data);
		inotify_event.wd = priv->wd;
		inotify_free_event_priv(fsn_priv);
	}
	pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);

	event = INOTIFY_E(fsn_event);
	name_len = event->name_len;
	/*
	 * round up event->name_len so it is a multiple of event_size
	 * round up name length so it is a multiple of event_size
	 * plus an extra byte for the terminating '\0'.
	 */
	if (event->name_len)
		name_len = roundup(event->name_len + 1, event_size);
	inotify_event.len = name_len;

	inotify_event.mask = inotify_mask_to_arg(event->mask);
	pad_name_len = round_event_name_len(fsn_event);
	inotify_event.len = pad_name_len;
	inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
	inotify_event.wd = event->wd;
	inotify_event.cookie = event->sync_cookie;

	/* send the main event */
@@ -209,20 +203,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
	/*
	 * fsnotify only stores the pathname, so here we have to send the pathname
	 * and then pad that pathname out to a multiple of sizeof(inotify_event)
	 * with zeros. I get my zeros from the nul_inotify_event.
	 * with zeros.
	 */
	if (name_len) {
		unsigned int len_to_zero = name_len - event->name_len;
	if (pad_name_len) {
		/* copy the path name */
		if (copy_to_user(buf, event->file_name, event->name_len))
		if (copy_to_user(buf, event->name, name_len))
			return -EFAULT;
		buf += event->name_len;
		buf += name_len;

		/* fill userspace with 0's */
		if (clear_user(buf, len_to_zero))
		if (clear_user(buf, pad_name_len - name_len))
			return -EFAULT;
		buf += len_to_zero;
		event_size += name_len;
		event_size += pad_name_len;
	}

	return event_size;
@@ -254,7 +246,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
		if (IS_ERR(kevent))
			break;
		ret = copy_event_to_user(group, kevent, buf);
		fsnotify_put_event(kevent);
		fsnotify_destroy_event(group, kevent);
		if (ret < 0)
			break;
		buf += ret;
@@ -297,8 +289,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	struct fsnotify_group *group;
	struct fsnotify_event_holder *holder;
	struct fsnotify_event *event;
	struct fsnotify_event *fsn_event;
	void __user *p;
	int ret = -ENOTTY;
	size_t send_len = 0;
@@ -311,12 +302,10 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
	switch (cmd) {
	case FIONREAD:
		mutex_lock(&group->notification_mutex);
		list_for_each_entry(holder, &group->notification_list, event_list) {
			event = holder->event;
		list_for_each_entry(fsn_event, &group->notification_list,
				    list) {
			send_len += sizeof(struct inotify_event);
			if (event->name_len)
				send_len += roundup(event->name_len + 1,
						sizeof(struct inotify_event));
			send_len += round_event_name_len(fsn_event);
		}
		mutex_unlock(&group->notification_mutex);
		ret = put_user(send_len, (int __user *) p);
@@ -503,43 +492,12 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
				    struct fsnotify_group *group)
{
	struct inotify_inode_mark *i_mark;
	struct fsnotify_event *ignored_event, *notify_event;
	struct inotify_event_private_data *event_priv;
	struct fsnotify_event_private_data *fsn_event_priv;
	int ret;

	/* Queue ignore event for the watch */
	inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
			     NULL, FSNOTIFY_EVENT_NONE, NULL);

	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);

	ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
					      FSNOTIFY_EVENT_NONE, NULL, 0,
					      GFP_NOFS);
	if (!ignored_event)
		goto skip_send_ignore;

	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
	if (unlikely(!event_priv))
		goto skip_send_ignore;

	fsn_event_priv = &event_priv->fsnotify_event_priv_data;

	fsnotify_get_group(group);
	fsn_event_priv->group = group;
	event_priv->wd = i_mark->wd;

	notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
	if (notify_event) {
		if (IS_ERR(notify_event))
			ret = PTR_ERR(notify_event);
		else
			fsnotify_put_event(notify_event);
		inotify_free_event_priv(fsn_event_priv);
	}

skip_send_ignore:
	/* matches the reference taken when the event was created */
	if (ignored_event)
		fsnotify_put_event(ignored_event);

	/* remove this mark from the idr */
	inotify_remove_from_idr(group, i_mark);

@@ -836,7 +794,6 @@ static int __init inotify_user_setup(void)
	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);

	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);

	inotify_max_queued_events = 16384;
	inotify_max_user_instances = 128;

@ -48,15 +48,6 @@
|
||||
#include <linux/fsnotify_backend.h>
|
||||
#include "fsnotify.h"
|
||||
|
||||
static struct kmem_cache *fsnotify_event_cachep;
|
||||
static struct kmem_cache *fsnotify_event_holder_cachep;
|
||||
/*
|
||||
* This is a magic event we send when the q is too full. Since it doesn't
|
||||
* hold real event information we just keep one system wide and use it any time
|
||||
* it is needed. It's refcnt is set 1 at kernel init time and will never
|
||||
* get set to 0 so it will never get 'freed'
|
||||
*/
|
||||
static struct fsnotify_event *q_overflow_event;
|
||||
static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
|
||||
|
||||
/**
|
||||
@@ -76,60 +67,14 @@ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
|
||||
return list_empty(&group->notification_list) ? true : false;
|
||||
}
|
||||
|
||||
void fsnotify_get_event(struct fsnotify_event *event)
|
||||
void fsnotify_destroy_event(struct fsnotify_group *group,
|
||||
struct fsnotify_event *event)
|
||||
{
|
||||
atomic_inc(&event->refcnt);
|
||||
}
|
||||
|
||||
void fsnotify_put_event(struct fsnotify_event *event)
|
||||
{
|
||||
if (!event)
|
||||
/* Overflow events are per-group and we don't want to free them */
|
||||
if (!event || event->mask == FS_Q_OVERFLOW)
|
||||
return;
|
||||
|
||||
if (atomic_dec_and_test(&event->refcnt)) {
|
||||
pr_debug("%s: event=%p\n", __func__, event);
|
||||
|
||||
if (event->data_type == FSNOTIFY_EVENT_PATH)
|
||||
path_put(&event->path);
|
||||
|
||||
BUG_ON(!list_empty(&event->private_data_list));
|
||||
|
||||
kfree(event->file_name);
|
||||
put_pid(event->tgid);
|
||||
kmem_cache_free(fsnotify_event_cachep, event);
|
||||
}
|
||||
}
|
||||
|
||||
struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
|
||||
{
|
||||
return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
|
||||
}
|
||||
|
||||
void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
|
||||
{
|
||||
if (holder)
|
||||
kmem_cache_free(fsnotify_event_holder_cachep, holder);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the private data that the group previously attached to this event when
|
||||
* the group added the event to the notification queue (fsnotify_add_notify_event)
|
||||
*/
|
||||
struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
|
||||
{
|
||||
struct fsnotify_event_private_data *lpriv;
|
||||
struct fsnotify_event_private_data *priv = NULL;
|
||||
|
||||
assert_spin_locked(&event->lock);
|
||||
|
||||
list_for_each_entry(lpriv, &event->private_data_list, event_list) {
|
||||
if (lpriv->group == group) {
|
||||
priv = lpriv;
|
||||
list_del(&priv->event_list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return priv;
|
||||
group->ops->free_event(event);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -137,91 +82,35 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
|
||||
* event off the queue to deal with. If the event is successfully added to the
|
||||
* group's notification queue, a reference is taken on event.
|
||||
*/
|
||||
struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
|
||||
struct fsnotify_event_private_data *priv,
|
||||
struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
|
||||
struct fsnotify_event *event,
|
||||
struct fsnotify_event *(*merge)(struct list_head *,
|
||||
struct fsnotify_event *))
|
||||
{
|
||||
struct fsnotify_event *return_event = NULL;
|
||||
struct fsnotify_event_holder *holder = NULL;
|
||||
struct list_head *list = &group->notification_list;
|
||||
|
||||
pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
|
||||
|
||||
/*
|
||||
* There is one fsnotify_event_holder embedded inside each fsnotify_event.
|
||||
* Check if we expect to be able to use that holder. If not alloc a new
|
||||
* holder.
|
||||
* For the overflow event it's possible that something will use the in
|
||||
* event holder before we get the lock so we may need to jump back and
|
||||
* alloc a new holder, this can't happen for most events...
|
||||
*/
|
||||
if (!list_empty(&event->holder.event_list)) {
|
||||
alloc_holder:
|
||||
holder = fsnotify_alloc_event_holder();
|
||||
if (!holder)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
|
||||
|
||||
mutex_lock(&group->notification_mutex);
|
||||
|
||||
if (group->q_len >= group->max_events) {
|
||||
event = q_overflow_event;
|
||||
|
||||
/*
|
||||
* we need to return the overflow event
|
||||
* which means we need a ref
|
||||
*/
|
||||
fsnotify_get_event(event);
|
||||
/* Queue overflow event only if it isn't already queued */
|
||||
if (list_empty(&group->overflow_event.list))
|
||||
event = &group->overflow_event;
|
||||
return_event = event;
|
||||
|
||||
/* sorry, no private data on the overflow event */
|
||||
priv = NULL;
|
||||
}
|
||||
|
||||
if (!list_empty(list) && merge) {
|
||||
struct fsnotify_event *tmp;
|
||||
|
||||
tmp = merge(list, event);
|
||||
if (tmp) {
|
||||
mutex_unlock(&group->notification_mutex);
|
||||
|
||||
if (return_event)
|
||||
fsnotify_put_event(return_event);
|
||||
if (holder != &event->holder)
|
||||
fsnotify_destroy_event_holder(holder);
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&event->lock);
|
||||
|
||||
if (list_empty(&event->holder.event_list)) {
|
||||
if (unlikely(holder))
|
||||
fsnotify_destroy_event_holder(holder);
|
||||
holder = &event->holder;
|
||||
} else if (unlikely(!holder)) {
|
||||
/* between the time we checked above and got the lock the in
|
||||
* event holder was used, go back and get a new one */
|
||||
spin_unlock(&event->lock);
|
||||
mutex_unlock(&group->notification_mutex);
|
||||
|
||||
return_event = merge(list, event);
|
||||
if (return_event) {
|
||||
fsnotify_put_event(return_event);
|
||||
return_event = NULL;
|
||||
mutex_unlock(&group->notification_mutex);
|
||||
return return_event;
|
||||
}
|
||||
|
||||
goto alloc_holder;
|
||||
}
|
||||
|
||||
group->q_len++;
|
||||
holder->event = event;
|
||||
|
||||
fsnotify_get_event(event);
|
||||
list_add_tail(&holder->event_list, list);
|
||||
if (priv)
|
||||
list_add_tail(&priv->event_list, &event->private_data_list);
|
||||
spin_unlock(&event->lock);
|
||||
list_add_tail(&event->list, list);
|
||||
mutex_unlock(&group->notification_mutex);
|
||||
|
||||
wake_up(&group->notification_waitq);
|
||||
@@ -230,32 +119,20 @@ alloc_holder:
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove and return the first event from the notification list. There is a
|
||||
* reference held on this event since it was on the list. It is the responsibility
|
||||
* of the caller to drop this reference.
|
||||
* Remove and return the first event from the notification list. It is the
|
||||
* responsibility of the caller to destroy the obtained event
|
||||
*/
|
||||
struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
|
||||
{
|
||||
struct fsnotify_event *event;
|
||||
struct fsnotify_event_holder *holder;
|
||||
|
||||
BUG_ON(!mutex_is_locked(&group->notification_mutex));
|
||||
|
||||
pr_debug("%s: group=%p\n", __func__, group);
|
||||
|
||||
holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
|
||||
|
||||
event = holder->event;
|
||||
|
||||
spin_lock(&event->lock);
|
||||
holder->event = NULL;
|
||||
list_del_init(&holder->event_list);
|
||||
spin_unlock(&event->lock);
|
||||
|
||||
/* event == holder means we are referenced through the in event holder */
|
||||
if (holder != &event->holder)
|
||||
fsnotify_destroy_event_holder(holder);
|
||||
|
||||
event = list_first_entry(&group->notification_list,
|
||||
struct fsnotify_event, list);
|
||||
list_del(&event->list);
|
||||
group->q_len--;
|
||||
|
||||
return event;
|
||||
@@ -266,15 +143,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
|
||||
*/
|
||||
struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
|
||||
{
|
||||
struct fsnotify_event *event;
|
||||
struct fsnotify_event_holder *holder;
|
||||
|
||||
BUG_ON(!mutex_is_locked(&group->notification_mutex));
|
||||
|
||||
holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
|
||||
event = holder->event;
|
||||
|
||||
return event;
|
||||
return list_first_entry(&group->notification_list,
|
||||
struct fsnotify_event, list);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -284,181 +156,31 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
|
||||
void fsnotify_flush_notify(struct fsnotify_group *group)
|
||||
{
|
||||
struct fsnotify_event *event;
|
||||
struct fsnotify_event_private_data *priv;
|
||||
|
||||
mutex_lock(&group->notification_mutex);
|
||||
while (!fsnotify_notify_queue_is_empty(group)) {
|
||||
event = fsnotify_remove_notify_event(group);
|
||||
/* if they don't implement free_event_priv they better not have attached any */
|
||||
if (group->ops->free_event_priv) {
|
||||
spin_lock(&event->lock);
|
||||
priv = fsnotify_remove_priv_from_event(group, event);
|
||||
spin_unlock(&event->lock);
|
||||
if (priv)
|
||||
group->ops->free_event_priv(priv);
|
||||
}
|
||||
fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
|
||||
fsnotify_destroy_event(group, event);
|
||||
}
|
||||
mutex_unlock(&group->notification_mutex);
|
||||
}
|
||||
|
||||
static void initialize_event(struct fsnotify_event *event)
|
||||
{
|
||||
INIT_LIST_HEAD(&event->holder.event_list);
|
||||
atomic_set(&event->refcnt, 1);
|
||||
|
||||
spin_lock_init(&event->lock);
|
||||
|
||||
INIT_LIST_HEAD(&event->private_data_list);
|
||||
}
|
||||
|
||||
/*
|
||||
* Caller damn well better be holding whatever mutex is protecting the
|
||||
* old_holder->event_list and the new_event must be a clean event which
|
||||
* cannot be found anywhere else in the kernel.
|
||||
*/
|
||||
int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
|
||||
struct fsnotify_event *new_event)
|
||||
{
|
||||
struct fsnotify_event *old_event = old_holder->event;
|
||||
struct fsnotify_event_holder *new_holder = &new_event->holder;
|
||||
|
||||
enum event_spinlock_class {
|
||||
SPINLOCK_OLD,
|
||||
SPINLOCK_NEW,
|
||||
};
|
||||
|
||||
pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
|
||||
|
||||
/*
|
||||
* if the new_event's embedded holder is in use someone
|
||||
* screwed up and didn't give us a clean new event.
|
||||
*/
|
||||
BUG_ON(!list_empty(&new_holder->event_list));
|
||||
|
||||
spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
|
||||
spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
|
||||
|
||||
new_holder->event = new_event;
|
||||
list_replace_init(&old_holder->event_list, &new_holder->event_list);
|
||||
|
||||
spin_unlock(&new_event->lock);
|
||||
spin_unlock(&old_event->lock);
|
||||
|
||||
/* event == holder means we are referenced through the in event holder */
|
||||
if (old_holder != &old_event->holder)
|
||||
fsnotify_destroy_event_holder(old_holder);
|
||||
|
||||
fsnotify_get_event(new_event); /* on the list take reference */
|
||||
fsnotify_put_event(old_event); /* off the list, drop reference */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
|
||||
{
|
||||
struct fsnotify_event *event;
|
||||
|
||||
event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
|
||||
if (!event)
|
||||
return NULL;
|
||||
|
||||
pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
|
||||
|
||||
memcpy(event, old_event, sizeof(*event));
|
||||
initialize_event(event);
|
||||
|
||||
if (event->name_len) {
|
||||
event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
|
||||
if (!event->file_name) {
|
||||
kmem_cache_free(fsnotify_event_cachep, event);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
event->tgid = get_pid(old_event->tgid);
|
||||
if (event->data_type == FSNOTIFY_EVENT_PATH)
|
||||
path_get(&event->path);
|
||||
|
||||
return event;
|
||||
}
|
||||
|
||||
/*
|
||||
* fsnotify_create_event - Allocate a new event which will be sent to each
|
||||
* group's handle_event function if the group was interested in this
|
||||
* particular event.
|
||||
*
|
||||
* @to_tell the inode which is supposed to receive the event (sometimes a
|
||||
* @inode the inode which is supposed to receive the event (sometimes a
|
||||
* parent of the inode to which the event happened.
|
||||
* @mask what actually happened.
|
||||
* @data pointer to the object which was actually affected
|
||||
* @data_type flag indication if the data is a file, path, inode, nothing...
|
||||
* @name the filename, if available
|
||||
*/
|
||||
struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
|
||||
int data_type, const unsigned char *name,
|
||||
u32 cookie, gfp_t gfp)
|
||||
void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
|
||||
u32 mask)
|
||||
{
|
||||
struct fsnotify_event *event;
|
||||
|
||||
event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
|
||||
if (!event)
|
||||
return NULL;
|
||||
|
||||
pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
|
||||
__func__, event, to_tell, mask, data, data_type);
|
||||
|
||||
initialize_event(event);
|
||||
|
||||
if (name) {
|
||||
event->file_name = kstrdup(name, gfp);
|
||||
if (!event->file_name) {
|
||||
kmem_cache_free(fsnotify_event_cachep, event);
|
||||
return NULL;
|
||||
}
|
||||
event->name_len = strlen(event->file_name);
|
||||
}
|
||||
|
||||
event->tgid = get_pid(task_tgid(current));
|
||||
event->sync_cookie = cookie;
|
||||
event->to_tell = to_tell;
|
||||
event->data_type = data_type;
|
||||
|
||||
switch (data_type) {
|
||||
case FSNOTIFY_EVENT_PATH: {
|
||||
struct path *path = data;
|
||||
event->path.dentry = path->dentry;
|
||||
event->path.mnt = path->mnt;
|
||||
path_get(&event->path);
|
||||
break;
|
||||
}
|
||||
case FSNOTIFY_EVENT_INODE:
|
||||
event->inode = data;
|
||||
break;
|
||||
case FSNOTIFY_EVENT_NONE:
|
||||
event->inode = NULL;
|
||||
event->path.dentry = NULL;
|
||||
event->path.mnt = NULL;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&event->list);
|
||||
event->inode = inode;
|
||||
event->mask = mask;
|
||||
|
||||
return event;
|
||||
}
|
||||
|
||||
static __init int fsnotify_notification_init(void)
|
||||
{
|
||||
fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
|
||||
fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
|
||||
|
||||
q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
|
||||
FSNOTIFY_EVENT_NONE, NULL, 0,
|
||||
GFP_KERNEL);
|
||||
if (!q_overflow_event)
|
||||
panic("unable to allocate fsnotify q_overflow_event\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(fsnotify_notification_init);
|
||||
|
@@ -38,7 +38,6 @@ ocfs2-objs := \
|
||||
symlink.o \
|
||||
sysfile.o \
|
||||
uptodate.o \
|
||||
ver.o \
|
||||
quota_local.o \
|
||||
quota_global.o \
|
||||
xattr.o \
|
||||
|
@@ -7260,14 +7260,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
||||
start = range->start >> osb->s_clustersize_bits;
|
||||
len = range->len >> osb->s_clustersize_bits;
|
||||
minlen = range->minlen >> osb->s_clustersize_bits;
|
||||
trimmed = 0;
|
||||
|
||||
if (!len) {
|
||||
range->len = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (minlen >= osb->bitmap_cpg)
|
||||
if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
|
||||
return -EINVAL;
|
||||
|
||||
main_bm_inode = ocfs2_get_system_file_inode(osb,
|
||||
@ -7293,6 +7287,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
len = range->len >> osb->s_clustersize_bits;
|
||||
if (start + len > le32_to_cpu(main_bm->i_clusters))
|
||||
len = le32_to_cpu(main_bm->i_clusters) - start;
|
||||
|
||||
@ -7307,6 +7302,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
||||
last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
|
||||
last_bit = osb->bitmap_cpg;
|
||||
|
||||
trimmed = 0;
|
||||
for (group = first_group; group <= last_group;) {
|
||||
if (first_bit + len >= osb->bitmap_cpg)
|
||||
last_bit = osb->bitmap_cpg;
|
||||
|
@@ -1,4 +1,4 @@
|
||||
obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
|
||||
|
||||
ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
|
||||
quorum.o tcp.o netdebug.o ver.o
|
||||
quorum.o tcp.o netdebug.o
|
||||
|
@ -29,7 +29,6 @@
|
||||
#include "heartbeat.h"
|
||||
#include "masklog.h"
|
||||
#include "sys.h"
|
||||
#include "ver.h"
|
||||
|
||||
/* for now we operate under the assertion that there can be only one
|
||||
* cluster active at a time. Changing this will require trickling
|
||||
@@ -945,8 +944,6 @@ static int __init init_o2nm(void)
|
||||
{
|
||||
int ret = -1;
|
||||
|
||||
cluster_print_version();
|
||||
|
||||
ret = o2hb_init();
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -984,6 +981,7 @@ out:
|
||||
|
||||
MODULE_AUTHOR("Oracle");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("OCFS2 cluster management");
|
||||
|
||||
module_init(init_o2nm)
|
||||
module_exit(exit_o2nm)
|
||||
|
@@ -1,42 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* ver.c
|
||||
*
|
||||
* version string
|
||||
*
|
||||
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "ver.h"
|
||||
|
||||
#define CLUSTER_BUILD_VERSION "1.5.0"
|
||||
|
||||
#define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION
|
||||
|
||||
void cluster_print_version(void)
|
||||
{
|
||||
printk(KERN_INFO "%s\n", VERSION_STR);
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION(VERSION_STR);
|
||||
|
||||
MODULE_VERSION(CLUSTER_BUILD_VERSION);
|
@ -1,31 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* ver.h
|
||||
*
|
||||
* Function prototypes
|
||||
*
|
||||
* Copyright (C) 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef O2CLUSTER_VER_H
|
||||
#define O2CLUSTER_VER_H
|
||||
|
||||
void cluster_print_version(void);
|
||||
|
||||
#endif /* O2CLUSTER_VER_H */
|
@ -3,5 +3,5 @@ ccflags-y := -Ifs/ocfs2
|
||||
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
|
||||
|
||||
ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
|
||||
dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
|
||||
dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o
|
||||
|
||||
|
@@ -43,8 +43,6 @@
|
||||
#include "dlmdomain.h"
|
||||
#include "dlmdebug.h"
|
||||
|
||||
#include "dlmver.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
@ -2328,8 +2326,6 @@ static int __init dlm_init(void)
|
||||
{
|
||||
int status;
|
||||
|
||||
dlm_print_version();
|
||||
|
||||
status = dlm_init_mle_cache();
|
||||
if (status) {
|
||||
mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n");
|
||||
@ -2379,6 +2375,7 @@ static void __exit dlm_exit (void)
|
||||
|
||||
MODULE_AUTHOR("Oracle");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("OCFS2 Distributed Lock Management");
|
||||
|
||||
module_init(dlm_init);
|
||||
module_exit(dlm_exit);
|
||||
|
@ -1,42 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmver.c
|
||||
*
|
||||
* version string
|
||||
*
|
||||
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "dlmver.h"
|
||||
|
||||
#define DLM_BUILD_VERSION "1.5.0"
|
||||
|
||||
#define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION
|
||||
|
||||
void dlm_print_version(void)
|
||||
{
|
||||
printk(KERN_INFO "%s\n", VERSION_STR);
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION(VERSION_STR);
|
||||
|
||||
MODULE_VERSION(DLM_BUILD_VERSION);
|
@ -1,31 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmfsver.h
|
||||
*
|
||||
* Function prototypes
|
||||
*
|
||||
* Copyright (C) 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef DLM_VER_H
|
||||
#define DLM_VER_H
|
||||
|
||||
void dlm_print_version(void);
|
||||
|
||||
#endif /* DLM_VER_H */
|
@ -2,4 +2,4 @@ ccflags-y := -Ifs/ocfs2
|
||||
|
||||
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
|
||||
|
||||
ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
|
||||
ocfs2_dlmfs-objs := userdlm.o dlmfs.o
|
||||
|
@ -49,7 +49,6 @@
|
||||
|
||||
#include "stackglue.h"
|
||||
#include "userdlm.h"
|
||||
#include "dlmfsver.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_DLMFS
|
||||
#include "cluster/masklog.h"
|
||||
@ -644,8 +643,6 @@ static int __init init_dlmfs_fs(void)
|
||||
int status;
|
||||
int cleanup_inode = 0, cleanup_worker = 0;
|
||||
|
||||
dlmfs_print_version();
|
||||
|
||||
status = bdi_init(&dlmfs_backing_dev_info);
|
||||
if (status)
|
||||
return status;
|
||||
@ -701,6 +698,7 @@ static void __exit exit_dlmfs_fs(void)
|
||||
|
||||
MODULE_AUTHOR("Oracle");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("OCFS2 DLM-Filesystem");
|
||||
|
||||
module_init(init_dlmfs_fs)
|
||||
module_exit(exit_dlmfs_fs)
|
||||
|
@ -1,42 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmfsver.c
|
||||
*
|
||||
* version string
|
||||
*
|
||||
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "dlmfsver.h"
|
||||
|
||||
#define DLM_BUILD_VERSION "1.5.0"
|
||||
|
||||
#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION
|
||||
|
||||
void dlmfs_print_version(void)
|
||||
{
|
||||
printk(KERN_INFO "%s\n", VERSION_STR);
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION(VERSION_STR);
|
||||
|
||||
MODULE_VERSION(DLM_BUILD_VERSION);
|
@ -1,31 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmver.h
|
||||
*
|
||||
* Function prototypes
|
||||
*
|
||||
* Copyright (C) 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef DLMFS_VER_H
|
||||
#define DLMFS_VER_H
|
||||
|
||||
void dlmfs_print_version(void);
|
||||
|
||||
#endif /* DLMFS_VER_H */
|
@@ -2996,6 +2996,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
|
||||
|
||||
/* for now, uuid == domain */
|
||||
status = ocfs2_cluster_connect(osb->osb_cluster_stack,
|
||||
osb->osb_cluster_name,
|
||||
strlen(osb->osb_cluster_name),
|
||||
osb->uuid_str,
|
||||
strlen(osb->uuid_str),
|
||||
&lproto, ocfs2_do_node_down, osb,
|
||||
@ -3005,7 +3007,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
|
||||
goto bail;
|
||||
}
|
||||
|
||||
status = ocfs2_cluster_this_node(&osb->node_num);
|
||||
status = ocfs2_cluster_this_node(conn, &osb->node_num);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
mlog(ML_ERROR,
|
||||
|
@@ -1869,7 +1869,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
|
||||
}
|
||||
size = sr->l_start + sr->l_len;
|
||||
|
||||
if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
|
||||
if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 ||
|
||||
cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) {
|
||||
if (sr->l_len <= 0) {
|
||||
ret = -EINVAL;
|
||||
goto out_inode_unlock;
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/compat.h>
|
||||
|
||||
#include <cluster/masklog.h>
|
||||
@ -966,15 +967,21 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
case FITRIM:
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct request_queue *q = bdev_get_queue(sb->s_bdev);
|
||||
struct fstrim_range range;
|
||||
int ret = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (!blk_queue_discard(q))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (copy_from_user(&range, argp, sizeof(range)))
|
||||
return -EFAULT;
|
||||
|
||||
range.minlen = max_t(u64, q->limits.discard_granularity,
|
||||
range.minlen);
|
||||
ret = ocfs2_trim_fs(sb, &range);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
@@ -561,83 +561,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
|
||||
mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
|
||||
}
|
||||
|
||||
static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
|
||||
handle_t *handle,
|
||||
struct buffer_head *di_bh,
|
||||
u32 num_bits,
|
||||
u16 chain)
|
||||
{
|
||||
int ret;
|
||||
u32 tmp_used;
|
||||
struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
|
||||
struct ocfs2_chain_list *cl =
|
||||
(struct ocfs2_chain_list *) &di->id2.i_chain;
|
||||
|
||||
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
|
||||
di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
|
||||
le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
|
||||
ocfs2_journal_dirty(handle, di_bh);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int ocfs2_block_group_set_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct ocfs2_group_desc *bg,
|
||||
struct buffer_head *group_bh,
|
||||
unsigned int bit_off,
|
||||
unsigned int num_bits)
|
||||
{
|
||||
int status;
|
||||
void *bitmap = bg->bg_bitmap;
|
||||
int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
|
||||
|
||||
/* All callers get the descriptor via
|
||||
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
|
||||
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
|
||||
BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
|
||||
|
||||
mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
|
||||
num_bits);
|
||||
|
||||
if (ocfs2_is_cluster_bitmap(alloc_inode))
|
||||
journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
|
||||
|
||||
status = ocfs2_journal_access_gd(handle,
|
||||
INODE_CACHE(alloc_inode),
|
||||
group_bh,
|
||||
journal_type);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
|
||||
if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
|
||||
ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
|
||||
" count %u but claims %u are freed. num_bits %d",
|
||||
(unsigned long long)le64_to_cpu(bg->bg_blkno),
|
||||
le16_to_cpu(bg->bg_bits),
|
||||
le16_to_cpu(bg->bg_free_bits_count), num_bits);
|
||||
return -EROFS;
|
||||
}
|
||||
while (num_bits--)
|
||||
ocfs2_set_bit(bit_off++, bitmap);
|
||||
|
||||
ocfs2_journal_dirty(handle, group_bh);
|
||||
|
||||
bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
|
||||
u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
|
||||
u32 len, int ext_flags)
|
||||
|
@ -387,6 +387,7 @@ struct ocfs2_super
|
||||
u8 osb_stackflags;
|
||||
|
||||
char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
|
||||
char osb_cluster_name[OCFS2_CLUSTER_NAME_LEN + 1];
|
||||
struct ocfs2_cluster_connection *cconn;
|
||||
struct ocfs2_lock_res osb_super_lockres;
|
||||
struct ocfs2_lock_res osb_rename_lockres;
|
||||
|
@ -398,7 +398,8 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int o2cb_cluster_this_node(unsigned int *node)
|
||||
static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
|
||||
unsigned int *node)
|
||||
{
|
||||
int node_num;
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#include "stackglue.h"
|
||||
@ -102,6 +103,12 @@
|
||||
#define OCFS2_TEXT_UUID_LEN 32
|
||||
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
|
||||
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8
|
||||
#define VERSION_LOCK "version_lock"
|
||||
|
||||
enum ocfs2_connection_type {
|
||||
WITH_CONTROLD,
|
||||
NO_CONTROLD
|
||||
};
|
||||
|
||||
/*
|
||||
* ocfs2_live_connection is refcounted because the filesystem and
|
||||
@ -110,6 +117,13 @@
|
||||
struct ocfs2_live_connection {
|
||||
struct list_head oc_list;
|
||||
struct ocfs2_cluster_connection *oc_conn;
|
||||
enum ocfs2_connection_type oc_type;
|
||||
atomic_t oc_this_node;
|
||||
int oc_our_slot;
|
||||
struct dlm_lksb oc_version_lksb;
|
||||
char oc_lvb[DLM_LVB_LEN];
|
||||
struct completion oc_sync_wait;
|
||||
wait_queue_head_t oc_wait;
|
||||
};
|
||||
|
||||
struct ocfs2_control_private {
|
||||
@ -198,20 +212,15 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
|
||||
* mount path. Since the VFS prevents multiple calls to
|
||||
* fill_super(), we can't get dupes here.
|
||||
*/
|
||||
static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
|
||||
struct ocfs2_live_connection **c_ret)
|
||||
static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn,
|
||||
struct ocfs2_live_connection *c)
|
||||
{
|
||||
int rc = 0;
|
||||
struct ocfs2_live_connection *c;
|
||||
|
||||
c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
|
||||
if (!c)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
c->oc_conn = conn;
|
||||
|
||||
if (atomic_read(&ocfs2_control_opened))
|
||||
if ((c->oc_type == NO_CONTROLD) || atomic_read(&ocfs2_control_opened))
|
||||
list_add(&c->oc_list, &ocfs2_live_connection_list);
|
||||
else {
|
||||
printk(KERN_ERR
|
||||
@ -220,12 +229,6 @@ static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
|
||||
}
|
||||
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
|
||||
if (!rc)
|
||||
*c_ret = c;
|
||||
else
|
||||
kfree(c);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -799,18 +802,251 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver)
|
||||
{
|
||||
struct ocfs2_protocol_version *pv =
|
||||
(struct ocfs2_protocol_version *)lvb;
|
||||
/*
|
||||
* ocfs2_protocol_version has two u8 variables, so we don't
|
||||
* need any endian conversion.
|
||||
*/
|
||||
ver->pv_major = pv->pv_major;
|
||||
ver->pv_minor = pv->pv_minor;
|
||||
}
|
||||
|
||||
static void version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb)
|
||||
{
|
||||
struct ocfs2_protocol_version *pv =
|
||||
(struct ocfs2_protocol_version *)lvb;
|
||||
/*
|
||||
* ocfs2_protocol_version has two u8 variables, so we don't
|
||||
* need any endian conversion.
|
||||
*/
|
||||
pv->pv_major = ver->pv_major;
|
||||
pv->pv_minor = ver->pv_minor;
|
||||
}
|
||||
|
||||
static void sync_wait_cb(void *arg)
|
||||
{
|
||||
struct ocfs2_cluster_connection *conn = arg;
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
complete(&lc->oc_sync_wait);
|
||||
}
|
||||
|
||||
static int sync_unlock(struct ocfs2_cluster_connection *conn,
|
||||
struct dlm_lksb *lksb, char *name)
|
||||
{
|
||||
int error;
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
|
||||
error = dlm_unlock(conn->cc_lockspace, lksb->sb_lkid, 0, lksb, conn);
|
||||
if (error) {
|
||||
printk(KERN_ERR "%s lkid %x error %d\n",
|
||||
name, lksb->sb_lkid, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
wait_for_completion(&lc->oc_sync_wait);
|
||||
|
||||
if (lksb->sb_status != -DLM_EUNLOCK) {
|
||||
printk(KERN_ERR "%s lkid %x status %d\n",
|
||||
name, lksb->sb_lkid, lksb->sb_status);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sync_lock(struct ocfs2_cluster_connection *conn,
|
||||
int mode, uint32_t flags,
|
||||
struct dlm_lksb *lksb, char *name)
|
||||
{
|
||||
int error, status;
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
|
||||
error = dlm_lock(conn->cc_lockspace, mode, lksb, flags,
|
||||
name, strlen(name),
|
||||
0, sync_wait_cb, conn, NULL);
|
||||
if (error) {
|
||||
printk(KERN_ERR "%s lkid %x flags %x mode %d error %d\n",
|
||||
name, lksb->sb_lkid, flags, mode, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
wait_for_completion(&lc->oc_sync_wait);
|
||||
|
||||
status = lksb->sb_status;
|
||||
|
||||
if (status && status != -EAGAIN) {
|
||||
printk(KERN_ERR "%s lkid %x flags %x mode %d status %d\n",
|
||||
name, lksb->sb_lkid, flags, mode, status);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
static int version_lock(struct ocfs2_cluster_connection *conn, int mode,
|
||||
int flags)
|
||||
{
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
return sync_lock(conn, mode, flags,
|
||||
&lc->oc_version_lksb, VERSION_LOCK);
|
||||
}
|
||||
|
||||
static int version_unlock(struct ocfs2_cluster_connection *conn)
|
||||
{
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
return sync_unlock(conn, &lc->oc_version_lksb, VERSION_LOCK);
|
||||
}
|
||||
|
||||
/* get_protocol_version()
|
||||
*
|
||||
* To exchange ocfs2 versioning, we use the LVB of the version dlm lock.
|
||||
* The algorithm is:
|
||||
* 1. Attempt to take the lock in EX mode (non-blocking).
|
||||
* 2. If successful (which means it is the first mount), write the
|
||||
* version number and downconvert to PR lock.
|
||||
* 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after
|
||||
* taking the PR lock.
|
||||
*/
|
||||
|
||||
static int get_protocol_version(struct ocfs2_cluster_connection *conn)
|
||||
{
|
||||
int ret;
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
struct ocfs2_protocol_version pv;
|
||||
|
||||
running_proto.pv_major =
|
||||
ocfs2_user_plugin.sp_max_proto.pv_major;
|
||||
running_proto.pv_minor =
|
||||
ocfs2_user_plugin.sp_max_proto.pv_minor;
|
||||
|
||||
lc->oc_version_lksb.sb_lvbptr = lc->oc_lvb;
|
||||
ret = version_lock(conn, DLM_LOCK_EX,
|
||||
DLM_LKF_VALBLK|DLM_LKF_NOQUEUE);
|
||||
if (!ret) {
|
||||
conn->cc_version.pv_major = running_proto.pv_major;
|
||||
conn->cc_version.pv_minor = running_proto.pv_minor;
|
||||
version_to_lvb(&running_proto, lc->oc_lvb);
|
||||
version_lock(conn, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
|
||||
} else if (ret == -EAGAIN) {
|
||||
ret = version_lock(conn, DLM_LOCK_PR, DLM_LKF_VALBLK);
|
||||
if (ret)
|
||||
goto out;
|
||||
lvb_to_version(lc->oc_lvb, &pv);
|
||||
|
||||
if ((pv.pv_major != running_proto.pv_major) ||
|
||||
(pv.pv_minor > running_proto.pv_minor)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
conn->cc_version.pv_major = pv.pv_major;
|
||||
conn->cc_version.pv_minor = pv.pv_minor;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void user_recover_prep(void *arg)
|
||||
{
|
||||
}
|
||||
|
||||
static void user_recover_slot(void *arg, struct dlm_slot *slot)
|
||||
{
|
||||
struct ocfs2_cluster_connection *conn = arg;
|
||||
printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
|
||||
slot->nodeid, slot->slot);
|
||||
conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
|
||||
|
||||
}
|
||||
|
||||
static void user_recover_done(void *arg, struct dlm_slot *slots,
|
||||
int num_slots, int our_slot,
|
||||
uint32_t generation)
|
||||
{
|
||||
struct ocfs2_cluster_connection *conn = arg;
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_slots; i++)
|
||||
if (slots[i].slot == our_slot) {
|
||||
atomic_set(&lc->oc_this_node, slots[i].nodeid);
|
||||
break;
|
||||
}
|
||||
|
||||
lc->oc_our_slot = our_slot;
|
||||
wake_up(&lc->oc_wait);
|
||||
}
|
||||
|
||||
static const struct dlm_lockspace_ops ocfs2_ls_ops = {
|
||||
.recover_prep = user_recover_prep,
|
||||
.recover_slot = user_recover_slot,
|
||||
.recover_done = user_recover_done,
|
||||
};
|
||||
|
||||
static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
|
||||
{
|
||||
version_unlock(conn);
|
||||
dlm_release_lockspace(conn->cc_lockspace, 2);
|
||||
conn->cc_lockspace = NULL;
|
||||
ocfs2_live_connection_drop(conn->cc_private);
|
||||
conn->cc_private = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
|
||||
{
|
||||
dlm_lockspace_t *fsdlm;
|
||||
struct ocfs2_live_connection *uninitialized_var(control);
|
||||
int rc = 0;
|
||||
struct ocfs2_live_connection *lc;
|
||||
int rc, ops_rv;
|
||||
|
||||
BUG_ON(conn == NULL);
|
||||
|
||||
rc = ocfs2_live_connection_new(conn, &control);
|
||||
lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
|
||||
if (!lc) {
|
||||
rc = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
init_waitqueue_head(&lc->oc_wait);
|
||||
init_completion(&lc->oc_sync_wait);
|
||||
atomic_set(&lc->oc_this_node, 0);
|
||||
conn->cc_private = lc;
|
||||
lc->oc_type = NO_CONTROLD;
|
||||
|
||||
rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name,
|
||||
DLM_LSFL_FS, DLM_LVB_LEN,
|
||||
&ocfs2_ls_ops, conn, &ops_rv, &fsdlm);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
if (ops_rv == -EOPNOTSUPP) {
|
||||
lc->oc_type = WITH_CONTROLD;
|
||||
printk(KERN_NOTICE "ocfs2: You seem to be using an older "
|
||||
"version of dlm_controld and/or ocfs2-tools."
|
||||
" Please consider upgrading.\n");
|
||||
} else if (ops_rv) {
|
||||
rc = ops_rv;
|
||||
goto out;
|
||||
}
|
||||
conn->cc_lockspace = fsdlm;
|
||||
|
||||
rc = ocfs2_live_connection_attach(conn, lc);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
if (lc->oc_type == NO_CONTROLD) {
|
||||
rc = get_protocol_version(conn);
|
||||
if (rc) {
|
||||
printk(KERN_ERR "ocfs2: Could not determine"
|
||||
" locking version\n");
|
||||
user_cluster_disconnect(conn);
|
||||
goto out;
|
||||
}
|
||||
wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
|
||||
}
|
||||
|
||||
/*
|
||||
* running_proto must have been set before we allowed any mounts
|
||||
* to proceed.
|
||||
@ -818,42 +1054,34 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
|
||||
if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
|
||||
printk(KERN_ERR
|
||||
"Unable to mount with fs locking protocol version "
|
||||
"%u.%u because the userspace control daemon has "
|
||||
"negotiated %u.%u\n",
|
||||
"%u.%u because negotiated protocol is %u.%u\n",
|
||||
conn->cc_version.pv_major, conn->cc_version.pv_minor,
|
||||
running_proto.pv_major, running_proto.pv_minor);
|
||||
rc = -EPROTO;
|
||||
ocfs2_live_connection_drop(control);
|
||||
goto out;
|
||||
ocfs2_live_connection_drop(lc);
|
||||
lc = NULL;
|
||||
}
|
||||
|
||||
rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
|
||||
NULL, NULL, NULL, &fsdlm);
|
||||
if (rc) {
|
||||
ocfs2_live_connection_drop(control);
|
||||
goto out;
|
||||
}
|
||||
|
||||
conn->cc_private = control;
|
||||
conn->cc_lockspace = fsdlm;
|
||||
out:
|
||||
if (rc && lc)
|
||||
kfree(lc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
|
||||
{
|
||||
dlm_release_lockspace(conn->cc_lockspace, 2);
|
||||
conn->cc_lockspace = NULL;
|
||||
ocfs2_live_connection_drop(conn->cc_private);
|
||||
conn->cc_private = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int user_cluster_this_node(unsigned int *this_node)
|
||||
static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
|
||||
unsigned int *this_node)
|
||||
{
|
||||
int rc;
|
||||
struct ocfs2_live_connection *lc = conn->cc_private;
|
||||
|
||||
if (lc->oc_type == WITH_CONTROLD)
|
||||
rc = ocfs2_control_get_this_node();
|
||||
else if (lc->oc_type == NO_CONTROLD)
|
||||
rc = atomic_read(&lc->oc_this_node);
|
||||
else
|
||||
rc = -EINVAL;
|
||||
|
||||
rc = ocfs2_control_get_this_node();
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
|
@@ -309,6 +309,8 @@ int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
|
||||
EXPORT_SYMBOL_GPL(ocfs2_plock);
|
||||
|
||||
int ocfs2_cluster_connect(const char *stack_name,
|
||||
const char *cluster_name,
|
||||
int cluster_name_len,
|
||||
const char *group,
|
||||
int grouplen,
|
||||
struct ocfs2_locking_protocol *lproto,
|
||||
@ -342,8 +344,10 @@ int ocfs2_cluster_connect(const char *stack_name,
|
||||
goto out;
|
||||
}
|
||||
|
||||
memcpy(new_conn->cc_name, group, grouplen);
|
||||
strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
|
||||
new_conn->cc_namelen = grouplen;
|
||||
strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1);
|
||||
new_conn->cc_cluster_name_len = cluster_name_len;
|
||||
new_conn->cc_recovery_handler = recovery_handler;
|
||||
new_conn->cc_recovery_data = recovery_data;
|
||||
|
||||
@ -386,8 +390,9 @@ int ocfs2_cluster_connect_agnostic(const char *group,
|
||||
|
||||
if (cluster_stack_name[0])
|
||||
stack_name = cluster_stack_name;
|
||||
return ocfs2_cluster_connect(stack_name, group, grouplen, lproto,
|
||||
recovery_handler, recovery_data, conn);
|
||||
return ocfs2_cluster_connect(stack_name, NULL, 0, group, grouplen,
|
||||
lproto, recovery_handler, recovery_data,
|
||||
conn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic);
|
||||
|
||||
@ -460,9 +465,10 @@ void ocfs2_cluster_hangup(const char *group, int grouplen)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
|
||||
|
||||
int ocfs2_cluster_this_node(unsigned int *node)
|
||||
int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
|
||||
unsigned int *node)
|
||||
{
|
||||
return active_stack->sp_ops->this_node(node);
|
||||
return active_stack->sp_ops->this_node(conn, node);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
|
||||
|
||||
|
@@ -45,6 +45,9 @@ struct file_lock;
 */
#define GROUP_NAME_MAX 64

/* This shadows OCFS2_CLUSTER_NAME_LEN */
#define CLUSTER_NAME_MAX 16


/*
 * ocfs2_protocol_version changes when ocfs2 does something different in
@@ -97,8 +100,10 @@ struct ocfs2_locking_protocol {
 * locking compatibility.
 */
struct ocfs2_cluster_connection {
	char cc_name[GROUP_NAME_MAX];
	char cc_name[GROUP_NAME_MAX + 1];
	int cc_namelen;
	char cc_cluster_name[CLUSTER_NAME_MAX + 1];
	int cc_cluster_name_len;
	struct ocfs2_protocol_version cc_version;
	struct ocfs2_locking_protocol *cc_proto;
	void (*cc_recovery_handler)(int node_num, void *recovery_data);
@@ -152,7 +157,8 @@ struct ocfs2_stack_operations {
	 * ->this_node() returns the cluster's unique identifier for the
	 * local node.
	 */
	int (*this_node)(unsigned int *node);
	int (*this_node)(struct ocfs2_cluster_connection *conn,
			 unsigned int *node);

	/*
	 * Call the underlying dlm lock function. The ->dlm_lock()
@@ -239,6 +245,8 @@ struct ocfs2_stack_plugin {

/* Used by the filesystem */
int ocfs2_cluster_connect(const char *stack_name,
			  const char *cluster_name,
			  int cluster_name_len,
			  const char *group,
			  int grouplen,
			  struct ocfs2_locking_protocol *lproto,
@@ -260,7 +268,8 @@ int ocfs2_cluster_connect_agnostic(const char *group,
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
			     int hangup_pending);
void ocfs2_cluster_hangup(const char *group, int grouplen);
int ocfs2_cluster_this_node(unsigned int *node);
int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
			    unsigned int *node);

struct ocfs2_lock_res;
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
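Since ->this_node() now receives the connection, a stack plugin can derive the local node number from its per-connection private data instead of a global. A minimal sketch of an implementation under the new signature, mirroring the user_cluster_this_node() pattern above (the plugin name and the assumption that cc_private points at an ocfs2_live_connection are illustrative only, not part of the patch):

/* Illustrative plugin callback; follows the stack_user code above. */
static int example_stack_this_node(struct ocfs2_cluster_connection *conn,
				   unsigned int *this_node)
{
	struct ocfs2_live_connection *lc = conn->cc_private;
	int rc;

	if (lc->oc_type == NO_CONTROLD)
		rc = atomic_read(&lc->oc_this_node);
	else
		rc = ocfs2_control_get_this_node();
	if (rc < 0)
		return rc;

	*this_node = rc;
	return 0;
}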
@ -113,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
|
||||
struct ocfs2_suballoc_result *res);
|
||||
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
|
||||
int nr);
|
||||
static inline int ocfs2_block_group_set_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct ocfs2_group_desc *bg,
|
||||
struct buffer_head *group_bh,
|
||||
unsigned int bit_off,
|
||||
unsigned int num_bits);
|
||||
static int ocfs2_relink_block_group(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct buffer_head *fe_bh,
|
||||
@ -1343,7 +1337,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
|
||||
return status;
|
||||
}
|
||||
|
||||
static inline int ocfs2_block_group_set_bits(handle_t *handle,
|
||||
int ocfs2_block_group_set_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct ocfs2_group_desc *bg,
|
||||
struct buffer_head *group_bh,
|
||||
@ -1388,8 +1382,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
|
||||
ocfs2_journal_dirty(handle, group_bh);
|
||||
|
||||
bail:
|
||||
if (status)
|
||||
mlog_errno(status);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1588,7 +1580,7 @@ static int ocfs2_block_group_search(struct inode *inode,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
|
||||
int ocfs2_alloc_dinode_update_counts(struct inode *inode,
|
||||
handle_t *handle,
|
||||
struct buffer_head *di_bh,
|
||||
u32 num_bits,
|
||||
|
@ -86,6 +86,18 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
|
||||
u32 bits_wanted,
|
||||
struct ocfs2_alloc_context **ac);
|
||||
|
||||
int ocfs2_alloc_dinode_update_counts(struct inode *inode,
|
||||
handle_t *handle,
|
||||
struct buffer_head *di_bh,
|
||||
u32 num_bits,
|
||||
u16 chain);
|
||||
int ocfs2_block_group_set_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct ocfs2_group_desc *bg,
|
||||
struct buffer_head *group_bh,
|
||||
unsigned int bit_off,
|
||||
unsigned int num_bits);
|
||||
|
||||
int ocfs2_claim_metadata(handle_t *handle,
|
||||
struct ocfs2_alloc_context *ac,
|
||||
u32 bits_wanted,
|
||||
|
@ -68,7 +68,6 @@
|
||||
#include "super.h"
|
||||
#include "sysfile.h"
|
||||
#include "uptodate.h"
|
||||
#include "ver.h"
|
||||
#include "xattr.h"
|
||||
#include "quota.h"
|
||||
#include "refcounttree.h"
|
||||
@ -90,6 +89,7 @@ static struct dentry *ocfs2_debugfs_root = NULL;
|
||||
|
||||
MODULE_AUTHOR("Oracle");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("OCFS2 cluster file system");
|
||||
|
||||
struct mount_options
|
||||
{
|
||||
@ -1618,8 +1618,6 @@ static int __init ocfs2_init(void)
|
||||
{
|
||||
int status, i;
|
||||
|
||||
ocfs2_print_version();
|
||||
|
||||
for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
|
||||
init_waitqueue_head(&ocfs2__ioend_wq[i]);
|
||||
|
||||
@ -1947,11 +1945,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
|
||||
|
||||
ocfs2_shutdown_local_alloc(osb);
|
||||
|
||||
ocfs2_truncate_log_shutdown(osb);
|
||||
|
||||
/* This will disable recovery and flush any recovery work. */
|
||||
ocfs2_recovery_exit(osb);
|
||||
|
||||
/*
|
||||
* During dismount, when it recovers another node it will call
|
||||
* ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
|
||||
*/
|
||||
ocfs2_truncate_log_shutdown(osb);
|
||||
|
||||
ocfs2_journal_shutdown(osb);
|
||||
|
||||
ocfs2_sync_blockdev(sb);
|
||||
@ -2225,10 +2227,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
if (ocfs2_clusterinfo_valid(osb)) {
|
||||
osb->osb_stackflags =
|
||||
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
|
||||
memcpy(osb->osb_cluster_stack,
|
||||
strlcpy(osb->osb_cluster_stack,
|
||||
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
|
||||
OCFS2_STACK_LABEL_LEN);
|
||||
osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
|
||||
OCFS2_STACK_LABEL_LEN + 1);
|
||||
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
|
||||
mlog(ML_ERROR,
|
||||
"couldn't mount because of an invalid "
|
||||
@ -2237,6 +2238,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
status = -EINVAL;
|
||||
goto bail;
|
||||
}
|
||||
strlcpy(osb->osb_cluster_name,
|
||||
OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
|
||||
OCFS2_CLUSTER_NAME_LEN + 1);
|
||||
} else {
|
||||
/* The empty string is identical with classic tools that
|
||||
* don't know about s_cluster_info. */
|
||||
|
@ -1,43 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* ver.c
|
||||
*
|
||||
* version string
|
||||
*
|
||||
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "ver.h"
|
||||
|
||||
#define OCFS2_BUILD_VERSION "1.5.0"
|
||||
|
||||
#define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION
|
||||
|
||||
void ocfs2_print_version(void)
|
||||
{
|
||||
printk(KERN_INFO "%s\n", VERSION_STR);
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION(VERSION_STR);
|
||||
|
||||
MODULE_VERSION(OCFS2_BUILD_VERSION);
|
@ -1,31 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* ver.h
|
||||
*
|
||||
* Function prototypes
|
||||
*
|
||||
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef OCFS2_VER_H
|
||||
#define OCFS2_VER_H
|
||||
|
||||
void ocfs2_print_version(void);
|
||||
|
||||
#endif /* OCFS2_VER_H */
|
@ -22,11 +22,80 @@
|
||||
|
||||
#include <linux/errno.h>
|
||||
|
||||
EXPORT_SYMBOL(posix_acl_init);
|
||||
EXPORT_SYMBOL(posix_acl_alloc);
|
||||
EXPORT_SYMBOL(posix_acl_valid);
|
||||
EXPORT_SYMBOL(posix_acl_equiv_mode);
|
||||
EXPORT_SYMBOL(posix_acl_from_mode);
|
||||
struct posix_acl **acl_by_type(struct inode *inode, int type)
|
||||
{
|
||||
switch (type) {
|
||||
case ACL_TYPE_ACCESS:
|
||||
return &inode->i_acl;
|
||||
case ACL_TYPE_DEFAULT:
|
||||
return &inode->i_default_acl;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(acl_by_type);
|
||||
|
||||
struct posix_acl *get_cached_acl(struct inode *inode, int type)
|
||||
{
|
||||
struct posix_acl **p = acl_by_type(inode, type);
|
||||
struct posix_acl *acl = ACCESS_ONCE(*p);
|
||||
if (acl) {
|
||||
spin_lock(&inode->i_lock);
|
||||
acl = *p;
|
||||
if (acl != ACL_NOT_CACHED)
|
||||
acl = posix_acl_dup(acl);
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
return acl;
|
||||
}
|
||||
EXPORT_SYMBOL(get_cached_acl);
|
||||
|
||||
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
|
||||
{
|
||||
return rcu_dereference(*acl_by_type(inode, type));
|
||||
}
|
||||
EXPORT_SYMBOL(get_cached_acl_rcu);
|
||||
|
||||
void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
|
||||
{
|
||||
struct posix_acl **p = acl_by_type(inode, type);
|
||||
struct posix_acl *old;
|
||||
spin_lock(&inode->i_lock);
|
||||
old = *p;
|
||||
rcu_assign_pointer(*p, posix_acl_dup(acl));
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (old != ACL_NOT_CACHED)
|
||||
posix_acl_release(old);
|
||||
}
|
||||
EXPORT_SYMBOL(set_cached_acl);
|
||||
|
||||
void forget_cached_acl(struct inode *inode, int type)
|
||||
{
|
||||
struct posix_acl **p = acl_by_type(inode, type);
|
||||
struct posix_acl *old;
|
||||
spin_lock(&inode->i_lock);
|
||||
old = *p;
|
||||
*p = ACL_NOT_CACHED;
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (old != ACL_NOT_CACHED)
|
||||
posix_acl_release(old);
|
||||
}
|
||||
EXPORT_SYMBOL(forget_cached_acl);
|
||||
|
||||
void forget_all_cached_acls(struct inode *inode)
|
||||
{
|
||||
struct posix_acl *old_access, *old_default;
|
||||
spin_lock(&inode->i_lock);
|
||||
old_access = inode->i_acl;
|
||||
old_default = inode->i_default_acl;
|
||||
inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (old_access != ACL_NOT_CACHED)
|
||||
posix_acl_release(old_access);
|
||||
if (old_default != ACL_NOT_CACHED)
|
||||
posix_acl_release(old_default);
|
||||
}
|
||||
EXPORT_SYMBOL(forget_all_cached_acls);
|
||||
|
||||
/*
|
||||
* Init a fresh posix_acl
|
||||
@ -37,6 +106,7 @@ posix_acl_init(struct posix_acl *acl, int count)
|
||||
atomic_set(&acl->a_refcount, 1);
|
||||
acl->a_count = count;
|
||||
}
|
||||
EXPORT_SYMBOL(posix_acl_init);
|
||||
|
||||
/*
|
||||
* Allocate a new ACL with the specified number of entries.
|
||||
@ -51,6 +121,7 @@ posix_acl_alloc(int count, gfp_t flags)
|
||||
posix_acl_init(acl, count);
|
||||
return acl;
|
||||
}
|
||||
EXPORT_SYMBOL(posix_acl_alloc);
|
||||
|
||||
/*
|
||||
* Clone an ACL.
|
||||
@ -146,6 +217,7 @@ posix_acl_valid(const struct posix_acl *acl)
|
||||
return 0;
|
||||
return -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL(posix_acl_valid);
|
||||
|
||||
/*
|
||||
* Returns 0 if the acl can be exactly represented in the traditional
|
||||
@ -186,6 +258,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
|
||||
*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
|
||||
return not_equiv;
|
||||
}
|
||||
EXPORT_SYMBOL(posix_acl_equiv_mode);
|
||||
|
||||
/*
|
||||
* Create an ACL representing the file mode permission bits of an inode.
|
||||
@ -207,6 +280,7 @@ posix_acl_from_mode(umode_t mode, gfp_t flags)
|
||||
acl->a_entries[2].e_perm = (mode & S_IRWXO);
|
||||
return acl;
|
||||
}
|
||||
EXPORT_SYMBOL(posix_acl_from_mode);
|
||||
|
||||
/*
|
||||
* Return 0 if current is granted want access to the inode
|
||||
|
@@ -26,7 +26,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
	unsigned long committed;
	struct vmalloc_info vmi;
	long cached;
	long available;
	unsigned long pagecache;
	unsigned long wmark_low = 0;
	unsigned long pages[NR_LRU_LISTS];
	struct zone *zone;
	int lru;

	/*
@@ -47,12 +51,44 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
		pages[lru] = global_page_state(NR_LRU_BASE + lru);

	for_each_zone(zone)
		wmark_low += zone->watermark[WMARK_LOW];

	/*
	 * Estimate the amount of memory available for userspace allocations,
	 * without causing swapping.
	 *
	 * Free memory cannot be taken below the low watermark, before the
	 * system starts swapping.
	 */
	available = i.freeram - wmark_low;

	/*
	 * Not all the page cache can be freed, otherwise the system will
	 * start swapping. Assume at least half of the page cache, or the
	 * low watermark worth of cache, needs to stay.
	 */
	pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
	pagecache -= min(pagecache / 2, wmark_low);
	available += pagecache;

	/*
	 * Part of the reclaimable swap consists of items that are in use,
	 * and cannot be freed. Cap this estimate at the low watermark.
	 */
	available += global_page_state(NR_SLAB_RECLAIMABLE) -
		     min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);

	if (available < 0)
		available = 0;

	/*
	 * Tagged format, for easy grepping and expansion.
	 */
	seq_printf(m,
		"MemTotal: %8lu kB\n"
		"MemFree: %8lu kB\n"
		"MemAvailable: %8lu kB\n"
		"Buffers: %8lu kB\n"
		"Cached: %8lu kB\n"
		"SwapCached: %8lu kB\n"
@@ -105,6 +141,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
		,
		K(i.totalram),
		K(i.freeram),
		K(available),
		K(i.bufferram),
		K(cached),
		K(total_swapcache_pages()),
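For userspace consumers, the new MemAvailable field can simply be read out of /proc/meminfo; kernels without this patch will not export the line at all. A small, self-contained sketch (not part of the patch) that falls back to MemFree when the field is absent:

#include <stdio.h>

/* Return the estimate in kB, or -1 if /proc/meminfo cannot be read. */
static long read_mem_available(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	char line[128];
	long avail = -1, free_kb = -1;

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "MemAvailable: %ld kB", &avail) == 1)
			break;
		sscanf(line, "MemFree: %ld kB", &free_kb);
	}
	fclose(f);
	return avail >= 0 ? avail : free_kb;	/* fallback for old kernels */
}

int main(void)
{
	printf("available: %ld kB\n", read_mem_available());
	return 0;
}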
@@ -275,4 +275,4 @@ int __init init_ramfs_fs(void)

	return err;
}
module_init(init_ramfs_fs)
fs_initcall(init_ramfs_fs);
@@ -901,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
	io_fn_t fn;
	iov_fn_t fnv;

	ret = -EFAULT;
	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
		goto out;

	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
					   UIO_FASTIOV, iovstack, &iov);
	if (ret <= 0)
@@ -166,6 +166,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
	if (!s)
		return NULL;

	INIT_LIST_HEAD(&s->s_mounts);

	if (security_sb_alloc(s))
		goto fail;

@@ -188,7 +190,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
	if (list_lru_init(&s->s_inode_lru))
		goto fail;

	INIT_LIST_HEAD(&s->s_mounts);
	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);
	/*
@ -5,6 +5,7 @@
|
||||
#define _LINUX_BOOTMEM_H
|
||||
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <asm/dma.h>
|
||||
|
||||
/*
|
||||
@ -52,7 +53,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
|
||||
unsigned long size);
|
||||
extern void free_bootmem(unsigned long physaddr, unsigned long size);
|
||||
extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
|
||||
extern void __free_pages_bootmem(struct page *page, unsigned int order);
|
||||
|
||||
/*
|
||||
* Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
|
||||
@ -142,6 +142,157 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
|
||||
#define alloc_bootmem_low_pages_node(pgdat, x) \
|
||||
__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
|
||||
|
||||
|
||||
#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM)
|
||||
|
||||
/* FIXME: use MEMBLOCK_ALLOC_* variants here */
|
||||
#define BOOTMEM_ALLOC_ACCESSIBLE 0
|
||||
#define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0)
|
||||
|
||||
/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
|
||||
void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
|
||||
phys_addr_t align, phys_addr_t min_addr,
|
||||
phys_addr_t max_addr, int nid);
|
||||
void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align,
|
||||
phys_addr_t min_addr, phys_addr_t max_addr, int nid);
|
||||
void __memblock_free_early(phys_addr_t base, phys_addr_t size);
|
||||
void __memblock_free_late(phys_addr_t base, phys_addr_t size);
|
||||
|
||||
static inline void * __init memblock_virt_alloc(
|
||||
phys_addr_t size, phys_addr_t align)
|
||||
{
|
||||
return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
|
||||
BOOTMEM_ALLOC_ACCESSIBLE,
|
||||
NUMA_NO_NODE);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_nopanic(
|
||||
phys_addr_t size, phys_addr_t align)
|
||||
{
|
||||
return memblock_virt_alloc_try_nid_nopanic(size, align,
|
||||
BOOTMEM_LOW_LIMIT,
|
||||
BOOTMEM_ALLOC_ACCESSIBLE,
|
||||
NUMA_NO_NODE);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_from_nopanic(
|
||||
phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
|
||||
{
|
||||
return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr,
|
||||
BOOTMEM_ALLOC_ACCESSIBLE,
|
||||
NUMA_NO_NODE);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_node(
|
||||
phys_addr_t size, int nid)
|
||||
{
|
||||
return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
|
||||
BOOTMEM_ALLOC_ACCESSIBLE, nid);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_node_nopanic(
|
||||
phys_addr_t size, int nid)
|
||||
{
|
||||
return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
|
||||
BOOTMEM_ALLOC_ACCESSIBLE,
|
||||
nid);
|
||||
}
|
||||
|
||||
static inline void __init memblock_free_early(
|
||||
phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
__memblock_free_early(base, size);
|
||||
}
|
||||
|
||||
static inline void __init memblock_free_early_nid(
|
||||
phys_addr_t base, phys_addr_t size, int nid)
|
||||
{
|
||||
__memblock_free_early(base, size);
|
||||
}
|
||||
|
||||
static inline void __init memblock_free_late(
|
||||
phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
__memblock_free_late(base, size);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define BOOTMEM_ALLOC_ACCESSIBLE 0
|
||||
|
||||
|
||||
/* Fall back to all the existing bootmem APIs */
|
||||
static inline void * __init memblock_virt_alloc(
|
||||
phys_addr_t size, phys_addr_t align)
|
||||
{
|
||||
if (!align)
|
||||
align = SMP_CACHE_BYTES;
|
||||
return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_nopanic(
|
||||
phys_addr_t size, phys_addr_t align)
|
||||
{
|
||||
if (!align)
|
||||
align = SMP_CACHE_BYTES;
|
||||
return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_from_nopanic(
|
||||
phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
|
||||
{
|
||||
return __alloc_bootmem_nopanic(size, align, min_addr);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_node(
|
||||
phys_addr_t size, int nid)
|
||||
{
|
||||
return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES,
|
||||
BOOTMEM_LOW_LIMIT);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_node_nopanic(
|
||||
phys_addr_t size, int nid)
|
||||
{
|
||||
return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
|
||||
SMP_CACHE_BYTES,
|
||||
BOOTMEM_LOW_LIMIT);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size,
|
||||
phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid)
|
||||
{
|
||||
return __alloc_bootmem_node_high(NODE_DATA(nid), size, align,
|
||||
min_addr);
|
||||
}
|
||||
|
||||
static inline void * __init memblock_virt_alloc_try_nid_nopanic(
|
||||
phys_addr_t size, phys_addr_t align,
|
||||
phys_addr_t min_addr, phys_addr_t max_addr, int nid)
|
||||
{
|
||||
return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
|
||||
min_addr, max_addr);
|
||||
}
|
||||
|
||||
static inline void __init memblock_free_early(
|
||||
phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
free_bootmem(base, size);
|
||||
}
|
||||
|
||||
static inline void __init memblock_free_early_nid(
|
||||
phys_addr_t base, phys_addr_t size, int nid)
|
||||
{
|
||||
free_bootmem_node(NODE_DATA(nid), base, size);
|
||||
}
|
||||
|
||||
static inline void __init memblock_free_late(
|
||||
phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
free_bootmem_late(base, size);
|
||||
}
|
||||
#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
|
||||
extern void *alloc_remap(int nid, unsigned long size);
|
||||
#else
|
||||
|
@@ -62,6 +62,22 @@ static inline bool compaction_deferred(struct zone *zone, int order)
	return zone->compact_considered < defer_limit;
}

/*
 * Update defer tracking counters after successful compaction of given order,
 * which means an allocation either succeeded (alloc_success == true) or is
 * expected to succeed.
 */
static inline void compaction_defer_reset(struct zone *zone, int order,
		bool alloc_success)
{
	if (alloc_success) {
		zone->compact_considered = 0;
		zone->compact_defer_shift = 0;
	}
	if (order >= zone->compact_order_failed)
		zone->compact_order_failed = order + 1;
}

/* Returns true if restarting compaction after many failures */
static inline bool compaction_restarting(struct zone *zone, int order)
{
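The intended call-site pattern is to clear the defer counters only once compaction actually produced a page of the requested order. A hedged sketch of such a caller; only compaction_deferred() and compaction_defer_reset() come from the header above, the allocation helper is hypothetical:

/* Illustrative call-site pattern, not taken verbatim from mm/page_alloc.c. */
static struct page *try_compact_then_alloc(struct zone *zone, int order)
{
	struct page *page;

	if (compaction_deferred(zone, order))
		return NULL;	/* recent failures: skip compaction for now */

	page = alloc_after_compaction(zone, order);	/* hypothetical helper */
	if (page)
		/* success: reset defer state so later attempts are not throttled */
		compaction_defer_reset(zone, order, true);

	return page;
}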
@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,

extern void debug_dma_dump_mappings(struct device *dev);

extern void debug_dma_assert_idle(struct page *page);

#else /* CONFIG_DMA_API_DEBUG */

static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
{
}

static inline void debug_dma_assert_idle(struct page *page)
{
}

#endif /* CONFIG_DMA_API_DEBUG */

#endif /* __DMA_DEBUG_H */
@ -15,7 +15,6 @@
|
||||
#include <linux/path.h> /* struct path */
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
|
||||
/*
|
||||
@ -79,6 +78,7 @@ struct fsnotify_group;
|
||||
struct fsnotify_event;
|
||||
struct fsnotify_mark;
|
||||
struct fsnotify_event_private_data;
|
||||
struct fsnotify_fname;
|
||||
|
||||
/*
|
||||
* Each group much define these ops. The fsnotify infrastructure will call
|
||||
@ -94,17 +94,27 @@ struct fsnotify_event_private_data;
|
||||
* userspace messages that marks have been removed.
|
||||
*/
|
||||
struct fsnotify_ops {
|
||||
bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *vfsmount_mark,
|
||||
__u32 mask, void *data, int data_type);
|
||||
int (*handle_event)(struct fsnotify_group *group,
|
||||
struct inode *inode,
|
||||
struct fsnotify_mark *inode_mark,
|
||||
struct fsnotify_mark *vfsmount_mark,
|
||||
struct fsnotify_event *event);
|
||||
u32 mask, void *data, int data_type,
|
||||
const unsigned char *file_name);
|
||||
void (*free_group_priv)(struct fsnotify_group *group);
|
||||
void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
|
||||
void (*free_event_priv)(struct fsnotify_event_private_data *priv);
|
||||
void (*free_event)(struct fsnotify_event *event);
|
||||
};
|
||||
|
||||
/*
|
||||
* all of the information about the original object we want to now send to
|
||||
* a group. If you want to carry more info from the accessing task to the
|
||||
* listener this structure is where you need to be adding fields.
|
||||
*/
|
||||
struct fsnotify_event {
|
||||
struct list_head list;
|
||||
/* inode may ONLY be dereferenced during handle_event(). */
|
||||
struct inode *inode; /* either the inode the event happened to or its parent */
|
||||
u32 mask; /* the type of access, bitwise OR for FS_* event types */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -148,7 +158,11 @@ struct fsnotify_group {
|
||||
* a group */
|
||||
struct list_head marks_list; /* all inode marks for this group */
|
||||
|
||||
struct fasync_struct *fsn_fa; /* async notification */
|
||||
struct fasync_struct *fsn_fa; /* async notification */
|
||||
|
||||
struct fsnotify_event overflow_event; /* Event we queue when the
|
||||
* notification list is too
|
||||
* full */
|
||||
|
||||
/* groups can define private fields here or use the void *private */
|
||||
union {
|
||||
@ -177,76 +191,10 @@ struct fsnotify_group {
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* A single event can be queued in multiple group->notification_lists.
|
||||
*
|
||||
* each group->notification_list will point to an event_holder which in turns points
|
||||
* to the actual event that needs to be sent to userspace.
|
||||
*
|
||||
* Seemed cheaper to create a refcnt'd event and a small holder for every group
|
||||
* than create a different event for every group
|
||||
*
|
||||
*/
|
||||
struct fsnotify_event_holder {
|
||||
struct fsnotify_event *event;
|
||||
struct list_head event_list;
|
||||
};
|
||||
|
||||
/*
|
||||
* Inotify needs to tack data onto an event. This struct lets us later find the
|
||||
* correct private data of the correct group.
|
||||
*/
|
||||
struct fsnotify_event_private_data {
|
||||
struct fsnotify_group *group;
|
||||
struct list_head event_list;
|
||||
};
|
||||
|
||||
/*
|
||||
* all of the information about the original object we want to now send to
|
||||
* a group. If you want to carry more info from the accessing task to the
|
||||
* listener this structure is where you need to be adding fields.
|
||||
*/
|
||||
struct fsnotify_event {
|
||||
/*
|
||||
* If we create an event we are also likely going to need a holder
|
||||
* to link to a group. So embed one holder in the event. Means only
|
||||
* one allocation for the common case where we only have one group
|
||||
*/
|
||||
struct fsnotify_event_holder holder;
|
||||
spinlock_t lock; /* protection for the associated event_holder and private_list */
|
||||
/* to_tell may ONLY be dereferenced during handle_event(). */
|
||||
struct inode *to_tell; /* either the inode the event happened to or its parent */
|
||||
/*
|
||||
* depending on the event type we should have either a path or inode
|
||||
* We hold a reference on path, but NOT on inode. Since we have the ref on
|
||||
* the path, it may be dereferenced at any point during this object's
|
||||
* lifetime. That reference is dropped when this object's refcnt hits
|
||||
* 0. If this event contains an inode instead of a path, the inode may
|
||||
* ONLY be used during handle_event().
|
||||
*/
|
||||
union {
|
||||
struct path path;
|
||||
struct inode *inode;
|
||||
};
|
||||
/* when calling fsnotify tell it if the data is a path or inode */
|
||||
#define FSNOTIFY_EVENT_NONE 0
|
||||
#define FSNOTIFY_EVENT_PATH 1
|
||||
#define FSNOTIFY_EVENT_INODE 2
|
||||
int data_type; /* which of the above union we have */
|
||||
atomic_t refcnt; /* how many groups still are using/need to send this event */
|
||||
__u32 mask; /* the type of access, bitwise OR for FS_* event types */
|
||||
|
||||
u32 sync_cookie; /* used to corrolate events, namely inotify mv events */
|
||||
const unsigned char *file_name;
|
||||
size_t name_len;
|
||||
struct pid *tgid;
|
||||
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
__u32 response; /* userspace answer to question */
|
||||
#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
|
||||
|
||||
struct list_head private_data_list; /* groups can store private data here */
|
||||
};
|
||||
|
||||
/*
|
||||
* Inode specific fields in an fsnotify_mark
|
||||
@ -370,17 +318,12 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
|
||||
extern void fsnotify_destroy_group(struct fsnotify_group *group);
|
||||
/* fasync handler function */
|
||||
extern int fsnotify_fasync(int fd, struct file *file, int on);
|
||||
/* take a reference to an event */
|
||||
extern void fsnotify_get_event(struct fsnotify_event *event);
|
||||
extern void fsnotify_put_event(struct fsnotify_event *event);
|
||||
/* find private data previously attached to an event and unlink it */
|
||||
extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
|
||||
struct fsnotify_event *event);
|
||||
|
||||
/* Free event from memory */
|
||||
extern void fsnotify_destroy_event(struct fsnotify_group *group,
|
||||
struct fsnotify_event *event);
|
||||
/* attach the event to the group notification queue */
|
||||
extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
|
||||
struct fsnotify_event *event,
|
||||
struct fsnotify_event_private_data *priv,
|
||||
struct fsnotify_event *(*merge)(struct list_head *,
|
||||
struct fsnotify_event *));
|
||||
/* true if the group notification queue is empty */
|
||||
@ -430,15 +373,8 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
|
||||
extern void fsnotify_unmount_inodes(struct list_head *list);
|
||||
|
||||
/* put here because inotify does some weird stuff when destroying watches */
|
||||
extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
|
||||
void *data, int data_is,
|
||||
const unsigned char *name,
|
||||
u32 cookie, gfp_t gfp);
|
||||
|
||||
/* fanotify likes to change events after they are on lists... */
|
||||
extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event);
|
||||
extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
|
||||
struct fsnotify_event *new_event);
|
||||
extern void fsnotify_init_event(struct fsnotify_event *event,
|
||||
struct inode *to_tell, u32 mask);
|
||||
|
||||
#else
|
||||
|
||||
|
@ -157,6 +157,26 @@ static inline int hpage_nr_pages(struct page *page)
|
||||
return HPAGE_PMD_NR;
|
||||
return 1;
|
||||
}
|
||||
/*
|
||||
* compound_trans_head() should be used instead of compound_head(),
|
||||
* whenever the "page" passed as parameter could be the tail of a
|
||||
* transparent hugepage that could be undergoing a
|
||||
* __split_huge_page_refcount(). The page structure layout often
|
||||
* changes across releases and it makes extensive use of unions. So if
|
||||
* the page structure layout will change in a way that
|
||||
* page->first_page gets clobbered by __split_huge_page_refcount, the
|
||||
* implementation making use of smp_rmb() will be required.
|
||||
*
|
||||
* Currently we define compound_trans_head as compound_head, because
|
||||
* page->private is in the same union with page->first_page, and
|
||||
* page->private isn't clobbered. However this also means we're
|
||||
* currently leaving dirt into the page->private field of anonymous
|
||||
* pages resulting from a THP split, instead of setting page->private
|
||||
* to zero like for every other page that has PG_private not set. But
|
||||
* anonymous pages don't use page->private so this is not a problem.
|
||||
*/
|
||||
#if 0
|
||||
/* This will be needed if page->private will be clobbered in split_huge_page */
|
||||
static inline struct page *compound_trans_head(struct page *page)
|
||||
{
|
||||
if (PageTail(page)) {
|
||||
@ -174,6 +194,9 @@ static inline struct page *compound_trans_head(struct page *page)
|
||||
}
|
||||
return page;
|
||||
}
|
||||
#else
|
||||
#define compound_trans_head(page) compound_head(page)
|
||||
#endif
|
||||
|
||||
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
unsigned long addr, pmd_t pmd, pmd_t *pmdp);
|
||||
|
@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
|
||||
void hugepage_put_subpool(struct hugepage_subpool *spool);
|
||||
|
||||
int PageHuge(struct page *page);
|
||||
int PageHeadHuge(struct page *page_head);
|
||||
|
||||
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
|
||||
int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
|
||||
@ -104,11 +103,6 @@ static inline int PageHuge(struct page *page)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int PageHeadHuge(struct page *page_head)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
@ -360,6 +354,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
|
||||
|
||||
static inline struct hstate *page_hstate(struct page *page)
|
||||
{
|
||||
VM_BUG_ON(!PageHuge(page));
|
||||
return size_to_hstate(PAGE_SIZE << compound_order(page));
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,7 @@ extern struct fs_struct init_fs;
|
||||
|
||||
#define INIT_SIGNALS(sig) { \
|
||||
.nr_threads = 1, \
|
||||
.thread_head = LIST_HEAD_INIT(init_task.thread_node), \
|
||||
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
|
||||
.shared_pending = { \
|
||||
.list = LIST_HEAD_INIT(sig.shared_pending.list), \
|
||||
@ -222,6 +223,7 @@ extern struct task_group root_task_group;
|
||||
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
|
||||
}, \
|
||||
.thread_group = LIST_HEAD_INIT(tsk.thread_group), \
|
||||
.thread_node = LIST_HEAD_INIT(init_signals.thread_head), \
|
||||
INIT_IDS \
|
||||
INIT_PERF_EVENTS(tsk) \
|
||||
INIT_TRACE_IRQFLAGS \
|
||||
|
@ -73,11 +73,7 @@ static inline void set_page_stable_node(struct page *page,
|
||||
struct page *ksm_might_need_to_copy(struct page *page,
|
||||
struct vm_area_struct *vma, unsigned long address);
|
||||
|
||||
int page_referenced_ksm(struct page *page,
|
||||
struct mem_cgroup *memcg, unsigned long *vm_flags);
|
||||
int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
|
||||
int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
|
||||
struct vm_area_struct *, unsigned long, void *), void *arg);
|
||||
int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
|
||||
void ksm_migrate_page(struct page *newpage, struct page *oldpage);
|
||||
|
||||
#else /* !CONFIG_KSM */
|
||||
@ -115,13 +111,8 @@ static inline int page_referenced_ksm(struct page *page,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
|
||||
struct vm_area_struct *, unsigned long, void *), void *arg)
|
||||
static inline int rmap_walk_ksm(struct page *page,
|
||||
struct rmap_walk_control *rwc)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -19,9 +19,13 @@
|
||||
|
||||
#define INIT_MEMBLOCK_REGIONS 128
|
||||
|
||||
/* Definition of memblock flags. */
|
||||
#define MEMBLOCK_HOTPLUG 0x1 /* hotpluggable region */
|
||||
|
||||
struct memblock_region {
|
||||
phys_addr_t base;
|
||||
phys_addr_t size;
|
||||
unsigned long flags;
|
||||
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
|
||||
int nid;
|
||||
#endif
|
||||
@ -43,12 +47,17 @@ struct memblock {
|
||||
|
||||
extern struct memblock memblock;
|
||||
extern int memblock_debug;
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
/* If movable_node boot option specified */
|
||||
extern bool movable_node_enabled;
|
||||
#endif /* CONFIG_MOVABLE_NODE */
|
||||
|
||||
#define memblock_dbg(fmt, ...) \
|
||||
if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
|
||||
|
||||
phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
|
||||
phys_addr_t size, phys_addr_t align, int nid);
|
||||
phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
|
||||
phys_addr_t start, phys_addr_t end,
|
||||
int nid);
|
||||
phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
|
||||
phys_addr_t size, phys_addr_t align);
|
||||
phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
|
||||
@ -59,6 +68,28 @@ int memblock_remove(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_free(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_reserve(phys_addr_t base, phys_addr_t size);
|
||||
void memblock_trim_memory(phys_addr_t align);
|
||||
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
static inline bool memblock_is_hotpluggable(struct memblock_region *m)
|
||||
{
|
||||
return m->flags & MEMBLOCK_HOTPLUG;
|
||||
}
|
||||
|
||||
static inline bool movable_node_is_enabled(void)
|
||||
{
|
||||
return movable_node_enabled;
|
||||
}
|
||||
#else
|
||||
static inline bool memblock_is_hotpluggable(struct memblock_region *m)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool movable_node_is_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
|
||||
int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
|
||||
@ -87,7 +118,7 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
|
||||
/**
|
||||
* for_each_free_mem_range - iterate through free memblock areas
|
||||
* @i: u64 used as loop variable
|
||||
* @nid: node selector, %MAX_NUMNODES for all nodes
|
||||
* @nid: node selector, %NUMA_NO_NODE for all nodes
|
||||
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
|
||||
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
|
||||
* @p_nid: ptr to int for nid of the range, can be %NULL
|
||||
@ -107,7 +138,7 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
|
||||
/**
|
||||
* for_each_free_mem_range_reverse - rev-iterate through free memblock areas
|
||||
* @i: u64 used as loop variable
|
||||
* @nid: node selector, %MAX_NUMNODES for all nodes
|
||||
* @nid: node selector, %NUMA_NO_NODE for all nodes
|
||||
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
|
||||
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
|
||||
* @p_nid: ptr to int for nid of the range, can be %NULL
|
||||
@ -121,8 +152,21 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
|
||||
i != (u64)ULLONG_MAX; \
|
||||
__next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
|
||||
|
||||
static inline void memblock_set_region_flags(struct memblock_region *r,
|
||||
unsigned long flags)
|
||||
{
|
||||
r->flags |= flags;
|
||||
}
|
||||
|
||||
static inline void memblock_clear_region_flags(struct memblock_region *r,
|
||||
unsigned long flags)
|
||||
{
|
||||
r->flags &= ~flags;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
|
||||
int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
|
||||
int memblock_set_node(phys_addr_t base, phys_addr_t size,
|
||||
struct memblock_type *type, int nid);
|
||||
|
||||
static inline void memblock_set_region_node(struct memblock_region *r, int nid)
|
||||
{
|
||||
|
@ -211,20 +211,8 @@ static inline void mpol_get(struct mempolicy *pol)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct mempolicy *mpol_dup(struct mempolicy *old)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct shared_policy {};
|
||||
|
||||
static inline int mpol_set_shared_policy(struct shared_policy *info,
|
||||
struct vm_area_struct *vma,
|
||||
struct mempolicy *new)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static inline void mpol_shared_policy_init(struct shared_policy *sp,
|
||||
struct mempolicy *mpol)
|
||||
{
|
||||
@ -234,12 +222,6 @@ static inline void mpol_free_shared_policy(struct shared_policy *p)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct mempolicy *
|
||||
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define vma_policy(vma) NULL
|
||||
|
||||
static inline int
|
||||
@ -266,10 +248,6 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mpol_fix_fork_child_flag(struct task_struct *p)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
|
||||
unsigned long addr, gfp_t gfp_flags,
|
||||
struct mempolicy **mpol, nodemask_t **nodemask)
|
||||
@ -284,12 +262,6 @@ static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
|
||||
const nodemask_t *mask)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
|
||||
const nodemask_t *to, int flags)
|
||||
{
|
||||
@ -307,10 +279,6 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
|
||||
unsigned long address)
|
||||
{
|
||||
|
@ -35,16 +35,12 @@ enum migrate_reason {
|
||||
|
||||
#ifdef CONFIG_MIGRATION
|
||||
|
||||
extern void putback_lru_pages(struct list_head *l);
|
||||
extern void putback_movable_pages(struct list_head *l);
|
||||
extern int migrate_page(struct address_space *,
|
||||
struct page *, struct page *, enum migrate_mode);
|
||||
extern int migrate_pages(struct list_head *l, new_page_t x,
|
||||
unsigned long private, enum migrate_mode mode, int reason);
|
||||
|
||||
extern int fail_migrate_page(struct address_space *,
|
||||
struct page *, struct page *);
|
||||
|
||||
extern int migrate_prep(void);
|
||||
extern int migrate_prep_local(void);
|
||||
extern int migrate_vmas(struct mm_struct *mm,
|
||||
@ -59,7 +55,6 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
|
||||
int extra_count);
|
||||
#else
|
||||
|
||||
static inline void putback_lru_pages(struct list_head *l) {}
|
||||
static inline void putback_movable_pages(struct list_head *l) {}
|
||||
static inline int migrate_pages(struct list_head *l, new_page_t x,
|
||||
unsigned long private, enum migrate_mode mode, int reason)
|
||||
@ -86,7 +81,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
|
||||
|
||||
/* Possible settings for the migrate_page() method in address_operations */
|
||||
#define migrate_page NULL
|
||||
#define fail_migrate_page NULL
|
||||
|
||||
#endif /* CONFIG_MIGRATION */
|
||||
|
||||
|
@ -57,6 +57,15 @@ extern int sysctl_legacy_va_layout;
|
||||
extern unsigned long sysctl_user_reserve_kbytes;
|
||||
extern unsigned long sysctl_admin_reserve_kbytes;
|
||||
|
||||
extern int sysctl_overcommit_memory;
|
||||
extern int sysctl_overcommit_ratio;
|
||||
extern unsigned long sysctl_overcommit_kbytes;
|
||||
|
||||
extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
|
||||
size_t *, loff_t *);
|
||||
extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
|
||||
size_t *, loff_t *);
|
||||
|
||||
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
|
||||
|
||||
/* to align the pointer to the (next) page boundary */
|
||||
@ -414,15 +423,44 @@ static inline int page_count(struct page *page)
|
||||
return atomic_read(&compound_head(page)->_count);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
extern int PageHeadHuge(struct page *page_head);
|
||||
#else /* CONFIG_HUGETLB_PAGE */
|
||||
static inline int PageHeadHuge(struct page *page_head)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
static inline bool __compound_tail_refcounted(struct page *page)
|
||||
{
|
||||
return !PageSlab(page) && !PageHeadHuge(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* This takes a head page as parameter and tells if the
|
||||
* tail page reference counting can be skipped.
|
||||
*
|
||||
* For this to be safe, PageSlab and PageHeadHuge must remain true on
|
||||
* any given page where they return true here, until all tail pins
|
||||
* have been released.
|
||||
*/
|
||||
static inline bool compound_tail_refcounted(struct page *page)
|
||||
{
|
||||
VM_BUG_ON(!PageHead(page));
|
||||
return __compound_tail_refcounted(page);
|
||||
}
|
||||
|
||||
static inline void get_huge_page_tail(struct page *page)
|
||||
{
|
||||
/*
|
||||
* __split_huge_page_refcount() cannot run
|
||||
* from under us.
|
||||
* __split_huge_page_refcount() cannot run from under us.
|
||||
*/
|
||||
VM_BUG_ON(!PageTail(page));
|
||||
VM_BUG_ON(page_mapcount(page) < 0);
|
||||
VM_BUG_ON(atomic_read(&page->_count) != 0);
|
||||
atomic_inc(&page->_mapcount);
|
||||
if (compound_tail_refcounted(page->first_page))
|
||||
atomic_inc(&page->_mapcount);
|
||||
}
|
||||
|
||||
extern bool __get_page_tail(struct page *page);
|
||||
@ -846,11 +884,14 @@ static __always_inline void *lowmem_page_address(const struct page *page)
|
||||
#endif
|
||||
|
||||
#if defined(WANT_PAGE_VIRTUAL)
|
||||
#define page_address(page) ((page)->virtual)
|
||||
#define set_page_address(page, address) \
|
||||
do { \
|
||||
(page)->virtual = (address); \
|
||||
} while(0)
|
||||
static inline void *page_address(const struct page *page)
|
||||
{
|
||||
return page->virtual;
|
||||
}
|
||||
static inline void set_page_address(struct page *page, void *address)
|
||||
{
|
||||
page->virtual = address;
|
||||
}
|
||||
#define page_address_init() do { } while(0)
|
||||
#endif
|
||||
|
||||
@ -984,7 +1025,6 @@ extern void pagefault_out_of_memory(void);
|
||||
* various contexts.
|
||||
*/
|
||||
#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
|
||||
#define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */
|
||||
|
||||
extern void show_free_areas(unsigned int flags);
|
||||
extern bool skip_free_areas_node(unsigned int flags, int nid);
|
||||
@ -1318,6 +1358,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
|
||||
|
||||
#if USE_SPLIT_PTE_PTLOCKS
|
||||
#if ALLOC_SPLIT_PTLOCKS
|
||||
void __init ptlock_cache_init(void);
|
||||
extern bool ptlock_alloc(struct page *page);
|
||||
extern void ptlock_free(struct page *page);
|
||||
|
||||
@ -1326,6 +1367,10 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
|
||||
return page->ptl;
|
||||
}
|
||||
#else /* ALLOC_SPLIT_PTLOCKS */
|
||||
static inline void ptlock_cache_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool ptlock_alloc(struct page *page)
|
||||
{
|
||||
return true;
|
||||
@ -1378,10 +1423,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
|
||||
{
|
||||
return &mm->page_table_lock;
|
||||
}
|
||||
static inline void ptlock_cache_init(void) {}
|
||||
static inline bool ptlock_init(struct page *page) { return true; }
|
||||
static inline void pte_lock_deinit(struct page *page) {}
|
||||
#endif /* USE_SPLIT_PTE_PTLOCKS */
|
||||
|
||||
static inline void pgtable_init(void)
|
||||
{
|
||||
ptlock_cache_init();
|
||||
pgtable_cache_init();
|
||||
}
|
||||
|
||||
static inline bool pgtable_page_ctor(struct page *page)
|
||||
{
|
||||
inc_zone_page_state(page, NR_PAGETABLE);
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
extern int sysctl_overcommit_memory;
|
||||
extern int sysctl_overcommit_ratio;
|
||||
extern unsigned long sysctl_overcommit_kbytes;
|
||||
extern struct percpu_counter vm_committed_as;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
@ -489,6 +489,12 @@ struct zone {
|
||||
unsigned long present_pages;
|
||||
unsigned long managed_pages;
|
||||
|
||||
/*
|
||||
* Number of MIGRATE_RESEVE page block. To maintain for just
|
||||
* optimization. Protected by zone->lock.
|
||||
*/
|
||||
int nr_migrate_reserve_block;
|
||||
|
||||
/*
|
||||
* rarely used fields:
|
||||
*/
|
||||
@ -758,10 +764,7 @@ typedef struct pglist_data {
|
||||
int kswapd_max_order;
|
||||
enum zone_type classzone_idx;
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
/*
|
||||
* Lock serializing the per destination node AutoNUMA memory
|
||||
* migration rate limiting data.
|
||||
*/
|
||||
/* Lock serializing the migrate rate limiting window */
|
||||
spinlock_t numabalancing_migrate_lock;
|
||||
|
||||
/* Rate limiting time interval */
|
||||
|
@ -94,78 +94,12 @@ extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
|
||||
extern struct posix_acl *get_posix_acl(struct inode *, int);
|
||||
extern int set_posix_acl(struct inode *, int, struct posix_acl *);
|
||||
|
||||
#ifdef CONFIG_FS_POSIX_ACL
|
||||
static inline struct posix_acl **acl_by_type(struct inode *inode, int type)
|
||||
{
|
||||
switch (type) {
|
||||
case ACL_TYPE_ACCESS:
|
||||
return &inode->i_acl;
|
||||
case ACL_TYPE_DEFAULT:
|
||||
return &inode->i_default_acl;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
|
||||
{
|
||||
struct posix_acl **p = acl_by_type(inode, type);
|
||||
struct posix_acl *acl = ACCESS_ONCE(*p);
|
||||
if (acl) {
|
||||
spin_lock(&inode->i_lock);
|
||||
acl = *p;
|
||||
if (acl != ACL_NOT_CACHED)
|
||||
acl = posix_acl_dup(acl);
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
return acl;
|
||||
}
|
||||
|
||||
static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
|
||||
{
|
||||
return rcu_dereference(*acl_by_type(inode, type));
|
||||
}
|
||||
|
||||
static inline void set_cached_acl(struct inode *inode,
|
||||
int type,
|
||||
struct posix_acl *acl)
|
||||
{
|
||||
struct posix_acl **p = acl_by_type(inode, type);
|
||||
struct posix_acl *old;
|
||||
spin_lock(&inode->i_lock);
|
||||
old = *p;
|
||||
rcu_assign_pointer(*p, posix_acl_dup(acl));
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (old != ACL_NOT_CACHED)
|
||||
posix_acl_release(old);
|
||||
}
|
||||
|
||||
static inline void forget_cached_acl(struct inode *inode, int type)
|
||||
{
|
||||
struct posix_acl **p = acl_by_type(inode, type);
|
||||
struct posix_acl *old;
|
||||
spin_lock(&inode->i_lock);
|
||||
old = *p;
|
||||
*p = ACL_NOT_CACHED;
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (old != ACL_NOT_CACHED)
|
||||
posix_acl_release(old);
|
||||
}
|
||||
|
||||
static inline void forget_all_cached_acls(struct inode *inode)
|
||||
{
|
||||
struct posix_acl *old_access, *old_default;
|
||||
spin_lock(&inode->i_lock);
|
||||
old_access = inode->i_acl;
|
||||
old_default = inode->i_default_acl;
|
||||
inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (old_access != ACL_NOT_CACHED)
|
||||
posix_acl_release(old_access);
|
||||
if (old_default != ACL_NOT_CACHED)
|
||||
posix_acl_release(old_default);
|
||||
}
|
||||
#endif
|
||||
struct posix_acl **acl_by_type(struct inode *inode, int type);
|
||||
struct posix_acl *get_cached_acl(struct inode *inode, int type);
|
||||
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
|
||||
void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
|
||||
void forget_cached_acl(struct inode *inode, int type);
|
||||
void forget_all_cached_acls(struct inode *inode);
|
||||
|
||||
static inline void cache_no_acl(struct inode *inode)
|
||||
{
|
||||
|
@ -184,13 +184,13 @@ static inline void page_dup_rmap(struct page *page)
|
||||
int page_referenced(struct page *, int is_locked,
|
||||
struct mem_cgroup *memcg, unsigned long *vm_flags);
|
||||
int page_referenced_one(struct page *, struct vm_area_struct *,
|
||||
unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
|
||||
unsigned long address, void *arg);
|
||||
|
||||
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
|
||||
|
||||
int try_to_unmap(struct page *, enum ttu_flags flags);
|
||||
int try_to_unmap_one(struct page *, struct vm_area_struct *,
|
||||
unsigned long address, enum ttu_flags flags);
|
||||
unsigned long address, void *arg);
|
||||
|
||||
/*
|
||||
* Called from mm/filemap_xip.c to unmap empty zero page
|
||||
@ -236,10 +236,27 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
|
||||
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
|
||||
|
||||
/*
|
||||
* Called by migrate.c to remove migration ptes, but might be used more later.
|
||||
* rmap_walk_control: To control rmap traversing for specific needs
|
||||
*
|
||||
* arg: passed to rmap_one() and invalid_vma()
|
||||
* rmap_one: executed on each vma where page is mapped
|
||||
* done: for checking traversing termination condition
|
||||
* file_nonlinear: for handling file nonlinear mapping
|
||||
* anon_lock: for getting anon_lock by optimized way rather than default
|
||||
* invalid_vma: for skipping uninterested vma
|
||||
*/
|
||||
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
|
||||
struct vm_area_struct *, unsigned long, void *), void *arg);
|
||||
struct rmap_walk_control {
|
||||
void *arg;
|
||||
int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
|
||||
unsigned long addr, void *arg);
|
||||
int (*done)(struct page *page);
|
||||
int (*file_nonlinear)(struct page *, struct address_space *,
|
||||
struct vm_area_struct *vma);
|
||||
struct anon_vma *(*anon_lock)(struct page *page);
|
||||
bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
|
||||
};
|
||||
|
||||
int rmap_walk(struct page *page, struct rmap_walk_control *rwc);
|
||||
|
||||
#else /* !CONFIG_MMU */
|
||||
|
||||
|
@ -549,6 +549,7 @@ struct signal_struct {
|
||||
atomic_t sigcnt;
|
||||
atomic_t live;
|
||||
int nr_threads;
|
||||
struct list_head thread_head;
|
||||
|
||||
wait_queue_head_t wait_chldexit; /* for wait4() */
|
||||
|
||||
@ -1271,6 +1272,7 @@ struct task_struct {
|
||||
/* PID/PID hash table linkage. */
|
||||
struct pid_link pids[PIDTYPE_MAX];
|
||||
struct list_head thread_group;
|
||||
struct list_head thread_node;
|
||||
|
||||
struct completion *vfork_done; /* for vfork() */
|
||||
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
|
||||
@ -2341,6 +2343,16 @@ extern bool current_is_single_threaded(void);
|
||||
#define while_each_thread(g, t) \
|
||||
while ((t = next_thread(t)) != g)
|
||||
|
||||
#define __for_each_thread(signal, t) \
|
||||
list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
|
||||
|
||||
#define for_each_thread(p, t) \
|
||||
__for_each_thread((p)->signal, t)
|
||||
|
||||
/* Careful: this is a double loop, 'break' won't work as expected. */
|
||||
#define for_each_process_thread(p, t) \
|
||||
for_each_process(p) for_each_thread(p, t)
|
||||
|
||||
static inline int get_nr_threads(struct task_struct *tsk)
|
||||
{
|
||||
return tsk->signal->nr_threads;
|
||||
|
@ -67,6 +67,48 @@ TRACE_EVENT(mm_compaction_migratepages,
                __entry->nr_failed)
);

TRACE_EVENT(mm_compaction_begin,
        TP_PROTO(unsigned long zone_start, unsigned long migrate_start,
                unsigned long free_start, unsigned long zone_end),

        TP_ARGS(zone_start, migrate_start, free_start, zone_end),

        TP_STRUCT__entry(
                __field(unsigned long, zone_start)
                __field(unsigned long, migrate_start)
                __field(unsigned long, free_start)
                __field(unsigned long, zone_end)
        ),

        TP_fast_assign(
                __entry->zone_start = zone_start;
                __entry->migrate_start = migrate_start;
                __entry->free_start = free_start;
                __entry->zone_end = zone_end;
        ),

        TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu",
                __entry->zone_start,
                __entry->migrate_start,
                __entry->free_start,
                __entry->zone_end)
);

TRACE_EVENT(mm_compaction_end,
        TP_PROTO(int status),

        TP_ARGS(status),

        TP_STRUCT__entry(
                __field(int, status)
        ),

        TP_fast_assign(
                __entry->status = status;
        ),

        TP_printk("status=%d", __entry->status)
);

#endif /* _TRACE_COMPACTION_H */
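Each TRACE_EVENT() above generates a trace_mm_compaction_begin()/trace_mm_compaction_end() helper that the compaction loop in mm/compaction.c can call around its scanner work. The following is only a sketch of such a call site; the struct compact_control fields and the placeholder return value are assumptions, not code from this patch:

/* Sketch of a compaction pass wrapped by the new begin/end tracepoints. */
static int compact_zone_sketch(struct zone *zone, struct compact_control *cc)
{
        int ret;

        trace_mm_compaction_begin(zone->zone_start_pfn, cc->migrate_pfn,
                                  cc->free_pfn, zone_end_pfn(zone));

        /* ... migrate scanner and free scanner run here ... */
        ret = COMPACT_COMPLETE;         /* placeholder result for the sketch */

        trace_mm_compaction_end(ret);
        return ret;
}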
@ -45,6 +45,32 @@ TRACE_EVENT(mm_migrate_pages,
                __print_symbolic(__entry->reason, MIGRATE_REASON))
);

TRACE_EVENT(mm_numa_migrate_ratelimit,

        TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),

        TP_ARGS(p, dst_nid, nr_pages),

        TP_STRUCT__entry(
                __array( char,          comm,   TASK_COMM_LEN)
                __field( pid_t,         pid)
                __field( int,           dst_nid)
                __field( unsigned long, nr_pages)
        ),

        TP_fast_assign(
                memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
                __entry->pid = p->pid;
                __entry->dst_nid = dst_nid;
                __entry->nr_pages = nr_pages;
        ),

        TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
                __entry->comm,
                __entry->pid,
                __entry->dst_nid,
                __entry->nr_pages)
);
#endif /* _TRACE_MIGRATE_H */

/* This part must be outside protection */
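trace_mm_numa_migrate_ratelimit() is meant to fire when a NUMA-balancing migration is skipped because the per-node rate limit was hit. A simplified sketch of that kind of check follows; the pgdat counter and the ratelimit_pages budget are stand-ins modelled on the surrounding migrate code, not copied from this diff:

/* Simplified sketch of a rate-limited NUMA migration decision. */
static bool numa_migrate_ratelimited_sketch(struct pglist_data *pgdat,
                                            unsigned long nr_pages)
{
        /* ratelimit_pages stands in for the per-window migration budget. */
        if (pgdat->numabalancing_migrate_nr_pages + nr_pages > ratelimit_pages) {
                trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
                                                nr_pages);
                return true;    /* throttled: skip this migration */
        }
        pgdat->numabalancing_migrate_nr_pages += nr_pages;
        return false;
}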
@ -443,6 +443,93 @@ TRACE_EVENT(sched_process_hang,
);
#endif /* CONFIG_DETECT_HUNG_TASK */

DECLARE_EVENT_CLASS(sched_move_task_template,

        TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

        TP_ARGS(tsk, src_cpu, dst_cpu),

        TP_STRUCT__entry(
                __field( pid_t, pid     )
                __field( pid_t, tgid    )
                __field( pid_t, ngid    )
                __field( int,   src_cpu )
                __field( int,   src_nid )
                __field( int,   dst_cpu )
                __field( int,   dst_nid )
        ),

        TP_fast_assign(
                __entry->pid     = task_pid_nr(tsk);
                __entry->tgid    = task_tgid_nr(tsk);
                __entry->ngid    = task_numa_group_id(tsk);
                __entry->src_cpu = src_cpu;
                __entry->src_nid = cpu_to_node(src_cpu);
                __entry->dst_cpu = dst_cpu;
                __entry->dst_nid = cpu_to_node(dst_cpu);
        ),

        TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
                __entry->pid, __entry->tgid, __entry->ngid,
                __entry->src_cpu, __entry->src_nid,
                __entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect if automatic NUMA balancing is bouncing between nodes
 */
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
        TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

        TP_ARGS(tsk, src_cpu, dst_cpu)
);

DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
        TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

        TP_ARGS(tsk, src_cpu, dst_cpu)
);

TRACE_EVENT(sched_swap_numa,

        TP_PROTO(struct task_struct *src_tsk, int src_cpu,
                 struct task_struct *dst_tsk, int dst_cpu),

        TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

        TP_STRUCT__entry(
                __field( pid_t, src_pid  )
                __field( pid_t, src_tgid )
                __field( pid_t, src_ngid )
                __field( int,   src_cpu  )
                __field( int,   src_nid  )
                __field( pid_t, dst_pid  )
                __field( pid_t, dst_tgid )
                __field( pid_t, dst_ngid )
                __field( int,   dst_cpu  )
                __field( int,   dst_nid  )
        ),

        TP_fast_assign(
                __entry->src_pid  = task_pid_nr(src_tsk);
                __entry->src_tgid = task_tgid_nr(src_tsk);
                __entry->src_ngid = task_numa_group_id(src_tsk);
                __entry->src_cpu  = src_cpu;
                __entry->src_nid  = cpu_to_node(src_cpu);
                __entry->dst_pid  = task_pid_nr(dst_tsk);
                __entry->dst_tgid = task_tgid_nr(dst_tsk);
                __entry->dst_ngid = task_numa_group_id(dst_tsk);
                __entry->dst_cpu  = dst_cpu;
                __entry->dst_nid  = cpu_to_node(dst_cpu);
        ),

        TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
                __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
                __entry->src_cpu, __entry->src_nid,
                __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
                __entry->dst_cpu, __entry->dst_nid)
);
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
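These scheduler events are emitted from the NUMA-balancing task-placement paths. The sketch below only illustrates where the tracepoint calls sit; the surrounding migration logic is elided and the function names are made up for the example:

/* Made-up call sites showing where the NUMA scheduler events fire. */
static void migrate_task_to_sketch(struct task_struct *p, int target_cpu)
{
        trace_sched_move_numa(p, task_cpu(p), target_cpu);
        /* ... the actual queue move happens via the migration stopper ... */
}

static void swap_tasks_sketch(struct task_struct *cur, struct task_struct *best)
{
        /* Two tasks trade CPUs so each runs closer to its preferred node. */
        trace_sched_swap_numa(cur, task_cpu(cur), best, task_cpu(best));
        /* ... migrate_swap() would perform the exchange ... */
}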
init/main.c (10 changed lines)
@ -355,9 +355,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 */
static void __init setup_command_line(char *command_line)
{
        saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
        initcall_command_line = alloc_bootmem(strlen (boot_command_line)+1);
        static_command_line = alloc_bootmem(strlen (command_line)+1);
        saved_command_line =
                memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
        initcall_command_line =
                memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
        static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
        strcpy (saved_command_line, boot_command_line);
        strcpy (static_command_line, command_line);
}
@ -476,7 +478,7 @@ static void __init mm_init(void)
        mem_init();
        kmem_cache_init();
        percpu_init_late();
        pgtable_cache_init();
        pgtable_init();
        vmalloc_init();
}
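memblock_virt_alloc() replaces alloc_bootmem() here; it allocates from memblock and, as assumed in this sketch, treats an alignment of 0 as the default cache alignment and panics on failure, which is why the callers above do not check for NULL. A small illustrative helper under those assumptions:

/* Illustrative early-boot string copy using the memblock-backed allocator.
 * Assumes align == 0 means default cache alignment and that the call
 * panics rather than returning NULL on failure. */
static char * __init copy_boot_string(const char *src)
{
        char *dst = memblock_virt_alloc(strlen(src) + 1, 0);

        strcpy(dst, src);
        return dst;
}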
@ -912,12 +912,13 @@ static void evict_chunk(struct audit_chunk *chunk)
}

static int audit_tree_handle_event(struct fsnotify_group *group,
                                   struct inode *to_tell,
                                   struct fsnotify_mark *inode_mark,
                                   struct fsnotify_mark *vfsmonut_mark,
                                   struct fsnotify_event *event)
                                   struct fsnotify_mark *vfsmount_mark,
                                   u32 mask, void *data, int data_type,
                                   const unsigned char *file_name)
{
        BUG();
        return -EOPNOTSUPP;
        return 0;
}

static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
@ -933,19 +934,8 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
        BUG_ON(atomic_read(&entry->refcnt) < 1);
}

static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
                                  struct fsnotify_mark *inode_mark,
                                  struct fsnotify_mark *vfsmount_mark,
                                  __u32 mask, void *data, int data_type)
{
        return false;
}

static const struct fsnotify_ops audit_tree_ops = {
        .handle_event = audit_tree_handle_event,
        .should_send_event = audit_tree_send_event,
        .free_group_priv = NULL,
        .free_event_priv = NULL,
        .freeing_mark = audit_tree_freeing_mark,
};
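With should_send_event() gone, all filtering moves into handle_event(), which now receives the raw mask, data and file name rather than a preallocated event. A hedged sketch of a minimal backend against the new callback signature (the names and the FS_MODIFY filter are illustrative, not from this patch):

/* Minimal illustrative backend for the reworked handle_event callback. */
static int example_handle_event(struct fsnotify_group *group,
                                struct inode *to_tell,
                                struct fsnotify_mark *inode_mark,
                                struct fsnotify_mark *vfsmount_mark,
                                u32 mask, void *data, int data_type,
                                const unsigned char *file_name)
{
        /* Filtering that used to live in .should_send_event happens here. */
        if (!(mask & FS_MODIFY))
                return 0;

        pr_debug("inode %lu modified (%s)\n", to_tell->i_ino,
                 file_name ? (const char *)file_name : "?");
        return 0;
}

static const struct fsnotify_ops example_ops = {
        .handle_event = example_handle_event,
};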
Some files were not shown because too many files have changed in this diff.