From 479696840239e0cc43efb3c917bdcad2174d2215 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 15 Oct 2012 21:48:48 -0300 Subject: [PATCH 001/112] i82975x_edac: Fix dimm label initialization The driver has only 4 hardcoded labels, but allows much more memory. Fix it by removing the hardcoded logic, using snprintf() instead. [ 19.833972] general protection fault: 0000 [#1] SMP [ 19.837733] Modules linked in: i82975x_edac(+) edac_core firewire_ohci firewire_core crc_itu_t nouveau mxm_wmi wmi video i2c_algo_bit drm_kms_helper ttm drm i2c_core [ 19.837733] CPU 0 [ 19.837733] Pid: 390, comm: udevd Not tainted 3.6.1-1.fc17.x86_64.debug #1 Dell Inc. Precision WorkStation 390 /0MY510 [ 19.837733] RIP: 0010:[] [] strncpy+0x18/0x30 [ 19.837733] RSP: 0018:ffff880078535b68 EFLAGS: 00010202 [ 19.837733] RAX: ffff880069fa9708 RBX: ffff880078588000 RCX: ffff880069fa9708 [ 19.837733] RDX: 000000000000001f RSI: 5f706f5f63616465 RDI: ffff880069fa9708 [ 19.837733] RBP: ffff880078535b68 R08: ffff880069fa9727 R09: 000000000000fffe [ 19.837733] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000003 [ 19.837733] R13: 0000000000000000 R14: ffff880069fa9290 R15: ffff880079624a80 [ 19.837733] FS: 00007f3de01ee840(0000) GS:ffff88007c400000(0000) knlGS:0000000000000000 [ 19.837733] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 19.837733] CR2: 00007f3de00b9000 CR3: 0000000078dbc000 CR4: 00000000000007f0 [ 19.837733] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 19.837733] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 19.837733] Process udevd (pid: 390, threadinfo ffff880078534000, task ffff880079642450) [ 19.837733] Stack: [ 19.837733] ffff880078535c18 ffffffffa017c6b8 00040000816d627f ffff880079624a88 [ 19.837733] ffffc90004cd6000 ffff880079624520 ffff88007ac21148 0000000000000000 [ 19.837733] 0000000000000000 0004000000000000 feda000078535bc8 ffffffff810d696d [ 19.837733] Call Trace: [ 19.837733] [] i82975x_init_one+0x2e6/0x3e6 [i82975x_edac] ... Fix bug reported at: https://bugzilla.redhat.com/show_bug.cgi?id=848149 And, very likely: https://bbs.archlinux.org/viewtopic.php?id=148033 https://bugzilla.kernel.org/show_bug.cgi?id=47171 Cc: Alan Cox Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i82975x_edac.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 069e26c11c4f..a98020409fa9 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -370,10 +370,6 @@ static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) static void i82975x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, void __iomem *mch_window) { - static const char *labels[4] = { - "DIMM A1", "DIMM A2", - "DIMM B1", "DIMM B2" - }; struct csrow_info *csrow; unsigned long last_cumul_size; u8 value; @@ -423,9 +419,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, dimm = mci->csrows[index]->channels[chan]->dimm; dimm->nr_pages = nr_pages / csrow->nr_channels; - strncpy(csrow->channels[chan]->dimm->label, - labels[(index >> 1) + (chan * 2)], - EDAC_MC_LABEL_LEN); + + snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d", + (chan == 0) ? 'A' : 'B', + index); dimm->grain = 1 << 7; /* 128Byte cache-line resolution */ dimm->dtype = i82975x_dram_type(mch_window, index); dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */ From 24bef66e74d647aebd34e0bef7693512b7912029 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 24 Oct 2012 10:30:01 -0200 Subject: [PATCH 002/112] edac: Fix the dimm filling for csrows-based layouts The driver is currently filling data in a wrong way, on drivers for csrows-based memory controller, when the first layer is a csrow. This is not easily to notice, as, in general, memories are filed in dual, interleaved, symetric mode, as very few memory controllers support asymetric modes. While digging into a bug for i82795_edac driver, the asymetric mode there is now working, allowing us to fill the machine with 4x1GB ranks at channel 0, and 2x512GB at channel 1: Channel 0 ranks: EDAC DEBUG: i82975x_init_csrows: DIMM A0: from page 0x00000000 to 0x0003ffff (size: 0x00040000 pages) EDAC DEBUG: i82975x_init_csrows: DIMM A1: from page 0x00040000 to 0x0007ffff (size: 0x00040000 pages) EDAC DEBUG: i82975x_init_csrows: DIMM A2: from page 0x00080000 to 0x000bffff (size: 0x00040000 pages) EDAC DEBUG: i82975x_init_csrows: DIMM A3: from page 0x000c0000 to 0x000fffff (size: 0x00040000 pages) Channel 1 ranks: EDAC DEBUG: i82975x_init_csrows: DIMM B0: from page 0x00100000 to 0x0011ffff (size: 0x00020000 pages) EDAC DEBUG: i82975x_init_csrows: DIMM B1: from page 0x00120000 to 0x0013ffff (size: 0x00020000 pages) Instead of properly showing the memories as such, before this patch, it shows the memory layout as: +-----------------------------------+ | mc0 | | csrow0 | csrow1 | csrow2 | ----------+-----------------------------------+ channel1: | 1024 MB | 1024 MB | 512 MB | channel0: | 1024 MB | 1024 MB | 512 MB | ----------+-----------------------------------+ as if both channels were symetric, grouping the DIMMs on a wrong layout. After this patch, the memory is correctly represented. So, for csrows at layers[0], it shows: +-----------------------------------------------+ | mc0 | | csrow0 | csrow1 | csrow2 | csrow3 | ----------+-----------------------------------------------+ channel1: | 512 MB | 512 MB | 0 MB | 0 MB | channel0: | 1024 MB | 1024 MB | 1024 MB | 1024 MB | ----------+-----------------------------------------------+ For csrows at layers[1], it shows: +-----------------------+ | mc0 | | channel0 | channel1 | --------+-----------------------+ csrow3: | 1024 MB | 0 MB | csrow2: | 1024 MB | 0 MB | --------+-----------------------+ csrow1: | 1024 MB | 512 MB | csrow0: | 1024 MB | 512 MB | --------+-----------------------+ So, no matter of what comes first, the information between channel and csrow will be properly represented. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/edac_mc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index d5dc9da7f99f..81eb9fd3f717 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -416,10 +416,18 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, dimm->cschannel = chn; /* Increment csrow location */ - row++; - if (row == tot_csrows) { - row = 0; + if (layers[0].is_virt_csrow) { chn++; + if (chn == tot_channels) { + chn = 0; + row++; + } + } else { + row++; + if (row == tot_csrows) { + row = 0; + chn++; + } } /* Increment dimm location */ From 7e06b7a3333f5c7a0cec12aff20d39c5c87c0795 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 18 Oct 2012 15:54:45 +0200 Subject: [PATCH 003/112] i7300_edac: Fix error flag testing * Right-shift the values in GET_FBD_FAT_IDX and GET_FBD_NF_IDX, so that the callers get the result they expect. * Fix definition of FERR_FAT_FBD_ERR_MASK. * Call GET_FBD_NF_IDX, not GET_FBD_FAT_IDX, when operating on register FERR_NF_FBD. We were lucky they have the same definition. This fixes kernel bug #44131: https://bugzilla.kernel.org/show_bug.cgi?id=44131 Signed-off-by: Jean Delvare Cc: Mauro Carvalho Chehab Cc: Doug Thompson Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a09d0667f72a..9d669cd43618 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -197,8 +197,8 @@ static const char *ferr_fat_fbd_name[] = { [0] = "Memory Write error on non-redundant retry or " "FBD configuration Write error on retry", }; -#define GET_FBD_FAT_IDX(fbderr) (fbderr & (3 << 28)) -#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)) +#define GET_FBD_FAT_IDX(fbderr) (((fbderr) >> 28) & 3) +#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22)) #define FERR_NF_FBD 0xa0 static const char *ferr_nf_fbd_name[] = { @@ -225,7 +225,7 @@ static const char *ferr_nf_fbd_name[] = { [1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", [0] = "Uncorrectable Data ECC on Replay", }; -#define GET_FBD_NF_IDX(fbderr) (fbderr & (3 << 28)) +#define GET_FBD_NF_IDX(fbderr) (((fbderr) >> 28) & 3) #define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\ (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\ (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\ @@ -464,7 +464,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_nf_fbd_name)); specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); - branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0; + branch = (GET_FBD_NF_IDX(error_reg) == 2) ? 1 : 0; pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, REDMEMA, &syndrome); From 2c5594df344cd1ff0cc9bf007dea3235582b3acf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Oct 2012 11:40:28 +0200 Subject: [PATCH 004/112] rcu: Fix unrecovered RCU user mode in syscall_trace_leave() On x86-64 syscall exit, 3 non exclusive events may happen looping in the following order: 1) Check if we need resched for user preemption, if so call schedule_user() 2) Check if we have pending signals, if so call do_notify_resume() 3) Check if we do syscall tracing, if so call syscall_trace_leave() However syscall_trace_leave() has been written assuming it directly follows the syscall and forget about the above possible 1st and 2nd steps. Now schedule_user() and do_notify_resume() exit in RCU user mode because they have most chances to resume userspace immediately and this avoids an rcu_user_enter() call in the syscall fast path. So by the time we call syscall_trace_leave(), we may well be in RCU user mode. To fix this up, simply call rcu_user_exit() in the beginning of this function. This fixes some reported RCU uses in extended quiescent state. Reported-by: Dave Jones Reported-by: Sergey Senozhatsky Signed-off-by: Frederic Weisbecker Tested-by: Sergey Senozhatsky Signed-off-by: Paul E. McKenney --- arch/x86/kernel/ptrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b00b33a18390..eff5b8c68652 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1511,6 +1511,13 @@ void syscall_trace_leave(struct pt_regs *regs) { bool step; + /* + * We may come here right after calling schedule_user() + * or do_notify_resume(), in which case we can be in RCU + * user mode. + */ + rcu_user_exit(); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) From 0d855354ea351bec6b222e9fea86a876cfafdcb6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 26 Oct 2012 18:28:56 +0200 Subject: [PATCH 005/112] perf, powerpc: Fix hw breakpoints returning -ENOSPC I've been trying to get hardware breakpoints with perf to work on POWER7 but I'm getting the following: % perf record -e mem:0x10000000 true Error: sys_perf_event_open() syscall returned with 28 (No space left on device). /bin/dmesg may provide additional information. Fatal: No CONFIG_PERF_EVENTS=y kernel support configured? true: Terminated (FWIW adding -a and it works fine) Debugging it seems that __reserve_bp_slot() is returning ENOSPC because it thinks there are no free breakpoint slots on this CPU. I have a 2 CPUs, so perf userspace is doing two perf_event_open syscalls to add a counter to each CPU [1]. The first syscall succeeds but the second is failing. On this second syscall, fetch_bp_busy_slots() sets slots.pinned to be 1, despite there being no breakpoint on this CPU. This is because the call the task_bp_pinned, checks all CPUs, rather than just the current CPU. POWER7 only has one hardware breakpoint per CPU (ie. HBP_NUM=1), so we return ENOSPC. The following patch fixes this by checking the associated CPU for each breakpoint in task_bp_pinned. I'm not familiar with this code, so it's provided as a reference to the above issue. Signed-off-by: Michael Neuling Acked-by: Peter Zijlstra Cc: Michael Ellerman Cc: Jovi Zhang Cc: K Prasad Link: http://lkml.kernel.org/r/1351268936-2956-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/hw_breakpoint.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 9a7b487c6fe2..fe8a916507ed 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -111,14 +111,16 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) * Count the number of breakpoints of the same type and same task. * The given event must be not on the list. */ -static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type) +static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) { struct task_struct *tsk = bp->hw.bp_target; struct perf_event *iter; int count = 0; list_for_each_entry(iter, &bp_task_head, hw.bp_list) { - if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type) + if (iter->hw.bp_target == tsk && + find_slot_idx(iter) == type && + cpu == iter->cpu) count += hw_breakpoint_weight(iter); } @@ -141,7 +143,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, if (!tsk) slots->pinned += max_task_bp_pinned(cpu, type); else - slots->pinned += task_bp_pinned(bp, type); + slots->pinned += task_bp_pinned(cpu, bp, type); slots->flexible = per_cpu(nr_bp_flexible[type], cpu); return; @@ -154,7 +156,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, if (!tsk) nr += max_task_bp_pinned(cpu, type); else - nr += task_bp_pinned(bp, type); + nr += task_bp_pinned(cpu, bp, type); if (nr > slots->pinned) slots->pinned = nr; @@ -188,7 +190,7 @@ static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable, int old_idx = 0; int idx = 0; - old_count = task_bp_pinned(bp, type); + old_count = task_bp_pinned(cpu, bp, type); old_idx = old_count - 1; idx = old_idx + weight; From 1234471e2d11e4ee47f6de452dd8b8aeb09b45de Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 23 Oct 2012 22:44:49 +0900 Subject: [PATCH 006/112] perf header: Fix numa topology printing Andrew reported that the commit 7e94cfcc9d20 ("perf header: Use pre- processed session env when printing") regresses the header output. It was because of a missed string pointer calculation in the loop. Reported-by: Andrew Jones Tested-by: Andrew Jones Signed-off-by: Namhyung Kim Cc: Andrew Jones Link: http://lkml.kernel.org/r/1350999890-6920-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7daad237dea5..566b84c695c8 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1378,6 +1378,8 @@ static void print_numa_topology(struct perf_header *ph, int fd __maybe_unused, str = tmp + 1; fprintf(fp, "# node%u cpu list : %s\n", c, str); + + str += strlen(str) + 1; } return; error: From f787d9519fb10411f2948f5b9957a1669879ba84 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 23 Oct 2012 22:44:50 +0900 Subject: [PATCH 007/112] perf tools: Fix strbuf_addf() when the buffer needs to grow This was found during chasing down the header output regression. The strbuf_addf() was checking buffer length with a result of vscnprintf() which cannot be greater than that of strbuf_avail(). Since numa topology and pmu mapping info in header were converted to use strbuf, it sometimes caused uninteresting behaviors with the broken strbuf. Fix it by using vsnprintf() which returns desired output string length regardless of the available buffer size and grow the buffer if needed. Reported-by: Andrew Jones Tested-by: Andrew Jones Signed-off-by: Namhyung Kim Cc: Andrew Jones Link: http://lkml.kernel.org/r/1350999890-6920-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/strbuf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 2eeb51baf077..cfa906882e2c 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -90,17 +90,17 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...) if (!strbuf_avail(sb)) strbuf_grow(sb, 64); va_start(ap, fmt); - len = vscnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); va_end(ap); if (len < 0) - die("your vscnprintf is broken"); + die("your vsnprintf is broken"); if (len > strbuf_avail(sb)) { strbuf_grow(sb, len); va_start(ap, fmt); - len = vscnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); va_end(ap); if (len > strbuf_avail(sb)) { - die("this should not happen, your snprintf is broken"); + die("this should not happen, your vsnprintf is broken"); } } strbuf_setlen(sb, sb->len + len); From c1c92b6a5b606e39e2181ac8eee2a0ca847542dc Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 30 Oct 2012 15:48:48 -0700 Subject: [PATCH 008/112] openvswitch: Print device when warning about over MTU packets. If an attempt is made to transmit a packet that is over the device's MTU then we log it using the datapath's name. However, it is much more helpful to use the device name instead. Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 3c1e58ba714b..a9033481fa5e 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -158,7 +158,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", - ovs_dp_name(vport->dp), + netdev_vport->dev->name, packet_length(skb), mtu); goto error; } From d04d382980c86bdee9960c3eb157a73f8ed230cc Mon Sep 17 00:00:00 2001 From: Mehak Mahajan Date: Tue, 30 Oct 2012 15:50:28 -0700 Subject: [PATCH 009/112] openvswitch: Store flow key len if ARP opcode is not request or reply. We currently only extract the ARP payload if the opcode indicates that it is a request or reply. However, we also only set the key length in these situations even though it should still be possible to match on the opcode. There's no real reason to restrict the ARP opcode since all have the same format so this simply removes the check. Signed-off-by: Mehak Mahajan Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 98c70630ad06..733cbf49ed1f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -702,15 +702,11 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, /* We only match on the lower 8 bits of the opcode. */ if (ntohs(arp->ar_op) <= 0xff) key->ip.proto = ntohs(arp->ar_op); - - if (key->ip.proto == ARPOP_REQUEST - || key->ip.proto == ARPOP_REPLY) { - memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); - memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); - memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); - memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); - key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); - } + memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); + memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); + memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); + memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); + key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ From 6d369a09ccd8104b35646b507112d3ddca4344eb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Nov 2012 11:26:34 +0000 Subject: [PATCH 010/112] x86: Export asm/{svm.h,vmx.h,perf_regs.h} Export asm/{svm.h,vmx.h,perf_regs.h} so that they can be disintegrated. It looks from previous commits that the first two should have been exported, but the header-y lines weren't added to the Kbuild. I'm guessing that asm/perf_regs.h should be exported too. Signed-off-by: David Howells --- arch/x86/include/asm/Kbuild | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 66e5f0ef0523..79fd8a3418f9 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -12,6 +12,7 @@ header-y += mce.h header-y += msr-index.h header-y += msr.h header-y += mtrr.h +header-y += perf_regs.h header-y += posix_types_32.h header-y += posix_types_64.h header-y += posix_types_x32.h @@ -19,8 +20,10 @@ header-y += prctl.h header-y += processor-flags.h header-y += ptrace-abi.h header-y += sigcontext32.h +header-y += svm.h header-y += ucontext.h header-y += vm86.h +header-y += vmx.h header-y += vsyscall.h genhdr-y += unistd_32.h From 3ea160b3e8f0de8161861995d9901f61192fc0b0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 16 Nov 2012 08:06:16 -0800 Subject: [PATCH 011/112] target: Fix handling of aborted commands - If we stop processing an already-aborted command in target_execute_cmd(), then we need to complete t_transport_stop_comp to wake up the the TMR handling thread, or else it will end up waiting forever. - If we've a already sent an "aborted" status for a command in transport_check_aborted_status() then we should bail out of transport_send_task_abort() to avoid freeing the command twice. Signed-off-by: Roland Dreier Cc: stable@vger.kernel.org Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9097155e9ebe..dcecbfb17243 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1819,8 +1819,10 @@ void target_execute_cmd(struct se_cmd *cmd) /* * If the received CDB has aleady been aborted stop processing it here. */ - if (transport_check_aborted_status(cmd, 1)) + if (transport_check_aborted_status(cmd, 1)) { + complete(&cmd->t_transport_stop_comp); return; + } /* * Determine if IOCTL context caller in requesting the stopping of this @@ -3067,7 +3069,7 @@ void transport_send_task_abort(struct se_cmd *cmd) unsigned long flags; spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) { + if (cmd->se_cmd_flags & (SCF_SENT_CHECK_CONDITION | SCF_SENT_DELAYED_TAS)) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); return; } From ca9dfc6cc45a8ae0297188f5fed23af242cc8a8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Nov 2012 15:15:24 +0000 Subject: [PATCH 012/112] tools: Define a Makefile function to do subdir processing Define a Makefile function that can be called with $(call ...) to wrap the subdir make invocations in tools/Makefile. This will allow us in the next patch to insert bits in there to honour O= flags when called from the top-level Makefile. Signed-off-by: David Howells Cc: Borislav Petkov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1378.1352379110@warthog.procyon.org.uk Signed-off-by: Arnaldo Carvalho de Melo --- tools/Makefile | 24 ++++++++++++------------ tools/scripts/Makefile.include | 8 ++++++++ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index 3ae43947a171..1f9a529fe544 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -31,44 +31,44 @@ help: @echo ' clean: a summary clean target to clean _all_ folders' cpupower: FORCE - $(QUIET_SUBDIR0)power/$@/ $(QUIET_SUBDIR1) + $(call descend,power/$@) firewire lguest perf usb virtio vm: FORCE - $(QUIET_SUBDIR0)$@/ $(QUIET_SUBDIR1) + $(call descend,$@) selftests: FORCE - $(QUIET_SUBDIR0)testing/$@/ $(QUIET_SUBDIR1) + $(call descend,testing/$@) turbostat x86_energy_perf_policy: FORCE - $(QUIET_SUBDIR0)power/x86/$@/ $(QUIET_SUBDIR1) + $(call descend,power/x86/$@) cpupower_install: - $(QUIET_SUBDIR0)power/$(@:_install=)/ $(QUIET_SUBDIR1) install + $(call descend,power/$(@:_install=),install) firewire_install lguest_install perf_install usb_install virtio_install vm_install: - $(QUIET_SUBDIR0)$(@:_install=)/ $(QUIET_SUBDIR1) install + $(call descend,$(@:_install=),install) selftests_install: - $(QUIET_SUBDIR0)testing/$(@:_clean=)/ $(QUIET_SUBDIR1) install + $(call descend,testing/$(@:_clean=),install) turbostat_install x86_energy_perf_policy_install: - $(QUIET_SUBDIR0)power/x86/$(@:_install=)/ $(QUIET_SUBDIR1) install + $(call descend,power/x86/$(@:_install=),install) install: cpupower_install firewire_install lguest_install perf_install \ selftests_install turbostat_install usb_install virtio_install \ vm_install x86_energy_perf_policy_install cpupower_clean: - $(QUIET_SUBDIR0)power/cpupower/ $(QUIET_SUBDIR1) clean + $(call descend,power/cpupower,clean) firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean: - $(QUIET_SUBDIR0)$(@:_clean=)/ $(QUIET_SUBDIR1) clean + $(call descend,$(@:_clean=),clean) selftests_clean: - $(QUIET_SUBDIR0)testing/$(@:_clean=)/ $(QUIET_SUBDIR1) clean + $(call descend,testing/$(@:_clean=),clean) turbostat_clean x86_energy_perf_policy_clean: - $(QUIET_SUBDIR0)power/x86/$(@:_clean=)/ $(QUIET_SUBDIR1) clean + $(call descend,power/x86/$(@:_clean=),clean) clean: cpupower_clean firewire_clean lguest_clean perf_clean selftests_clean \ turbostat_clean usb_clean virtio_clean vm_clean \ diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 96ce80a3743b..4a9e3176f743 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -41,6 +41,14 @@ else NO_SUBDIR = : endif +# +# Define a callable command for descending to a new directory +# +# Call by doing: $(call descend,directory[,target]) +# +descend = \ + $(QUIET_SUBDIR0)$(1) $(QUIET_SUBDIR1) $(2) + QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir QUIET_SUBDIR1 = From bf35182ffcd00d8b36d56210ffdac110e5624d7d Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Nov 2012 21:02:08 +0000 Subject: [PATCH 013/112] tools: Honour the O= flag when tool build called from a higher Makefile Honour the O= flag that was passed to a higher level Makefile and then passed down as part of a tool build. To make this work, the top-level Makefile passes the original O= flag and subdir=tools to the tools/Makefile, and that in turn passes subdir=$(O)/$(subdir)/foodir when building tool foo in directory $(O)/$(subdir)/foodir (where the intervening slashes aren't added if an element is missing). For example, take perf. This is found in tools/perf/. Assume we're building into directory ~/zebra/, so we pass O=~/zebra to make. Dependening on where we run the build from, we see: make run in dir $(OUTPUT) dir ======================= ================== linux ~/zebra/tools/perf/ linux/tools ~/zebra/perf/ linux/tools/perf ~/zebra/ and if O= is not set, we get: make run in dir $(OUTPUT) dir ======================= ================== linux linux/tools/perf/ linux/tools linux/tools/perf/ linux/tools/perf linux/tools/perf/ The output directories are created by the descend function if they don't already exist. Signed-off-by: David Howells Cc: Borislav Petkov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1378.1352379110@warthog.procyon.org.uk Signed-off-by: Arnaldo Carvalho de Melo --- Makefile | 6 ++++-- tools/scripts/Makefile.include | 17 +++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 42d0e56818ea..71dc31e41407 100644 --- a/Makefile +++ b/Makefile @@ -1321,10 +1321,12 @@ kernelversion: # Clear a bunch of variables before executing the submake tools/: FORCE - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS= -C $(src)/tools/ + $(Q)mkdir -p $(objtree)/tools + $(Q)$(MAKE) LDFLAGS= MAKEFLAGS= O=$(objtree) subdir=tools -C $(src)/tools/ tools/%: FORCE - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS= -C $(src)/tools/ $* + $(Q)mkdir -p $(objtree)/tools + $(Q)$(MAKE) LDFLAGS= MAKEFLAGS= O=$(objtree) subdir=tools -C $(src)/tools/ $* # Single targets # --------------------------------------------------------------------------- diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 4a9e3176f743..87467b17e05c 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,8 +1,11 @@ -ifeq ("$(origin O)", "command line") +ifeq ($(origin O), command line) dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) ABSOLUTE_O := $(shell cd $(O) ; pwd) - OUTPUT := $(ABSOLUTE_O)/ + OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) +ifeq ($(objtree),) + objtree := $(O) +endif endif ifneq ($(OUTPUT),) @@ -47,9 +50,10 @@ endif # Call by doing: $(call descend,directory[,target]) # descend = \ - $(QUIET_SUBDIR0)$(1) $(QUIET_SUBDIR1) $(2) + +mkdir -p $(OUTPUT)$(1) && \ + $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) -QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir +QUIET_SUBDIR0 = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir QUIET_SUBDIR1 = ifneq ($(findstring $(MAKEFLAGS),s),s) @@ -64,5 +68,10 @@ ifndef V $(MAKE) $(PRINT_DIR) -C $$subdir QUIET_FLEX = @echo ' ' FLEX $@; QUIET_BISON = @echo ' ' BISON $@; + + descend = \ + @echo ' ' DESCEND $(1); \ + mkdir -p $(OUTPUT)$(1) && \ + $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) endif endif From 2b73f65d114b44b9bc9bd7d229f603e4cd5c1a88 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Nov 2012 14:14:38 -0300 Subject: [PATCH 014/112] tools: Pass the target in descend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing: [acme@sandy linux]$ cd tools [acme@sandy tools]$ make clean DESCEND power/cpupower CC lib/cpufreq.o CC lib/sysfs.o LD libcpupower.so.0.0.0 CC utils/helpers/amd.o utils/helpers/amd.c:7:21: error: pci/pci.h: No such file or directory In file included from utils/helpers/amd.c:9: ./utils/helpers/helpers.h:137: warning: ‘struct pci_access’ declared inside parameter list ./utils/helpers/helpers.h:137: warning: its scope is only this definition or declaration, which is probably not what you want ./utils/helpers/helpers.h:139: warning: ‘struct pci_access’ declared inside parameter list utils/helpers/amd.c: In function ‘amd_pci_get_num_boost_states’: utils/helpers/amd.c:120: warning: passing argument 1 of ‘pci_slot_func_init’ from incompatible pointer type ./utils/helpers/helpers.h:138: note: expected ‘struct pci_access **’ but argument is of type ‘struct pci_access **’ utils/helpers/amd.c:125: warning: implicit declaration of function ‘pci_read_byte’ utils/helpers/amd.c:132: warning: implicit declaration of function ‘pci_cleanup’ make[1]: *** [utils/helpers/amd.o] Error 1 make: *** [cpupower_clean] Error 2 [acme@sandy tools]$ Reported-by: Arnaldo Carvalho de Melo Signed-off-by: David Howells Cc: Borislav Petkov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-tviyimq6x6nm77sj5lt4t19f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/scripts/Makefile.include | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 87467b17e05c..2964b96aa55f 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -51,7 +51,7 @@ endif # descend = \ +mkdir -p $(OUTPUT)$(1) && \ - $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) + $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) QUIET_SUBDIR0 = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir QUIET_SUBDIR1 = @@ -72,6 +72,6 @@ ifndef V descend = \ @echo ' ' DESCEND $(1); \ mkdir -p $(OUTPUT)$(1) && \ - $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) + $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) endif endif From f2d9cae9ea9e0228f6eb4d4c5ab4f548d0270d1a Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 31 Oct 2012 11:21:28 -0700 Subject: [PATCH 015/112] perf powerpc: Use uapi/unistd.h to fix build error Use the 'unistd.h' from arch/powerpc/include/uapi to build the perf tool. Signed-off-by: Sukadev Bhattiprolu Acked-by: David Howells Cc: Anton Blanchard Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20121107191818.GA16211@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index c50985eaec41..e2ba8f004d32 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -26,7 +26,7 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __powerpc__ -#include "../../arch/powerpc/include/asm/unistd.h" +#include "../../arch/powerpc/include/uapi/asm/unistd.h" #define rmb() asm volatile ("sync" ::: "memory") #define cpu_relax() asm volatile ("" ::: "memory"); #define CPUINFO_PROC "cpu" From d2709c7ce4c513ab7f4ca9a106a930621811f2d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Nov 2012 22:21:03 +0000 Subject: [PATCH 016/112] perf: Make perf build for x86 with UAPI disintegration applied Make perf build for x86 once the UAPI disintegration patches for that arch have been applied by adding the appropriate -I flags - in the right order - and then converting some #includes that use ../.. notation to find main kernel headerfiles to use and instead. Note that -Iarch/foo/include/uapi is present _before_ -Iarch/foo/include. This makes sure we get the userspace version of the pt_regs struct. Ideally, we wouldn't have the latter -I flag at all, but unfortunately we want asm/svm.h and asm/vmx.h in builtin-kvm.c and these aren't part of the UAPI - at least not for x86. I wonder if the bits outside of the __KERNEL__ guards *should* be transferred there. I note also that perf seems to do its dependency handling manually by listing all the header files it might want to use in LIB_H in the Makefile. Can this be changed to use -MD? Note that to do make this work, we need to export and UAPI disintegrate linux/hw_breakpoint.h, which I think should've been exported previously so that perf can access the bits. We have to do this in the same patch to maintain bisectability. Signed-off-by: David Howells --- include/linux/hw_breakpoint.h | 31 +------------------------ include/uapi/linux/Kbuild | 1 + include/uapi/linux/hw_breakpoint.h | 30 ++++++++++++++++++++++++ tools/perf/Makefile | 29 ++++++++++++++++++++++- tools/perf/arch/x86/include/perf_regs.h | 2 +- tools/perf/builtin-kvm.c | 6 ++--- tools/perf/builtin-test.c | 2 +- tools/perf/perf.h | 16 +++---------- tools/perf/util/evsel.c | 4 ++-- tools/perf/util/evsel.h | 3 ++- tools/perf/util/header.h | 2 +- tools/perf/util/parse-events-test.c | 2 +- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-events.h | 2 +- tools/perf/util/pmu.h | 2 +- tools/perf/util/session.h | 2 +- 16 files changed, 78 insertions(+), 58 deletions(-) create mode 100644 include/uapi/linux/hw_breakpoint.h diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 6ae9c631a1be..0464c85e63fd 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,35 +1,8 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H -enum { - HW_BREAKPOINT_LEN_1 = 1, - HW_BREAKPOINT_LEN_2 = 2, - HW_BREAKPOINT_LEN_4 = 4, - HW_BREAKPOINT_LEN_8 = 8, -}; - -enum { - HW_BREAKPOINT_EMPTY = 0, - HW_BREAKPOINT_R = 1, - HW_BREAKPOINT_W = 2, - HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, - HW_BREAKPOINT_X = 4, - HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, -}; - -enum bp_type_idx { - TYPE_INST = 0, -#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - TYPE_DATA = 0, -#else - TYPE_DATA = 1, -#endif - TYPE_MAX -}; - -#ifdef __KERNEL__ - #include +#include #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -151,6 +124,4 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ - #endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index e194387ef784..19e765fbfef7 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -415,3 +415,4 @@ header-y += wireless.h header-y += x25.h header-y += xattr.h header-y += xfrm.h +header-y += hw_breakpoint.h diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h new file mode 100644 index 000000000000..b04000a2296a --- /dev/null +++ b/include/uapi/linux/hw_breakpoint.h @@ -0,0 +1,30 @@ +#ifndef _UAPI_LINUX_HW_BREAKPOINT_H +#define _UAPI_LINUX_HW_BREAKPOINT_H + +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, +}; + +enum { + HW_BREAKPOINT_EMPTY = 0, + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, + HW_BREAKPOINT_X = 4, + HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, +}; + +enum bp_type_idx { + TYPE_INST = 0, +#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS + TYPE_DATA = 0, +#else + TYPE_DATA = 1, +#endif + TYPE_MAX +}; + +#endif /* _UAPI_LINUX_HW_BREAKPOINT_H */ diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 00deed4d6159..0a619af5be43 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -169,7 +169,34 @@ endif ### --- END CONFIGURATION SECTION --- -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +ifneq ($(objtree),) +#$(info Determined 'objtree' to be $(objtree)) +endif + +ifneq ($(OUTPUT),) +#$(info Determined 'OUTPUT' to be $(OUTPUT)) +endif + +BASIC_CFLAGS = \ + -Iutil/include \ + -Iarch/$(ARCH)/include \ + $(if $(objtree),-I$(objtree)/arch/$(ARCH)/include/generated/uapi) \ + -I$(srctree)/arch/$(ARCH)/include/uapi \ + -I$(srctree)/arch/$(ARCH)/include \ + $(if $(objtree),-I$(objtree)/include/generated/uapi) \ + -I$(srctree)/include/uapi \ + -I$(srctree)/include \ + -I$(OUTPUT)util \ + -Iutil \ + -I. \ + -I$(TRACE_EVENT_DIR) \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index 46fc9f15c6b3..7fcdcdbee917 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -3,7 +3,7 @@ #include #include "../../util/types.h" -#include "../../../../../arch/x86/include/asm/perf_regs.h" +#include #ifndef ARCH_X86_64 #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 260abc535b5b..e013bdb5e24a 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -22,9 +22,9 @@ #include #include -#include "../../arch/x86/include/asm/svm.h" -#include "../../arch/x86/include/asm/vmx.h" -#include "../../arch/x86/include/asm/kvm.h" +#include +#include +#include struct event_key { #define INVALID_KEY (~0ULL) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 484f26cc0c00..5acd6e8e658b 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -15,7 +15,7 @@ #include "util/thread_map.h" #include "util/pmu.h" #include "event-parse.h" -#include "../../include/linux/hw_breakpoint.h" +#include #include diff --git a/tools/perf/perf.h b/tools/perf/perf.h index e2ba8f004d32..238f923f2218 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -5,8 +5,9 @@ struct winsize; void get_term_dimensions(struct winsize *ws); +#include + #if defined(__i386__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -16,7 +17,6 @@ void get_term_dimensions(struct winsize *ws); #endif #if defined(__x86_64__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -26,20 +26,17 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __powerpc__ -#include "../../arch/powerpc/include/uapi/asm/unistd.h" #define rmb() asm volatile ("sync" ::: "memory") #define cpu_relax() asm volatile ("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __s390__ -#include "../../arch/s390/include/asm/unistd.h" #define rmb() asm volatile("bcr 15,0" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #endif #ifdef __sh__ -#include "../../arch/sh/include/asm/unistd.h" #if defined(__SH4A__) || defined(__SH5__) # define rmb() asm volatile("synco" ::: "memory") #else @@ -50,35 +47,30 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __hppa__ -#include "../../arch/parisc/include/asm/unistd.h" #define rmb() asm volatile("" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __sparc__ -#include "../../arch/sparc/include/uapi/asm/unistd.h" #define rmb() asm volatile("":::"memory") #define cpu_relax() asm volatile("":::"memory") #define CPUINFO_PROC "cpu" #endif #ifdef __alpha__ -#include "../../arch/alpha/include/asm/unistd.h" #define rmb() asm volatile("mb" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory") #define CPUINFO_PROC "cpu model" #endif #ifdef __ia64__ -#include "../../arch/ia64/include/asm/unistd.h" #define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") #define CPUINFO_PROC "model name" #endif #ifdef __arm__ -#include "../../arch/arm/include/asm/unistd.h" /* * Use the __kuser_memory_barrier helper in the CPU helper page. See * arch/arm/kernel/entry-armv.S in the kernel source for details. @@ -89,13 +81,11 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __aarch64__ -#include "../../arch/arm64/include/asm/unistd.h" #define rmb() asm volatile("dmb ld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif #ifdef __mips__ -#include "../../arch/mips/include/asm/unistd.h" #define rmb() asm volatile( \ ".set mips2\n\t" \ "sync\n\t" \ @@ -112,7 +102,7 @@ void get_term_dimensions(struct winsize *ws); #include #include -#include "../../include/uapi/linux/perf_event.h" +#include #include "util/types.h" #include diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 618d41140abd..d144d464ce39 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -18,8 +18,8 @@ #include "cpumap.h" #include "thread_map.h" #include "target.h" -#include "../../../include/linux/hw_breakpoint.h" -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6f94d6dea00f..d99b476ef37c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -3,7 +3,8 @@ #include #include -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "types.h" #include "xyarray.h" #include "cgroup.h" diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 879d215cdac9..9bc00783f24f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,7 +1,7 @@ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H -#include "../../../include/uapi/linux/perf_event.h" +#include #include #include #include "types.h" diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 516ecd9ddd6e..6ef213b35ecd 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -3,7 +3,7 @@ #include "evsel.h" #include "evlist.h" #include "sysfs.h" -#include "../../../include/linux/hw_breakpoint.h" +#include #define TEST_ASSERT_VAL(text, cond) \ do { \ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75c7b0fca6d9..6b6d03e93c3d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,4 @@ -#include "../../../include/linux/hw_breakpoint.h" +#include #include "util.h" #include "../perf.h" #include "evlist.h" diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 839230ceb18b..2820c407adb2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -7,7 +7,7 @@ #include #include #include "types.h" -#include "../../../include/uapi/linux/perf_event.h" +#include #include "types.h" struct list_head; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 39f3abac7744..fdeb8ac7c5d2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -2,7 +2,7 @@ #define __PMU_H #include -#include "../../../include/uapi/linux/perf_event.h" +#include enum { PERF_PMU_FORMAT_VALUE_CONFIG, diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index dd6426163ba6..0eae00ad5fe7 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -7,7 +7,7 @@ #include "symbol.h" #include "thread.h" #include -#include "../../../include/uapi/linux/perf_event.h" +#include struct sample_queue; struct ip_callchain; From e506d6fde50e0a737234892eda31708692bdda29 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 13 Nov 2012 17:24:43 +0100 Subject: [PATCH 017/112] drm/i915: disable cloning on sdvo After the recent pile of disable-cloning patches, e.g. commit e3b86d6941c7e5f90be05d986fce1fcb40c68d6b Author: Egbert Eich Date: Sat Oct 13 14:30:15 2012 +0200 DRM/i915: Don't clone SDVO LVDS with analog and a bug report from Chris Wilson indicating that cloning doesn't even work for DVI-SDVO and native VGA, let's just disable cloning on sdvo encoders completely. v2: Update the comment in the code as discussed with Paulo Zanoni. Reviewed-by: Paulo Zanoni Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29259 Tested-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_sdvo.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index c600fb06e25e..a6ac0b416964 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2201,7 +2201,6 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) connector->connector_type = DRM_MODE_CONNECTOR_HDMIA; intel_sdvo->is_hdmi = true; } - intel_sdvo->base.cloneable = true; intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo); if (intel_sdvo->is_hdmi) @@ -2232,7 +2231,6 @@ intel_sdvo_tv_init(struct intel_sdvo *intel_sdvo, int type) intel_sdvo->is_tv = true; intel_sdvo->base.needs_tv_clock = true; - intel_sdvo->base.cloneable = false; intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo); @@ -2275,8 +2273,6 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device) intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB1; } - intel_sdvo->base.cloneable = true; - intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo); return true; @@ -2307,9 +2303,6 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS1; } - /* SDVO LVDS is not cloneable because the input mode gets adjusted by the encoder */ - intel_sdvo->base.cloneable = false; - intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo); if (!intel_sdvo_create_enhance_property(intel_sdvo, intel_sdvo_connector)) goto err; @@ -2721,6 +2714,16 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob) goto err_output; } + /* + * Cloning SDVO with anything is often impossible, since the SDVO + * encoder can request a special input timing mode. And even if that's + * not the case we have evidence that cloning a plain unscaled mode with + * VGA doesn't really work. Furthermore the cloning flags are way too + * simplistic anyway to express such constraints, so just give up on + * cloning for SDVO encoders. + */ + intel_sdvo->base.cloneable = false; + /* Only enable the hotplug irq if we need it, to work around noisy * hotplug lines. */ From 5edd0b946a0afeb1d0364a3654328b046fb818a2 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 20 Nov 2012 16:31:25 +0200 Subject: [PATCH 018/112] iwlwifi: fix the basic CCK rates calculation Fix a copy paste error in iwl_calc_basic_rates which leads to a wrong calculation of CCK basic rates. Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/dvm/rxon.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index 10896393e5a0..2830ea290502 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -1012,12 +1012,12 @@ static void iwl_calc_basic_rates(struct iwl_priv *priv, * As a consequence, it's not as complicated as it sounds, just add * any lower rates to the ACK rate bitmap. */ - if (IWL_RATE_11M_INDEX < lowest_present_ofdm) - ofdm |= IWL_RATE_11M_MASK >> IWL_FIRST_CCK_RATE; - if (IWL_RATE_5M_INDEX < lowest_present_ofdm) - ofdm |= IWL_RATE_5M_MASK >> IWL_FIRST_CCK_RATE; - if (IWL_RATE_2M_INDEX < lowest_present_ofdm) - ofdm |= IWL_RATE_2M_MASK >> IWL_FIRST_CCK_RATE; + if (IWL_RATE_11M_INDEX < lowest_present_cck) + cck |= IWL_RATE_11M_MASK >> IWL_FIRST_CCK_RATE; + if (IWL_RATE_5M_INDEX < lowest_present_cck) + cck |= IWL_RATE_5M_MASK >> IWL_FIRST_CCK_RATE; + if (IWL_RATE_2M_INDEX < lowest_present_cck) + cck |= IWL_RATE_2M_MASK >> IWL_FIRST_CCK_RATE; /* 1M already there or needed so always add */ cck |= IWL_RATE_1M_MASK >> IWL_FIRST_CCK_RATE; From 70b9b24d4d240ff5f6087bca4013c6969af275ab Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Thu, 22 Nov 2012 00:11:09 +0900 Subject: [PATCH 019/112] ARM: S3C24XX: Fix potential NULL pointer dereference error chan->end is tested for being NULL. However in the event that it is NULL, the subsequent assignment statement would lead to NULL pointer dereference. Thus dereferencing it only when it is not NULL. Signed-off-by: Sachin Kamat Signed-off-by: Kukjin Kim --- arch/arm/plat-s3c24xx/dma.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index db98e7021f0d..0abd1c469887 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -473,12 +473,13 @@ int s3c2410_dma_enqueue(enum dma_ch channel, void *id, pr_debug("dma%d: %s: buffer %p queued onto non-empty channel\n", chan->number, __func__, buf); - if (chan->end == NULL) + if (chan->end == NULL) { pr_debug("dma%d: %s: %p not empty, and chan->end==NULL?\n", chan->number, __func__, chan); - - chan->end->next = buf; - chan->end = buf; + } else { + chan->end->next = buf; + chan->end = buf; + } } /* if necessary, update the next buffer field */ From 5d3df935426271016b895aecaa247101b4bfa35e Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 18 Nov 2012 16:29:44 +0000 Subject: [PATCH 020/112] Dove: Attempt to fix PMU/RTC interrupts Fix the acknowledgement of PMU interrupts on Dove: some Dove hardware has not been sensibly designed so that interrupts can be handled in a race free manner. The PMU is one such instance. The pending (aka 'cause') register is a bunch of RW bits, meaning that these bits can be both cleared and set by software (confirmed on the Armada-510 on the cubox.) Hardware sets the appropriate bit when an interrupt is asserted, and software is required to clear the bits which are to be processed. If we write ~(1 << bit), then we end up asserting every other interrupt except the one we're processing. So, we need to do a read-modify-write cycle to clear the asserted bit. However, any interrupts which occur in the middle of this cycle will also be written back as zero, which will also clear the new interrupts. The upshot of this is: there is _no_ way to safely clear down interrupts in this register (and other similarly behaving interrupt pending registers on this device.) The patch below at least stops us creating new interrupts. Signed-off-by: Russell King Cc: stable@vger.kernel.org Signed-off-by: Jason Cooper --- arch/arm/mach-dove/irq.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 087711524e8a..bc4344aa1009 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -46,8 +46,20 @@ static void pmu_irq_ack(struct irq_data *d) int pin = irq_to_pmu(d->irq); u32 u; + /* + * The PMU mask register is not RW0C: it is RW. This means that + * the bits take whatever value is written to them; if you write + * a '1', you will set the interrupt. + * + * Unfortunately this means there is NO race free way to clear + * these interrupts. + * + * So, let's structure the code so that the window is as small as + * possible. + */ u = ~(1 << (pin & 31)); - writel(u, PMU_INTERRUPT_CAUSE); + u &= readl_relaxed(PMU_INTERRUPT_CAUSE); + writel_relaxed(u, PMU_INTERRUPT_CAUSE); } static struct irq_chip pmu_irq_chip = { From d356cf5a74afa32b40decca3c9dd88bc3cd63eb5 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 18 Nov 2012 16:39:32 +0000 Subject: [PATCH 021/112] Dove: Fix irq_to_pmu() PMU interrupts start at IRQ_DOVE_PMU_START, not IRQ_DOVE_PMU_START + 1. Fix the condition. (It may have been less likely to occur had the code been written "if (irq >= IRQ_DOVE_PMU_START" which imho is the easier to understand notation, and matches the normal way of thinking about these things.) Signed-off-by: Russell King Cc: stable@vger.kernel.org Signed-off-by: Jason Cooper --- arch/arm/mach-dove/include/mach/pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-dove/include/mach/pm.h b/arch/arm/mach-dove/include/mach/pm.h index 7bcd0dfce4b1..b47f75038686 100644 --- a/arch/arm/mach-dove/include/mach/pm.h +++ b/arch/arm/mach-dove/include/mach/pm.h @@ -63,7 +63,7 @@ static inline int pmu_to_irq(int pin) static inline int irq_to_pmu(int irq) { - if (IRQ_DOVE_PMU_START < irq && irq < NR_IRQS) + if (IRQ_DOVE_PMU_START <= irq && irq < NR_IRQS) return irq - IRQ_DOVE_PMU_START; return -EINVAL; From 1dc831bf53fddcc6443f74a39e72db5bcea4f15d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 21 Nov 2012 00:19:06 -0700 Subject: [PATCH 022/112] ARM: Kirkwood: Update PCI-E fixup - The code relies on rc_pci_fixup being called, which only happens when CONFIG_PCI_QUIRKS is enabled, so add that to Kconfig. Omitting this causes a booting failure with a non-obvious cause. - Update rc_pci_fixup to set the class properly, copying the more modern style from other places - Correct the rc_pci_fixup comment Signed-off-by: Jason Gunthorpe Cc: stable@vger.kernel.org Signed-off-by: Jason Cooper --- arch/arm/Kconfig | 1 + arch/arm/mach-kirkwood/pcie.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ade7e924bef5..9759fec0b704 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -547,6 +547,7 @@ config ARCH_KIRKWOOD select CPU_FEROCEON select GENERIC_CLOCKEVENTS select PCI + select PCI_QUIRKS select PLAT_ORION_LEGACY help Support for the following Marvell Kirkwood series SoCs: diff --git a/arch/arm/mach-kirkwood/pcie.c b/arch/arm/mach-kirkwood/pcie.c index ec544918b12c..74fc5a074fc4 100644 --- a/arch/arm/mach-kirkwood/pcie.c +++ b/arch/arm/mach-kirkwood/pcie.c @@ -207,14 +207,19 @@ static int __init kirkwood_pcie_setup(int nr, struct pci_sys_data *sys) return 1; } +/* + * The root complex has a hardwired class of PCI_CLASS_MEMORY_OTHER, when it + * is operating as a root complex this needs to be switched to + * PCI_CLASS_BRIDGE_HOST or Linux will errantly try to process the BAR's on + * the device. Decoding setup is handled by the orion code. + */ static void __devinit rc_pci_fixup(struct pci_dev *dev) { - /* - * Prevent enumeration of root complex. - */ if (dev->bus->parent == NULL && dev->devfn == 0) { int i; + dev->class &= 0xff; + dev->class |= PCI_CLASS_BRIDGE_HOST << 8; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; dev->resource[i].end = 0; From 3043c5c8bf3511d76bbdcf27f56449549d4bb70c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Sat, 1 Sep 2012 18:28:14 +0200 Subject: [PATCH 023/112] IXP4xx: Fix Goramo MultiLink platform compilation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- arch/arm/mach-ixp4xx/goramo_mlr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ixp4xx/goramo_mlr.c b/arch/arm/mach-ixp4xx/goramo_mlr.c index b800a031207c..f417632bc991 100644 --- a/arch/arm/mach-ixp4xx/goramo_mlr.c +++ b/arch/arm/mach-ixp4xx/goramo_mlr.c @@ -15,6 +15,7 @@ #include #include #include +#include #define SLOT_ETHA 0x0B /* IDSEL = AD21 */ #define SLOT_ETHB 0x0C /* IDSEL = AD20 */ From 87ba5c6a313b4da5639a18d7796e51483ededd17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Sun, 2 Sep 2012 19:23:27 +0200 Subject: [PATCH 024/112] IXP4xx: Fix off-by-one bug in Goramo MultiLink platform. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- arch/arm/mach-ixp4xx/goramo_mlr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-ixp4xx/goramo_mlr.c b/arch/arm/mach-ixp4xx/goramo_mlr.c index f417632bc991..53b8348dfcc2 100644 --- a/arch/arm/mach-ixp4xx/goramo_mlr.c +++ b/arch/arm/mach-ixp4xx/goramo_mlr.c @@ -330,7 +330,7 @@ static struct platform_device device_hss_tab[] = { }; -static struct platform_device *device_tab[6] __initdata = { +static struct platform_device *device_tab[7] __initdata = { &device_flash, /* index 0 */ }; From 553da857b1b917e27817b923ea2c786313620845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Tue, 23 Mar 2010 20:22:13 +0100 Subject: [PATCH 025/112] IXP4xx: HW pseudo-random generator is available on IXP45x/46x only. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- drivers/char/hw_random/Kconfig | 6 +++--- drivers/char/hw_random/ixp4xx-rng.c | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index fbd9b2b850ef..c58ea9b80b1a 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -127,12 +127,12 @@ config HW_RANDOM_VIA If unsure, say Y. config HW_RANDOM_IXP4XX - tristate "Intel IXP4xx NPU HW Random Number Generator support" + tristate "Intel IXP4xx NPU HW Pseudo-Random Number Generator support" depends on HW_RANDOM && ARCH_IXP4XX default HW_RANDOM ---help--- - This driver provides kernel-side support for the Random - Number Generator hardware found on the Intel IXP4xx NPU. + This driver provides kernel-side support for the Pseudo-Random + Number Generator hardware found on the Intel IXP45x/46x NPU. To compile this driver as a module, choose M here: the module will be called ixp4xx-rng. diff --git a/drivers/char/hw_random/ixp4xx-rng.c b/drivers/char/hw_random/ixp4xx-rng.c index 263567f5f392..beec1627db3c 100644 --- a/drivers/char/hw_random/ixp4xx-rng.c +++ b/drivers/char/hw_random/ixp4xx-rng.c @@ -45,6 +45,9 @@ static int __init ixp4xx_rng_init(void) void __iomem * rng_base; int err; + if (!cpu_is_ixp46x()) /* includes IXP455 */ + return -ENOSYS; + rng_base = ioremap(0x70002100, 4); if (!rng_base) return -ENOMEM; @@ -68,5 +71,5 @@ module_init(ixp4xx_rng_init); module_exit(ixp4xx_rng_exit); MODULE_AUTHOR("Deepak Saxena "); -MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver for IXP4xx"); +MODULE_DESCRIPTION("H/W Pseudo-Random Number Generator (RNG) driver for IXP45x/46x"); MODULE_LICENSE("GPL"); From 9665c52b102504399a9f041b85ff9f47c5d56bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Thu, 25 Mar 2010 23:56:05 +0100 Subject: [PATCH 026/112] IXP4xx: ixp4xx_crypto driver requires Queue Manager and NPE drivers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- drivers/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 308c7fb92a60..f6644f59fd9d 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -224,7 +224,7 @@ config CRYPTO_DEV_TALITOS config CRYPTO_DEV_IXP4XX tristate "Driver for IXP4xx crypto hardware acceleration" - depends on ARCH_IXP4XX + depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE select CRYPTO_DES select CRYPTO_ALGAPI select CRYPTO_AUTHENC From f203bc64e8b7d19ca52df89b142005c5552fef3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Sun, 17 Apr 2011 21:06:06 +0200 Subject: [PATCH 027/112] IXP4xx: Remove time limit for PCI TRDY to enable use of slow devices. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- arch/arm/mach-ixp4xx/common-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 1694f01ce2b6..6d6bde3e15fa 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -410,6 +410,7 @@ void __init ixp4xx_pci_preinit(void) * Enable the IO window to be way up high, at 0xfffffc00 */ local_write_config(PCI_BASE_ADDRESS_5, 4, 0xfffffc01); + local_write_config(0x40, 4, 0x000080FF); /* No TRDY time limit */ } else { printk("PCI: IXP4xx is target - No bus scan performed\n"); } From 4d18dea51036c673795eb9510fd8d70dcf214414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Tue, 1 May 2012 20:47:12 +0200 Subject: [PATCH 028/112] WAN: Remove redundant HDLC info printed by IXP4xx HSS driver. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- drivers/net/wan/ixp4xx_hss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 3f575afd8cfc..0fe36d770e1f 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -1363,7 +1363,7 @@ static int __devinit hss_init_one(struct platform_device *pdev) platform_set_drvdata(pdev, port); - netdev_info(dev, "HSS-%i\n", port->id); + netdev_info(dev, "initialized\n"); return 0; err_free_netdev: From 9792eb1d7296ad4e19c3219dabf65efd01562a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Tue, 28 Dec 2010 13:08:18 +0100 Subject: [PATCH 029/112] IXP4xx crypto: MOD_AES{128,192,256} already include key size. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- drivers/crypto/ixp4xx_crypto.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 8f3f74ce8c7f..21180d6cad6e 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -750,12 +750,12 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt, } if (cipher_cfg & MOD_AES) { switch (key_len) { - case 16: keylen_cfg = MOD_AES128 | KEYLEN_128; break; - case 24: keylen_cfg = MOD_AES192 | KEYLEN_192; break; - case 32: keylen_cfg = MOD_AES256 | KEYLEN_256; break; - default: - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; + case 16: keylen_cfg = MOD_AES128; break; + case 24: keylen_cfg = MOD_AES192; break; + case 32: keylen_cfg = MOD_AES256; break; + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; } cipher_cfg |= keylen_cfg; } else if (cipher_cfg & MOD_3DES) { From 05cd3db0df6f6ac3083a9e9671dd1662053604df Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Wed, 25 Jul 2012 12:33:06 -0600 Subject: [PATCH 030/112] ixp4xx: Declare MODULE_FIRMWARE usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Krzysztof Halasa Cc: Imre Kaloz Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Tim Gardner Signed-off-by: Krzysztof Hałasa --- arch/arm/mach-ixp4xx/ixp4xx_npe.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-ixp4xx/ixp4xx_npe.c b/arch/arm/mach-ixp4xx/ixp4xx_npe.c index a17ed79207a4..d4eb09a62863 100644 --- a/arch/arm/mach-ixp4xx/ixp4xx_npe.c +++ b/arch/arm/mach-ixp4xx/ixp4xx_npe.c @@ -116,7 +116,11 @@ /* NPE mailbox_status value for reset */ #define RESET_MBOX_STAT 0x0000F0F0 -const char *npe_names[] = { "NPE-A", "NPE-B", "NPE-C" }; +#define NPE_A_FIRMWARE "NPE-A" +#define NPE_B_FIRMWARE "NPE-B" +#define NPE_C_FIRMWARE "NPE-C" + +const char *npe_names[] = { NPE_A_FIRMWARE, NPE_B_FIRMWARE, NPE_C_FIRMWARE }; #define print_npe(pri, npe, fmt, ...) \ printk(pri "%s: " fmt, npe_name(npe), ## __VA_ARGS__) @@ -724,6 +728,9 @@ module_exit(npe_cleanup_module); MODULE_AUTHOR("Krzysztof Halasa"); MODULE_LICENSE("GPL v2"); +MODULE_FIRMWARE(NPE_A_FIRMWARE); +MODULE_FIRMWARE(NPE_B_FIRMWARE); +MODULE_FIRMWARE(NPE_C_FIRMWARE); EXPORT_SYMBOL(npe_names); EXPORT_SYMBOL(npe_running); From f0cdb153292635203b3a0921c901dacf85d4ef1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Fri, 26 Mar 2010 16:38:52 +0100 Subject: [PATCH 031/112] IXP4xx: Always ioremap() Queue Manager MMIO region at boot. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't make much sense to map QMgr dynamically - we almost always need it and the static mapping will be needed for little-endian data-coherent operation (to make QMgr region value-coherent). Signed-off-by: Krzysztof Hałasa --- arch/arm/mach-ixp4xx/common.c | 5 +++++ arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h | 7 ++++--- arch/arm/mach-ixp4xx/include/mach/qmgr.h | 12 ++++++------ arch/arm/mach-ixp4xx/ixp4xx_qmgr.c | 14 ++------------ 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index fdf91a160884..acc0584377fc 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -67,6 +67,11 @@ static struct map_desc ixp4xx_io_desc[] __initdata = { .pfn = __phys_to_pfn(IXP4XX_PCI_CFG_BASE_PHYS), .length = IXP4XX_PCI_CFG_REGION_SIZE, .type = MT_DEVICE + }, { /* Queue Manager */ + .virtual = (unsigned long)IXP4XX_QMGR_BASE_VIRT, + .pfn = __phys_to_pfn(IXP4XX_QMGR_BASE_PHYS), + .length = IXP4XX_QMGR_REGION_SIZE, + .type = MT_DEVICE }, #ifdef CONFIG_DEBUG_LL { /* Debug UART mapping */ diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h index eb68b61ce975..a24a8fe76604 100644 --- a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h +++ b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h @@ -30,19 +30,20 @@ * * 0x50000000 0x10000000 ioremap'd EXP BUS * - * 0x6000000 0x00004000 ioremap'd QMgr + * 0x60000000 0x00004000 0xffbe7000 QMgr * - * 0xC0000000 0x00001000 0xffbff000 PCI CFG + * 0xC8000000 0x00013000 0xffbeb000 On-Chip Peripherals * * 0xC4000000 0x00001000 0xffbfe000 EXP CFG * - * 0xC8000000 0x00013000 0xffbeb000 On-Chip Peripherals + * 0xC0000000 0x00001000 0xffbff000 PCI CFG */ /* * Queue Manager */ #define IXP4XX_QMGR_BASE_PHYS (0x60000000) +#define IXP4XX_QMGR_BASE_VIRT IOMEM(0xFFBE7000) #define IXP4XX_QMGR_REGION_SIZE (0x00004000) /* diff --git a/arch/arm/mach-ixp4xx/include/mach/qmgr.h b/arch/arm/mach-ixp4xx/include/mach/qmgr.h index 9e7cad2d54cb..0a88d3b8e806 100644 --- a/arch/arm/mach-ixp4xx/include/mach/qmgr.h +++ b/arch/arm/mach-ixp4xx/include/mach/qmgr.h @@ -86,7 +86,7 @@ void qmgr_release_queue(unsigned int queue); static inline void qmgr_put_entry(unsigned int queue, u32 val) { - extern struct qmgr_regs __iomem *qmgr_regs; + const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; #if DEBUG_QMGR BUG_ON(!qmgr_queue_descs[queue]); /* not yet requested */ @@ -99,7 +99,7 @@ static inline void qmgr_put_entry(unsigned int queue, u32 val) static inline u32 qmgr_get_entry(unsigned int queue) { u32 val; - extern struct qmgr_regs __iomem *qmgr_regs; + const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; val = __raw_readl(&qmgr_regs->acc[queue][0]); #if DEBUG_QMGR BUG_ON(!qmgr_queue_descs[queue]); /* not yet requested */ @@ -112,14 +112,14 @@ static inline u32 qmgr_get_entry(unsigned int queue) static inline int __qmgr_get_stat1(unsigned int queue) { - extern struct qmgr_regs __iomem *qmgr_regs; + const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; return (__raw_readl(&qmgr_regs->stat1[queue >> 3]) >> ((queue & 7) << 2)) & 0xF; } static inline int __qmgr_get_stat2(unsigned int queue) { - extern struct qmgr_regs __iomem *qmgr_regs; + const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; BUG_ON(queue >= HALF_QUEUES); return (__raw_readl(&qmgr_regs->stat2[queue >> 4]) >> ((queue & 0xF) << 1)) & 0x3; @@ -145,7 +145,7 @@ static inline int qmgr_stat_empty(unsigned int queue) */ static inline int qmgr_stat_below_low_watermark(unsigned int queue) { - extern struct qmgr_regs __iomem *qmgr_regs; + const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; if (queue >= HALF_QUEUES) return (__raw_readl(&qmgr_regs->statne_h) >> (queue - HALF_QUEUES)) & 0x01; @@ -172,7 +172,7 @@ static inline int qmgr_stat_above_high_watermark(unsigned int queue) */ static inline int qmgr_stat_full(unsigned int queue) { - extern struct qmgr_regs __iomem *qmgr_regs; + const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; if (queue >= HALF_QUEUES) return (__raw_readl(&qmgr_regs->statf_h) >> (queue - HALF_QUEUES)) & 0x01; diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c index 852f7c9f87d0..7c0584e0ab87 100644 --- a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c +++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c @@ -14,7 +14,7 @@ #include #include -struct qmgr_regs __iomem *qmgr_regs; +static const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; static struct resource *mem_res; static spinlock_t qmgr_lock; static u32 used_sram_bitmap[4]; /* 128 16-dword pages */ @@ -32,7 +32,7 @@ void qmgr_set_irq(unsigned int queue, int src, spin_lock_irqsave(&qmgr_lock, flags); if (queue < HALF_QUEUES) { - u32 __iomem *reg; + const u32 __iomem *reg; int bit; BUG_ON(src > QUEUE_IRQ_SRC_NOT_FULL); reg = &qmgr_regs->irqsrc[queue >> 3]; /* 8 queues per u32 */ @@ -293,12 +293,6 @@ static int qmgr_init(void) if (mem_res == NULL) return -EBUSY; - qmgr_regs = ioremap(IXP4XX_QMGR_BASE_PHYS, IXP4XX_QMGR_REGION_SIZE); - if (qmgr_regs == NULL) { - err = -ENOMEM; - goto error_map; - } - /* reset qmgr registers */ for (i = 0; i < 4; i++) { __raw_writel(0x33333333, &qmgr_regs->stat1[i]); @@ -347,8 +341,6 @@ static int qmgr_init(void) error_irq2: free_irq(IRQ_IXP4XX_QM1, NULL); error_irq: - iounmap(qmgr_regs); -error_map: release_mem_region(IXP4XX_QMGR_BASE_PHYS, IXP4XX_QMGR_REGION_SIZE); return err; } @@ -359,7 +351,6 @@ static void qmgr_remove(void) free_irq(IRQ_IXP4XX_QM2, NULL); synchronize_irq(IRQ_IXP4XX_QM1); synchronize_irq(IRQ_IXP4XX_QM2); - iounmap(qmgr_regs); release_mem_region(IXP4XX_QMGR_BASE_PHYS, IXP4XX_QMGR_REGION_SIZE); } @@ -369,7 +360,6 @@ module_exit(qmgr_remove); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Krzysztof Halasa"); -EXPORT_SYMBOL(qmgr_regs); EXPORT_SYMBOL(qmgr_set_irq); EXPORT_SYMBOL(qmgr_enable_irq); EXPORT_SYMBOL(qmgr_disable_irq); From b7b23db72f9a79e6ed0bcfb75d73b29f8e03fe62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Sun, 2 Sep 2012 00:50:39 +0200 Subject: [PATCH 032/112] IXP4xx: map CPU config registers within VMALLOC region. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- arch/arm/mach-ixp4xx/common.c | 8 ---- .../mach-ixp4xx/include/mach/debug-macro.S | 4 +- .../mach-ixp4xx/include/mach/ixp4xx-regs.h | 47 ++++++++----------- 3 files changed, 21 insertions(+), 38 deletions(-) diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index acc0584377fc..8c0c0e2d0727 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -73,14 +73,6 @@ static struct map_desc ixp4xx_io_desc[] __initdata = { .length = IXP4XX_QMGR_REGION_SIZE, .type = MT_DEVICE }, -#ifdef CONFIG_DEBUG_LL - { /* Debug UART mapping */ - .virtual = (unsigned long)IXP4XX_DEBUG_UART_BASE_VIRT, - .pfn = __phys_to_pfn(IXP4XX_DEBUG_UART_BASE_PHYS), - .length = IXP4XX_DEBUG_UART_REGION_SIZE, - .type = MT_DEVICE - } -#endif }; void __init ixp4xx_map_io(void) diff --git a/arch/arm/mach-ixp4xx/include/mach/debug-macro.S b/arch/arm/mach-ixp4xx/include/mach/debug-macro.S index 8c9f8d564492..ff686cbc5df4 100644 --- a/arch/arm/mach-ixp4xx/include/mach/debug-macro.S +++ b/arch/arm/mach-ixp4xx/include/mach/debug-macro.S @@ -17,8 +17,8 @@ #else mov \rp, #0 #endif - orr \rv, \rp, #0xff000000 @ virtual - orr \rv, \rv, #0x00b00000 + orr \rv, \rp, #0xfe000000 @ virtual + orr \rv, \rv, #0x00f00000 orr \rp, \rp, #0xc8000000 @ physical .endm diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h index a24a8fe76604..c5bae9c035d5 100644 --- a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h +++ b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h @@ -30,52 +30,43 @@ * * 0x50000000 0x10000000 ioremap'd EXP BUS * - * 0x60000000 0x00004000 0xffbe7000 QMgr + * 0xC8000000 0x00013000 0xFEF00000 On-Chip Peripherals * - * 0xC8000000 0x00013000 0xffbeb000 On-Chip Peripherals + * 0xC0000000 0x00001000 0xFEF13000 PCI CFG * - * 0xC4000000 0x00001000 0xffbfe000 EXP CFG + * 0xC4000000 0x00001000 0xFEF14000 EXP CFG * - * 0xC0000000 0x00001000 0xffbff000 PCI CFG + * 0x60000000 0x00004000 0xFEF15000 QMgr */ /* * Queue Manager */ -#define IXP4XX_QMGR_BASE_PHYS (0x60000000) -#define IXP4XX_QMGR_BASE_VIRT IOMEM(0xFFBE7000) -#define IXP4XX_QMGR_REGION_SIZE (0x00004000) +#define IXP4XX_QMGR_BASE_PHYS 0x60000000 +#define IXP4XX_QMGR_BASE_VIRT IOMEM(0xFEF15000) +#define IXP4XX_QMGR_REGION_SIZE 0x00004000 /* - * Expansion BUS Configuration registers + * Peripheral space, including debug UART. Must be section-aligned so that + * it can be used with the low-level debug code. */ -#define IXP4XX_EXP_CFG_BASE_PHYS (0xC4000000) -#define IXP4XX_EXP_CFG_BASE_VIRT IOMEM(0xFFBFE000) -#define IXP4XX_EXP_CFG_REGION_SIZE (0x00001000) +#define IXP4XX_PERIPHERAL_BASE_PHYS 0xC8000000 +#define IXP4XX_PERIPHERAL_BASE_VIRT IOMEM(0xFEF00000) +#define IXP4XX_PERIPHERAL_REGION_SIZE 0x00013000 /* * PCI Config registers */ -#define IXP4XX_PCI_CFG_BASE_PHYS (0xC0000000) -#define IXP4XX_PCI_CFG_BASE_VIRT IOMEM(0xFFBFF000) -#define IXP4XX_PCI_CFG_REGION_SIZE (0x00001000) +#define IXP4XX_PCI_CFG_BASE_PHYS 0xC0000000 +#define IXP4XX_PCI_CFG_BASE_VIRT IOMEM(0xFEF13000) +#define IXP4XX_PCI_CFG_REGION_SIZE 0x00001000 /* - * Peripheral space + * Expansion BUS Configuration registers */ -#define IXP4XX_PERIPHERAL_BASE_PHYS (0xC8000000) -#define IXP4XX_PERIPHERAL_BASE_VIRT IOMEM(0xFFBEB000) -#define IXP4XX_PERIPHERAL_REGION_SIZE (0x00013000) - -/* - * Debug UART - * - * This is basically a remap of UART1 into a region that is section - * aligned so that it * can be used with the low-level debug code. - */ -#define IXP4XX_DEBUG_UART_BASE_PHYS (0xC8000000) -#define IXP4XX_DEBUG_UART_BASE_VIRT IOMEM(0xffb00000) -#define IXP4XX_DEBUG_UART_REGION_SIZE (0x00001000) +#define IXP4XX_EXP_CFG_BASE_PHYS 0xC4000000 +#define IXP4XX_EXP_CFG_BASE_VIRT 0xFEF14000 +#define IXP4XX_EXP_CFG_REGION_SIZE 0x00001000 #define IXP4XX_EXP_CS0_OFFSET 0x00 #define IXP4XX_EXP_CS1_OFFSET 0x04 From 0d2c9f0517e915ce03a04e91d3207827e0d274a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Sep 2012 23:36:10 +0200 Subject: [PATCH 033/112] IXP4xx: use __iomem for MMIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ixp4xx queue manager uses "const struct qmgr_regs __iomem *" as the type for a pointer that is passed to __raw_writel, which is not allowed because of the const-ness. Dropping the 'const' keyword fixes the problem. While we're here, let's also drop the useless type cast. Without this patch, building ixp4xx_defconfig results in: In file included from arch/arm/mach-ixp4xx/ixp4xx_qmgr.c:15:0: arch/arm/mach-ixp4xx/include/mach/qmgr.h: In function 'qmgr_put_entry': arch/arm/mach-ixp4xx/include/mach/qmgr.h:96:2: warning: passing argument 2 of '__raw_writel' discards 'const' qualifier from pointer target type [enabled by default] arch/arm/include/asm/io.h:88:91: note: expected 'volatile void *' but argument is of type 'const u32 *' In file included from drivers/net/ethernet/xscale/ixp4xx_eth.c:41:0: arch/arm/mach-ixp4xx/include/mach/qmgr.h: In function 'qmgr_put_entry': arch/arm/mach-ixp4xx/include/mach/qmgr.h:96:2: warning: passing argument 2 of '__raw_writel' discards 'const' qualifier from pointer target type [enabled by default] arch/arm/include/asm/io.h:88:91: note: expected 'volatile void *' but argument is of type 'const u32 *' arch/arm/mach-ixp4xx/ixp4xx_qmgr.c: In function 'qmgr_set_irq': arch/arm/mach-ixp4xx/ixp4xx_qmgr.c:41:9: warning: passing argument 2 of '__raw_writel' discards 'const' qualifier from pointer target type [enabled by default] arch/arm/include/asm/io.h:88:91: note: expected 'volatile void *' but argument is of type 'const u32 *' Signed-off-by: Arnd Bergmann Signed-off-by: Krzysztof Hałasa --- arch/arm/mach-ixp4xx/include/mach/qmgr.h | 12 ++++++------ arch/arm/mach-ixp4xx/ixp4xx_qmgr.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-ixp4xx/include/mach/qmgr.h b/arch/arm/mach-ixp4xx/include/mach/qmgr.h index 0a88d3b8e806..4de8da536dbb 100644 --- a/arch/arm/mach-ixp4xx/include/mach/qmgr.h +++ b/arch/arm/mach-ixp4xx/include/mach/qmgr.h @@ -86,7 +86,7 @@ void qmgr_release_queue(unsigned int queue); static inline void qmgr_put_entry(unsigned int queue, u32 val) { - const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; + struct qmgr_regs __iomem *qmgr_regs = IXP4XX_QMGR_BASE_VIRT; #if DEBUG_QMGR BUG_ON(!qmgr_queue_descs[queue]); /* not yet requested */ @@ -99,7 +99,7 @@ static inline void qmgr_put_entry(unsigned int queue, u32 val) static inline u32 qmgr_get_entry(unsigned int queue) { u32 val; - const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; + const struct qmgr_regs __iomem *qmgr_regs = IXP4XX_QMGR_BASE_VIRT; val = __raw_readl(&qmgr_regs->acc[queue][0]); #if DEBUG_QMGR BUG_ON(!qmgr_queue_descs[queue]); /* not yet requested */ @@ -112,14 +112,14 @@ static inline u32 qmgr_get_entry(unsigned int queue) static inline int __qmgr_get_stat1(unsigned int queue) { - const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; + const struct qmgr_regs __iomem *qmgr_regs = IXP4XX_QMGR_BASE_VIRT; return (__raw_readl(&qmgr_regs->stat1[queue >> 3]) >> ((queue & 7) << 2)) & 0xF; } static inline int __qmgr_get_stat2(unsigned int queue) { - const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; + const struct qmgr_regs __iomem *qmgr_regs = IXP4XX_QMGR_BASE_VIRT; BUG_ON(queue >= HALF_QUEUES); return (__raw_readl(&qmgr_regs->stat2[queue >> 4]) >> ((queue & 0xF) << 1)) & 0x3; @@ -145,7 +145,7 @@ static inline int qmgr_stat_empty(unsigned int queue) */ static inline int qmgr_stat_below_low_watermark(unsigned int queue) { - const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; + const struct qmgr_regs __iomem *qmgr_regs = IXP4XX_QMGR_BASE_VIRT; if (queue >= HALF_QUEUES) return (__raw_readl(&qmgr_regs->statne_h) >> (queue - HALF_QUEUES)) & 0x01; @@ -172,7 +172,7 @@ static inline int qmgr_stat_above_high_watermark(unsigned int queue) */ static inline int qmgr_stat_full(unsigned int queue) { - const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; + const struct qmgr_regs __iomem *qmgr_regs = IXP4XX_QMGR_BASE_VIRT; if (queue >= HALF_QUEUES) return (__raw_readl(&qmgr_regs->statf_h) >> (queue - HALF_QUEUES)) & 0x01; diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c index 7c0584e0ab87..9d1b6b7c394c 100644 --- a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c +++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c @@ -14,7 +14,7 @@ #include #include -static const struct qmgr_regs __iomem *qmgr_regs = (void __iomem *)IXP4XX_QMGR_BASE_VIRT; +static struct qmgr_regs __iomem *qmgr_regs = IXP4XX_QMGR_BASE_VIRT; static struct resource *mem_res; static spinlock_t qmgr_lock; static u32 used_sram_bitmap[4]; /* 128 16-dword pages */ @@ -32,7 +32,7 @@ void qmgr_set_irq(unsigned int queue, int src, spin_lock_irqsave(&qmgr_lock, flags); if (queue < HALF_QUEUES) { - const u32 __iomem *reg; + u32 __iomem *reg; int bit; BUG_ON(src > QUEUE_IRQ_SRC_NOT_FULL); reg = &qmgr_regs->irqsrc[queue >> 3]; /* 8 queues per u32 */ From 129495dee5e1b76e8c8bf342c0f7ad18544d79a9 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 9 Nov 2012 16:41:29 +0900 Subject: [PATCH 034/112] drm/exynos: fix linux framebuffer address setting. With iommu, buffer->dma_addr has device addres so this patch fixes for physical address to be set to fix.smem_start always. Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 67eb6ba56edf..e7466c4414cb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -87,7 +87,8 @@ static int exynos_drm_fbdev_update(struct drm_fb_helper *helper, dev->mode_config.fb_base = (resource_size_t)buffer->dma_addr; fbi->screen_base = buffer->kvaddr + offset; - fbi->fix.smem_start = (unsigned long)(buffer->dma_addr + offset); + fbi->fix.smem_start = (unsigned long)(page_to_phys(buffer->pages[0]) + + offset); fbi->screen_size = size; fbi->fix.smem_len = size; From a39b49812fe0c3f62fd3f7f6ec8c10d8f7035992 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Wed, 14 Nov 2012 17:28:36 +0900 Subject: [PATCH 035/112] drm/exynos: remove unnecessary code. plane->fb will be set to new fb after update_plane callback is called by drm_mode_set_plane() Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_plane.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 60b877a388c2..862ca1eb2102 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -204,7 +204,6 @@ exynos_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, return ret; plane->crtc = crtc; - plane->fb = crtc->fb; exynos_plane_commit(plane); exynos_plane_dpms(plane, DRM_MODE_DPMS_ON); From ffe9955a741a2742cc517245b2219b61cc6dde28 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Wed, 14 Nov 2012 20:41:35 +0900 Subject: [PATCH 036/112] drm/exynos: fix overlay updating issue Chagelog v2: Move encoder's dpms updating into exynos_drm_encoder_commit function because when crtc's dpms is updated, encoder's dpms is updated also. This would induce the issue that encoder isn't disabled after crtc is disabled. Changelog v1: This patch fixes a issue that overlay data aren't applied to real hardware when dpms off goes to on after setcrtc was requested like below, dpms off -> setcrtc -> dpms off -> dpms on For this, it makes encoder's dpms to be updated when setcrtc is requested. Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_encoder.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c index 241ad1eeec64..607231a71b2d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_encoder.c +++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c @@ -226,6 +226,12 @@ static void exynos_drm_encoder_commit(struct drm_encoder *encoder) * already updated or not by exynos_drm_encoder_dpms function. */ exynos_encoder->updated = true; + + /* + * In case of setcrtc, there is no way to update encoder's dpms + * so update it here. + */ + exynos_encoder->dpms = DRM_MODE_DPMS_ON; } static void exynos_drm_encoder_disable(struct drm_encoder *encoder) From 77b1c0362ffdb11128590a7248f3f8c434ab42f2 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 19 Nov 2012 15:22:54 +0530 Subject: [PATCH 037/112] drm/exynos: Make exynos4/5_fimd_driver_data static Fixes the following sparse warnings: drivers/gpu/drm/exynos/exynos_drm_fimd.c:65:25: warning: symbol 'exynos4_fimd_driver_data' was not declared. Should it be static? drivers/gpu/drm/exynos/exynos_drm_fimd.c:69:25: warning: symbol 'exynos5_fimd_driver_data' was not declared. Should it be static? Signed-off-by: Sachin Kamat Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 130a2b510d4a..e08478f19f1a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -61,11 +61,11 @@ struct fimd_driver_data { unsigned int timing_base; }; -struct fimd_driver_data exynos4_fimd_driver_data = { +static struct fimd_driver_data exynos4_fimd_driver_data = { .timing_base = 0x0, }; -struct fimd_driver_data exynos5_fimd_driver_data = { +static struct fimd_driver_data exynos5_fimd_driver_data = { .timing_base = 0x20000, }; From c69d5276704e1a9c771de57c0a2e7b69ea3ea578 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 19 Nov 2012 14:50:30 +0530 Subject: [PATCH 038/112] drm/exynos: Fix potential NULL pointer dereference in exynos_drm_encoder.c Check overlay_ops is not NULL as checked in the previous 'if' condition. Fixes the following smatch error: drivers/gpu/drm/exynos/exynos_drm_encoder.c:509 exynos_drm_encoder_plane_disable() error: we previously assumed 'overlay_ops' could be null (see line 499) Signed-off-by: Sachin Kamat Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c index 607231a71b2d..f2df06c603f7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_encoder.c +++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c @@ -513,6 +513,6 @@ void exynos_drm_encoder_plane_disable(struct drm_encoder *encoder, void *data) * because the setting for disabling the overlay will be updated * at vsync. */ - if (overlay_ops->wait_for_vblank) + if (overlay_ops && overlay_ops->wait_for_vblank) overlay_ops->wait_for_vblank(manager->dev); } From 9a30a61f3516871c5c638fd7c025fbaa11ddf7fe Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Nov 2012 14:33:45 +0200 Subject: [PATCH 039/112] drm/i915: do not default to 18 bpp for eDP if missing from VBT commit 500a8cc466a24e2fbc4c86ef9c6467ae2ffdeb0c Author: Zhenyu Wang Date: Wed Jan 13 11:19:52 2010 +0800 drm/i915: parse eDP panel color depth from VBT block originally introduced parsing bpp for eDP from VBT, with a default of 18 bpp if the eDP BIOS data block is not present. Turns out that default seems to break the Macbook Pro with retina display, as noted in commit 4344b813f105a19f793f1fd93ad775b784648b95 Author: Daniel Vetter Date: Fri Aug 10 11:10:20 2012 +0200 drm/i915: ignore eDP bpc settings from vbt Since we can't ignore bpc settings from VBT completely after all, get rid of the default. Do not clamp eDP to 18 bpp by default if the eDP BDB is missing from VBT. Signed-off-by: Jani Nikula Tested-by: Henrik Rydberg [danvet: paste in the updated commit message from irc.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_bios.c | 11 ++--------- drivers/gpu/drm/i915/intel_display.c | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 0ed6baff4b0c..56846ed5ee55 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -499,12 +499,8 @@ parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb) edp = find_section(bdb, BDB_EDP); if (!edp) { - if (SUPPORTS_EDP(dev_priv->dev) && dev_priv->edp.support) { - DRM_DEBUG_KMS("No eDP BDB found but eDP panel " - "supported, assume %dbpp panel color " - "depth.\n", - dev_priv->edp.bpp); - } + if (SUPPORTS_EDP(dev_priv->dev) && dev_priv->edp.support) + DRM_DEBUG_KMS("No eDP BDB found but eDP panel supported.\n"); return; } @@ -657,9 +653,6 @@ init_vbt_defaults(struct drm_i915_private *dev_priv) dev_priv->lvds_use_ssc = 1; dev_priv->lvds_ssc_freq = intel_bios_ssc_frequency(dev, 1); DRM_DEBUG_KMS("Set default to SSC at %dMHz\n", dev_priv->lvds_ssc_freq); - - /* eDP data */ - dev_priv->edp.bpp = 18; } static int __init intel_no_opregion_vbt_callback(const struct dmi_system_id *id) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4154bcd7a070..b426d44a2b05 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3845,7 +3845,7 @@ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, /* Use VBT settings if we have an eDP panel */ unsigned int edp_bpc = dev_priv->edp.bpp / 3; - if (edp_bpc < display_bpc) { + if (edp_bpc && edp_bpc < display_bpc) { DRM_DEBUG_KMS("clamping display bpc (was %d) to eDP (%d)\n", display_bpc, edp_bpc); display_bpc = edp_bpc; } From 3786063a3c0ba26a2400a04476c0c0ccfd3c6beb Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 15 Nov 2012 14:17:01 +0800 Subject: [PATCH 040/112] perf kvm: Rename perf_kvm to perf_kvm_stat Then let it only be used in 'perf kvm stat'. Preparatory patch to stop trying to build parts of this tool that for now are only supported on x86. Signed-off-by: Xiao Guangrong Cc: Borislav Petkov Cc: David Ahern Cc: David Howells Cc: Dong Hao Cc: Ingo Molnar Cc: Josh Boyer Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Runzhen Wang Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: x86@kernel.org Link: http://lkml.kernel.org/r/50A488DD.6090106@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 111 +++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 51 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index e013bdb5e24a..9fa45fa13bd6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -58,7 +58,7 @@ struct kvm_event_key { }; -struct perf_kvm; +struct perf_kvm_stat; struct kvm_events_ops { bool (*is_begin_event)(struct perf_evsel *evsel, @@ -66,7 +66,7 @@ struct kvm_events_ops { struct event_key *key); bool (*is_end_event)(struct perf_evsel *evsel, struct perf_sample *sample, struct event_key *key); - void (*decode_key)(struct perf_kvm *kvm, struct event_key *key, + void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key, char decode[20]); const char *name; }; @@ -79,7 +79,7 @@ struct exit_reasons_table { #define EVENTS_BITS 12 #define EVENTS_CACHE_SIZE (1UL << EVENTS_BITS) -struct perf_kvm { +struct perf_kvm_stat { struct perf_tool tool; struct perf_session *session; @@ -146,7 +146,7 @@ static struct exit_reasons_table svm_exit_reasons[] = { SVM_EXIT_REASONS }; -static const char *get_exit_reason(struct perf_kvm *kvm, u64 exit_code) +static const char *get_exit_reason(struct perf_kvm_stat *kvm, u64 exit_code) { int i = kvm->exit_reasons_size; struct exit_reasons_table *tbl = kvm->exit_reasons; @@ -162,7 +162,7 @@ static const char *get_exit_reason(struct perf_kvm *kvm, u64 exit_code) return "UNKNOWN"; } -static void exit_event_decode_key(struct perf_kvm *kvm, +static void exit_event_decode_key(struct perf_kvm_stat *kvm, struct event_key *key, char decode[20]) { @@ -228,7 +228,7 @@ static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample, return false; } -static void mmio_event_decode_key(struct perf_kvm *kvm __maybe_unused, +static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char decode[20]) { @@ -271,7 +271,7 @@ static bool ioport_event_end(struct perf_evsel *evsel, return kvm_entry_event(evsel); } -static void ioport_event_decode_key(struct perf_kvm *kvm __maybe_unused, +static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char decode[20]) { @@ -286,7 +286,7 @@ static struct kvm_events_ops ioport_events = { .name = "IO Port Access" }; -static bool register_kvm_events_ops(struct perf_kvm *kvm) +static bool register_kvm_events_ops(struct perf_kvm_stat *kvm) { bool ret = true; @@ -311,7 +311,7 @@ struct vcpu_event_record { }; -static void init_kvm_event_record(struct perf_kvm *kvm) +static void init_kvm_event_record(struct perf_kvm_stat *kvm) { int i; @@ -360,7 +360,7 @@ static struct kvm_event *kvm_alloc_init_event(struct event_key *key) return event; } -static struct kvm_event *find_create_kvm_event(struct perf_kvm *kvm, +static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm, struct event_key *key) { struct kvm_event *event; @@ -381,7 +381,7 @@ static struct kvm_event *find_create_kvm_event(struct perf_kvm *kvm, return event; } -static bool handle_begin_event(struct perf_kvm *kvm, +static bool handle_begin_event(struct perf_kvm_stat *kvm, struct vcpu_event_record *vcpu_record, struct event_key *key, u64 timestamp) { @@ -425,7 +425,7 @@ static bool update_kvm_event(struct kvm_event *event, int vcpu_id, return true; } -static bool handle_end_event(struct perf_kvm *kvm, +static bool handle_end_event(struct perf_kvm_stat *kvm, struct vcpu_event_record *vcpu_record, struct event_key *key, u64 timestamp) @@ -486,7 +486,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread, return thread->priv; } -static bool handle_kvm_event(struct perf_kvm *kvm, +static bool handle_kvm_event(struct perf_kvm_stat *kvm, struct thread *thread, struct perf_evsel *evsel, struct perf_sample *sample) @@ -541,7 +541,7 @@ static struct kvm_event_key keys[] = { { NULL, NULL } }; -static bool select_key(struct perf_kvm *kvm) +static bool select_key(struct perf_kvm_stat *kvm) { int i; @@ -577,7 +577,8 @@ static void insert_to_result(struct rb_root *result, struct kvm_event *event, rb_insert_color(&event->rb, result); } -static void update_total_count(struct perf_kvm *kvm, struct kvm_event *event) +static void +update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event) { int vcpu = kvm->trace_vcpu; @@ -590,7 +591,7 @@ static bool event_is_valid(struct kvm_event *event, int vcpu) return !!get_event_count(event, vcpu); } -static void sort_result(struct perf_kvm *kvm) +static void sort_result(struct perf_kvm_stat *kvm) { unsigned int i; int vcpu = kvm->trace_vcpu; @@ -627,7 +628,7 @@ static void print_vcpu_info(int vcpu) pr_info("VCPU %d:\n\n", vcpu); } -static void print_result(struct perf_kvm *kvm) +static void print_result(struct perf_kvm_stat *kvm) { char decode[20]; struct kvm_event *event; @@ -670,7 +671,8 @@ static int process_sample_event(struct perf_tool *tool, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, sample->tid); - struct perf_kvm *kvm = container_of(tool, struct perf_kvm, tool); + struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, + tool); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", @@ -701,7 +703,7 @@ static int get_cpu_isa(struct perf_session *session) return isa; } -static int read_events(struct perf_kvm *kvm) +static int read_events(struct perf_kvm_stat *kvm) { int ret; @@ -750,7 +752,7 @@ static bool verify_vcpu(int vcpu) return true; } -static int kvm_events_report_vcpu(struct perf_kvm *kvm) +static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm) { int ret = -EINVAL; int vcpu = kvm->trace_vcpu; @@ -798,7 +800,8 @@ static const char * const record_args[] = { _p; \ }) -static int kvm_events_record(struct perf_kvm *kvm, int argc, const char **argv) +static int +kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) { unsigned int rec_argc, i, j; const char **rec_argv; @@ -821,7 +824,8 @@ static int kvm_events_record(struct perf_kvm *kvm, int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -static int kvm_events_report(struct perf_kvm *kvm, int argc, const char **argv) +static int +kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) { const struct option kvm_events_report_options[] = { OPT_STRING(0, "event", &kvm->report_event, "report event", @@ -864,24 +868,36 @@ static void print_kvm_stat_usage(void) printf("\nOtherwise, it is the alias of 'perf stat':\n"); } -static int kvm_cmd_stat(struct perf_kvm *kvm, int argc, const char **argv) +static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) { + struct perf_kvm_stat kvm = { + .file_name = file_name, + + .trace_vcpu = -1, + .report_event = "vmexit", + .sort_key = "sample", + + .exit_reasons = svm_exit_reasons, + .exit_reasons_size = ARRAY_SIZE(svm_exit_reasons), + .exit_reasons_isa = "SVM", + }; + if (argc == 1) { print_kvm_stat_usage(); goto perf_stat; } if (!strncmp(argv[1], "rec", 3)) - return kvm_events_record(kvm, argc - 1, argv + 1); + return kvm_events_record(&kvm, argc - 1, argv + 1); if (!strncmp(argv[1], "rep", 3)) - return kvm_events_report(kvm, argc - 1 , argv + 1); + return kvm_events_report(&kvm, argc - 1 , argv + 1); perf_stat: return cmd_stat(argc, argv, NULL); } -static int __cmd_record(struct perf_kvm *kvm, int argc, const char **argv) +static int __cmd_record(const char *file_name, int argc, const char **argv) { int rec_argc, i = 0, j; const char **rec_argv; @@ -890,7 +906,7 @@ static int __cmd_record(struct perf_kvm *kvm, int argc, const char **argv) rec_argv = calloc(rec_argc + 1, sizeof(char *)); rec_argv[i++] = strdup("record"); rec_argv[i++] = strdup("-o"); - rec_argv[i++] = strdup(kvm->file_name); + rec_argv[i++] = strdup(file_name); for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -899,7 +915,7 @@ static int __cmd_record(struct perf_kvm *kvm, int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -static int __cmd_report(struct perf_kvm *kvm, int argc, const char **argv) +static int __cmd_report(const char *file_name, int argc, const char **argv) { int rec_argc, i = 0, j; const char **rec_argv; @@ -908,7 +924,7 @@ static int __cmd_report(struct perf_kvm *kvm, int argc, const char **argv) rec_argv = calloc(rec_argc + 1, sizeof(char *)); rec_argv[i++] = strdup("report"); rec_argv[i++] = strdup("-i"); - rec_argv[i++] = strdup(kvm->file_name); + rec_argv[i++] = strdup(file_name); for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -917,7 +933,8 @@ static int __cmd_report(struct perf_kvm *kvm, int argc, const char **argv) return cmd_report(i, rec_argv, NULL); } -static int __cmd_buildid_list(struct perf_kvm *kvm, int argc, const char **argv) +static int +__cmd_buildid_list(const char *file_name, int argc, const char **argv) { int rec_argc, i = 0, j; const char **rec_argv; @@ -926,7 +943,7 @@ static int __cmd_buildid_list(struct perf_kvm *kvm, int argc, const char **argv) rec_argv = calloc(rec_argc + 1, sizeof(char *)); rec_argv[i++] = strdup("buildid-list"); rec_argv[i++] = strdup("-i"); - rec_argv[i++] = strdup(kvm->file_name); + rec_argv[i++] = strdup(file_name); for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -937,20 +954,12 @@ static int __cmd_buildid_list(struct perf_kvm *kvm, int argc, const char **argv) int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) { - struct perf_kvm kvm = { - .trace_vcpu = -1, - .report_event = "vmexit", - .sort_key = "sample", - - .exit_reasons = svm_exit_reasons, - .exit_reasons_size = ARRAY_SIZE(svm_exit_reasons), - .exit_reasons_isa = "SVM", - }; + const char *file_name; const struct option kvm_options[] = { - OPT_STRING('i', "input", &kvm.file_name, "file", + OPT_STRING('i', "input", &file_name, "file", "Input file name"), - OPT_STRING('o', "output", &kvm.file_name, "file", + OPT_STRING('o', "output", &file_name, "file", "Output file name"), OPT_BOOLEAN(0, "guest", &perf_guest, "Collect guest os data"), @@ -985,32 +994,32 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) if (!perf_host) perf_guest = 1; - if (!kvm.file_name) { + if (!file_name) { if (perf_host && !perf_guest) - kvm.file_name = strdup("perf.data.host"); + file_name = strdup("perf.data.host"); else if (!perf_host && perf_guest) - kvm.file_name = strdup("perf.data.guest"); + file_name = strdup("perf.data.guest"); else - kvm.file_name = strdup("perf.data.kvm"); + file_name = strdup("perf.data.kvm"); - if (!kvm.file_name) { + if (!file_name) { pr_err("Failed to allocate memory for filename\n"); return -ENOMEM; } } if (!strncmp(argv[0], "rec", 3)) - return __cmd_record(&kvm, argc, argv); + return __cmd_record(file_name, argc, argv); else if (!strncmp(argv[0], "rep", 3)) - return __cmd_report(&kvm, argc, argv); + return __cmd_report(file_name, argc, argv); else if (!strncmp(argv[0], "diff", 4)) return cmd_diff(argc, argv, NULL); else if (!strncmp(argv[0], "top", 3)) return cmd_top(argc, argv, NULL); else if (!strncmp(argv[0], "buildid-list", 12)) - return __cmd_buildid_list(&kvm, argc, argv); + return __cmd_buildid_list(file_name, argc, argv); else if (!strncmp(argv[0], "stat", 4)) - return kvm_cmd_stat(&kvm, argc, argv); + return kvm_cmd_stat(file_name, argc, argv); else usage_with_options(kvm_usage, kvm_options); From 7321090f6751c9987c26a8c81c63680d16a614d7 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 19 Nov 2012 16:19:21 +0800 Subject: [PATCH 041/112] perf kvm: Fix building perf kvm on non x86 arches Now, 'perf kvm stat' is only supported on x86, let its code depend on (__x86_64__ || __i386__) to fix building it on other architectures. Reviewed-by: David Howells Signed-off-by: Xiao Guangrong Cc: Borislav Petkov Cc: David Ahern Cc: David Howells Cc: Dong Hao Cc: Ingo Molnar Cc: Josh Boyer Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Runzhen Wang Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: x86@kernel.org Link: http://lkml.kernel.org/r/50A9EB89.70901@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 9fa45fa13bd6..283b4397e397 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -22,6 +22,7 @@ #include #include +#if defined(__i386__) || defined(__x86_64__) #include #include #include @@ -896,6 +897,7 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) perf_stat: return cmd_stat(argc, argv, NULL); } +#endif static int __cmd_record(const char *file_name, int argc, const char **argv) { @@ -1018,8 +1020,10 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) return cmd_top(argc, argv, NULL); else if (!strncmp(argv[0], "buildid-list", 12)) return __cmd_buildid_list(file_name, argc, argv); +#if defined(__i386__) || defined(__x86_64__) else if (!strncmp(argv[0], "stat", 4)) return kvm_cmd_stat(file_name, argc, argv); +#endif else usage_with_options(kvm_usage, kvm_options); From 4a6dd664eba59488c9e56b51a594396d7706eb08 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Nov 2012 01:32:45 +0000 Subject: [PATCH 042/112] netfilter: ipset: fix netiface set name overflow attribute is copied to IFNAMSIZ-size stack variable, but IFNAMSIZ is smaller than IPSET_MAXNAMELEN. Fortunately nfnetlink needs CAP_NET_ADMIN. Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netiface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index b9a63381e349..45a101439bc5 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -793,7 +793,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IFACE] = { .type = NLA_NUL_STRING, - .len = IPSET_MAXNAMELEN - 1 }, + .len = IFNAMSIZ - 1 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, From 6567d748c4e94e3481e523803ec07ebd825c80d6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 10 Nov 2012 10:00:06 +0000 Subject: [PATCH 043/112] Revert "drm/i915: enable rc6 on ilk again" Even with the cumulative set of ilk w/a, rc6 is demonstrably still failing and causing GPU hangs as found by Peter Wu. So we need to disable it again until it is stable. This reverts commit 456470eb583f063ee84c6818251e638598be0fb8 Author: Daniel Vetter Date: Wed Aug 8 23:35:40 2012 +0200 drm/i915: enable rc6 on ilk again and the follow-on commit cd7988eea561a70a4f98e431c1395f913672d626 Author: Daniel Vetter Date: Sun Aug 26 20:33:18 2012 +0200 drm/i915: disable rc6 on ilk when vt-d is enabled Note: The situation around the gen4/5 gpu hangs that cropped up in 3.7 is rather strange. Most useful bisects have lead to commit 6c085a728cf000ac1865d66f8c9b52935558b328 Author: Chris Wilson Date: Mon Aug 20 11:40:46 2012 +0200 drm/i915: Track unbound pages or even later commits that affect the gem bo recycling, which all is way past the point where we re-enabled rc6. But somehow reverting/disabling those commits doesn't help, but disabling rc6 at least helps for many hangs on ilk. Obviously it doesn't change anything at all on gen4, and there are still strange issues left on gen5 (which we unfortunately can't readily reproduce). Also, the error_state signature of the hangs which can be fixed with this patch look remarkably different to those which seem to be unaffected by the rc6 settings: The rc6 hangs are in the ring, somewhere in the MI_FLUSH/PIPE_CONTROL sequence to make ilk coherent, wheras all the other hangs tend to be at a random point in the middle of the user batch. So it could also be that we have different issues. Until we grow more clue, this at least helps some users. Reported-by: Peter Wu References: https://bugs.freedesktop.org/show_bug.cgi?id=55984 Signed-off-by: Chris Wilson [danvet: Added note with some more details about the gen4/5 3.7 gpu hang regression.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 72f41aaa71ff..442968f8b201 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2373,15 +2373,9 @@ int intel_enable_rc6(const struct drm_device *dev) if (i915_enable_rc6 >= 0) return i915_enable_rc6; - if (INTEL_INFO(dev)->gen == 5) { -#ifdef CONFIG_INTEL_IOMMU - /* Disable rc6 on ilk if VT-d is on. */ - if (intel_iommu_gfx_mapped) - return false; -#endif - DRM_DEBUG_DRIVER("Ironlake: only RC6 available\n"); - return INTEL_RC6_ENABLE; - } + /* Disable RC6 on Ironlake */ + if (INTEL_INFO(dev)->gen == 5) + return 0; if (IS_HASWELL(dev)) { DRM_DEBUG_DRIVER("Haswell: only RC6 available\n"); From 6bdd253f635f7b2ef027d116933a6c9ec148b87f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 24 Nov 2012 00:32:19 +0100 Subject: [PATCH 044/112] mac80211: fix remain-on-channel (non-)cancelling Felix Liao reported that when an interface is set DOWN while another interface is executing a ROC, the warning in ieee80211_start_next_roc() (about the first item on the list having started already) triggers. This is because ieee80211_roc_purge() calls it even if it never actually changed the list of ROC items. To fix this, simply remove the function call. If it is needed then it will be done by the ieee80211_sw_roc_work() function when the ROC item that is being removed while active is cleaned up. Cc: stable@vger.kernel.org Reported-by: Felix Liao Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/offchannel.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 83608ac16780..2c84185dfdb0 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -458,8 +458,6 @@ void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) list_move_tail(&roc->list, &tmp_list); roc->abort = true; } - - ieee80211_start_next_roc(local); mutex_unlock(&local->mtx); list_for_each_entry_safe(roc, tmp, &tmp_list, list) { From 97d66c47277fc801bc1ee93559a848057181ea55 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 22 Nov 2012 06:17:19 -0300 Subject: [PATCH 045/112] [media] s5p-fimc: Prevent race conditions during subdevs registration Make sure when fimc and fimc-lite capture video node is registered it has valid pipeline_ops assigned to it. Otherwise when a video node is opened right after is was registered there, might be an attempt to use ops that are just being assigned, after function v4l2_device_register_subdev() returns. Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-fimc/fimc-capture.c | 7 ++++++- drivers/media/platform/s5p-fimc/fimc-lite.c | 3 +++ drivers/media/platform/s5p-fimc/fimc-mdevice.c | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/s5p-fimc/fimc-capture.c b/drivers/media/platform/s5p-fimc/fimc-capture.c index 3d39d97abaa5..3fc896b8eeae 100644 --- a/drivers/media/platform/s5p-fimc/fimc-capture.c +++ b/drivers/media/platform/s5p-fimc/fimc-capture.c @@ -1774,9 +1774,13 @@ static int fimc_capture_subdev_registered(struct v4l2_subdev *sd) if (ret) return ret; + fimc->pipeline_ops = v4l2_get_subdev_hostdata(sd); + ret = fimc_register_capture_device(fimc, sd->v4l2_dev); - if (ret) + if (ret) { fimc_unregister_m2m_device(fimc); + fimc->pipeline_ops = NULL; + } return ret; } @@ -1793,6 +1797,7 @@ static void fimc_capture_subdev_unregistered(struct v4l2_subdev *sd) if (video_is_registered(&fimc->vid_cap.vfd)) { video_unregister_device(&fimc->vid_cap.vfd); media_entity_cleanup(&fimc->vid_cap.vfd.entity); + fimc->pipeline_ops = NULL; } kfree(fimc->vid_cap.ctx); fimc->vid_cap.ctx = NULL; diff --git a/drivers/media/platform/s5p-fimc/fimc-lite.c b/drivers/media/platform/s5p-fimc/fimc-lite.c index 9db246bed841..23f203e8a7d3 100644 --- a/drivers/media/platform/s5p-fimc/fimc-lite.c +++ b/drivers/media/platform/s5p-fimc/fimc-lite.c @@ -1263,10 +1263,12 @@ static int fimc_lite_subdev_registered(struct v4l2_subdev *sd) return ret; video_set_drvdata(vfd, fimc); + fimc->pipeline_ops = v4l2_get_subdev_hostdata(sd); ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1); if (ret < 0) { media_entity_cleanup(&vfd->entity); + fimc->pipeline_ops = NULL; return ret; } @@ -1285,6 +1287,7 @@ static void fimc_lite_subdev_unregistered(struct v4l2_subdev *sd) if (video_is_registered(&fimc->vfd)) { video_unregister_device(&fimc->vfd); media_entity_cleanup(&fimc->vfd.entity); + fimc->pipeline_ops = NULL; } } diff --git a/drivers/media/platform/s5p-fimc/fimc-mdevice.c b/drivers/media/platform/s5p-fimc/fimc-mdevice.c index 38ea4d143a49..0531ab70a94c 100644 --- a/drivers/media/platform/s5p-fimc/fimc-mdevice.c +++ b/drivers/media/platform/s5p-fimc/fimc-mdevice.c @@ -352,6 +352,7 @@ static int fimc_register_callback(struct device *dev, void *p) sd = &fimc->vid_cap.subdev; sd->grp_id = FIMC_GROUP_ID; + v4l2_set_subdev_hostdata(sd, (void *)&fimc_pipeline_ops); ret = v4l2_device_register_subdev(&fmd->v4l2_dev, sd); if (ret) { @@ -360,7 +361,6 @@ static int fimc_register_callback(struct device *dev, void *p) return ret; } - fimc->pipeline_ops = &fimc_pipeline_ops; fmd->fimc[fimc->id] = fimc; return 0; } @@ -375,6 +375,7 @@ static int fimc_lite_register_callback(struct device *dev, void *p) return 0; fimc->subdev.grp_id = FLITE_GROUP_ID; + v4l2_set_subdev_hostdata(&fimc->subdev, (void *)&fimc_pipeline_ops); ret = v4l2_device_register_subdev(&fmd->v4l2_dev, &fimc->subdev); if (ret) { @@ -384,7 +385,6 @@ static int fimc_lite_register_callback(struct device *dev, void *p) return ret; } - fimc->pipeline_ops = &fimc_pipeline_ops; fmd->fimc_lite[fimc->index] = fimc; return 0; } From ba6b372cf0af564bdc6c14bf68e078ae34bef222 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 22 Nov 2012 11:12:16 -0300 Subject: [PATCH 046/112] [media] s5p-fimc: Don't use mutex_lock_interruptible() in device release() Use uninterruptible mutex_lock in the release() file op to make sure all resources are properly freed when a process is being terminated. Returning -ERESTARTSYS has no effect for a terminating process and this caused driver resources not to be released. Not releasing the buffer queue also prevented other drivers to free memory, e.g. in MMAP -> USERPTR scenario. This patch is required for stable kernels v3.6+. Reported-by: Kamil Debski Reported-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-fimc/fimc-capture.c | 3 +-- drivers/media/platform/s5p-fimc/fimc-m2m.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/s5p-fimc/fimc-capture.c b/drivers/media/platform/s5p-fimc/fimc-capture.c index 3fc896b8eeae..891ee873c62b 100644 --- a/drivers/media/platform/s5p-fimc/fimc-capture.c +++ b/drivers/media/platform/s5p-fimc/fimc-capture.c @@ -556,8 +556,7 @@ static int fimc_capture_close(struct file *file) dbg("pid: %d, state: 0x%lx", task_pid_nr(current), fimc->state); - if (mutex_lock_interruptible(&fimc->lock)) - return -ERESTARTSYS; + mutex_lock(&fimc->lock); if (--fimc->vid_cap.refcnt == 0) { clear_bit(ST_CAPT_BUSY, &fimc->state); diff --git a/drivers/media/platform/s5p-fimc/fimc-m2m.c b/drivers/media/platform/s5p-fimc/fimc-m2m.c index 4500e44f6857..62afed3162ea 100644 --- a/drivers/media/platform/s5p-fimc/fimc-m2m.c +++ b/drivers/media/platform/s5p-fimc/fimc-m2m.c @@ -718,8 +718,7 @@ static int fimc_m2m_release(struct file *file) dbg("pid: %d, state: 0x%lx, refcnt= %d", task_pid_nr(current), fimc->state, fimc->m2m.refcnt); - if (mutex_lock_interruptible(&fimc->lock)) - return -ERESTARTSYS; + mutex_lock(&fimc->lock); v4l2_m2m_ctx_release(ctx->m2m_ctx); fimc_ctrls_delete(ctx); From ddc43d6dc7df0849fe41b91460fa76145cf87b67 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 22 Nov 2012 11:13:04 -0300 Subject: [PATCH 047/112] [media] fimc-lite: Don't use mutex_lock_interruptible() in device release() Use uninterruptible mutex_lock in the release() file op to make sure all resources are properly freed when a process is being terminated. Returning -ERESTARTSYS has no effect for a terminating process and this may cause driver resources not to be released. This patch is required for stable kernels v3.5+. Reported-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-fimc/fimc-lite.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/platform/s5p-fimc/fimc-lite.c b/drivers/media/platform/s5p-fimc/fimc-lite.c index 23f203e8a7d3..1b309a72f09f 100644 --- a/drivers/media/platform/s5p-fimc/fimc-lite.c +++ b/drivers/media/platform/s5p-fimc/fimc-lite.c @@ -491,8 +491,7 @@ static int fimc_lite_close(struct file *file) struct fimc_lite *fimc = video_drvdata(file); int ret; - if (mutex_lock_interruptible(&fimc->lock)) - return -ERESTARTSYS; + mutex_lock(&fimc->lock); if (--fimc->ref_count == 0 && fimc->out_path == FIMC_IO_DMA) { clear_bit(ST_FLITE_IN_USE, &fimc->state); From 9868018045c555c7a724c0cb7eeb0caeba9d43ff Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 21 Nov 2012 11:46:43 -0300 Subject: [PATCH 048/112] [media] exynos-gsc: Don't use mutex_lock_interruptible() in device release() Use uninterruptible mutex_lock in the release() file op to make sure all resources are properly freed when a process is being terminated. Returning -ERESTARTSYS has no effect for a terminating process and this may cause driver resources not to be released. Reported-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-m2m.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-m2m.c b/drivers/media/platform/exynos-gsc/gsc-m2m.c index 3c7f00577bd9..2650be3ccd5a 100644 --- a/drivers/media/platform/exynos-gsc/gsc-m2m.c +++ b/drivers/media/platform/exynos-gsc/gsc-m2m.c @@ -657,8 +657,7 @@ static int gsc_m2m_release(struct file *file) pr_debug("pid: %d, state: 0x%lx, refcnt= %d", task_pid_nr(current), gsc->state, gsc->m2m.refcnt); - if (mutex_lock_interruptible(&gsc->lock)) - return -ERESTARTSYS; + mutex_lock(&gsc->lock); v4l2_m2m_ctx_release(ctx->m2m_ctx); gsc_ctrls_delete(ctx); From bca36481e6fdc8e64f5837bc1d7f9752f3437c63 Mon Sep 17 00:00:00 2001 From: Shaik Ameer Basha Date: Wed, 7 Nov 2012 03:38:31 -0300 Subject: [PATCH 049/112] [media] exynos-gsc: Fix settings for input and output image RGB type Macros used to set input and output RGB type aren't correct. Updating the macros as per register manual. Signed-off-by: Shaik Ameer Basha Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-regs.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-regs.h b/drivers/media/platform/exynos-gsc/gsc-regs.h index 533e9947a925..4678f9a6a4fd 100644 --- a/drivers/media/platform/exynos-gsc/gsc-regs.h +++ b/drivers/media/platform/exynos-gsc/gsc-regs.h @@ -40,10 +40,10 @@ #define GSC_IN_ROT_YFLIP (2 << 16) #define GSC_IN_ROT_XFLIP (1 << 16) #define GSC_IN_RGB_TYPE_MASK (3 << 14) -#define GSC_IN_RGB_HD_WIDE (3 << 14) -#define GSC_IN_RGB_HD_NARROW (2 << 14) -#define GSC_IN_RGB_SD_WIDE (1 << 14) -#define GSC_IN_RGB_SD_NARROW (0 << 14) +#define GSC_IN_RGB_HD_NARROW (3 << 14) +#define GSC_IN_RGB_HD_WIDE (2 << 14) +#define GSC_IN_RGB_SD_NARROW (1 << 14) +#define GSC_IN_RGB_SD_WIDE (0 << 14) #define GSC_IN_YUV422_1P_ORDER_MASK (1 << 13) #define GSC_IN_YUV422_1P_ORDER_LSB_Y (0 << 13) #define GSC_IN_YUV422_1P_OEDER_LSB_C (1 << 13) @@ -85,10 +85,10 @@ #define GSC_OUT_GLOBAL_ALPHA_MASK (0xff << 24) #define GSC_OUT_GLOBAL_ALPHA(x) ((x) << 24) #define GSC_OUT_RGB_TYPE_MASK (3 << 10) -#define GSC_OUT_RGB_HD_NARROW (3 << 10) -#define GSC_OUT_RGB_HD_WIDE (2 << 10) -#define GSC_OUT_RGB_SD_NARROW (1 << 10) -#define GSC_OUT_RGB_SD_WIDE (0 << 10) +#define GSC_OUT_RGB_HD_WIDE (3 << 10) +#define GSC_OUT_RGB_HD_NARROW (2 << 10) +#define GSC_OUT_RGB_SD_WIDE (1 << 10) +#define GSC_OUT_RGB_SD_NARROW (0 << 10) #define GSC_OUT_YUV422_1P_ORDER_MASK (1 << 9) #define GSC_OUT_YUV422_1P_ORDER_LSB_Y (0 << 9) #define GSC_OUT_YUV422_1P_OEDER_LSB_C (1 << 9) From 24fc681a8ea6efa6bd7d6f366824a31addad2203 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Sat, 10 Nov 2012 19:57:56 -0300 Subject: [PATCH 050/112] [media] exynos-gsc: Add missing video device vfl_dir flag initialization vfl_dir should be set to VFL_DIR_M2M so valid ioctls for this mem-to-mem device can be properly determined in the v4l2 core. Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-m2m.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/exynos-gsc/gsc-m2m.c b/drivers/media/platform/exynos-gsc/gsc-m2m.c index 2650be3ccd5a..c065d040ed94 100644 --- a/drivers/media/platform/exynos-gsc/gsc-m2m.c +++ b/drivers/media/platform/exynos-gsc/gsc-m2m.c @@ -731,6 +731,7 @@ int gsc_register_m2m_device(struct gsc_dev *gsc) gsc->vdev.ioctl_ops = &gsc_m2m_ioctl_ops; gsc->vdev.release = video_device_release_empty; gsc->vdev.lock = &gsc->lock; + gsc->vdev.vfl_dir = VFL_DIR_M2M; snprintf(gsc->vdev.name, sizeof(gsc->vdev.name), "%s.%d:m2m", GSC_MODULE_NAME, gsc->id); From d9acbe1ace5cacc5e104443d94c7402c7fb9dcf9 Mon Sep 17 00:00:00 2001 From: Arun Kumar K Date: Mon, 5 Nov 2012 05:14:03 -0300 Subject: [PATCH 051/112] [media] s5p-mfc: Bug fix of timestamp/timecode copy mechanism Modified the function s5p_mfc_get_dec_y_adr_v6 to access the decode Y address register instead of display Y address. Signed-off-by: Sunil Mazhavanchery Signed-off-by: Arun Kumar K Acked-by: Kamil Debski Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c index 50b5bee3c44e..3a8cfd9fc1bd 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c @@ -1762,7 +1762,7 @@ int s5p_mfc_get_dspl_y_adr_v6(struct s5p_mfc_dev *dev) int s5p_mfc_get_dec_y_adr_v6(struct s5p_mfc_dev *dev) { - return mfc_read(dev, S5P_FIMV_D_DISPLAY_LUMA_ADDR_V6); + return mfc_read(dev, S5P_FIMV_D_DECODED_LUMA_ADDR_V6); } int s5p_mfc_get_dspl_status_v6(struct s5p_mfc_dev *dev) From d2a0db1ee01aea154ccc460e45a16857e32c4427 Mon Sep 17 00:00:00 2001 From: Arun Kumar K Date: Wed, 14 Nov 2012 09:26:45 -0300 Subject: [PATCH 052/112] [media] s5p-mfc: Handle multi-frame input buffer When one input buffer has multiple frames, it should be fed again to the hardware with the remaining bytes. Removed the check for P frame in this scenario as this condition can come with all frame types. Signed-off-by: Arun Kumar K Signed-off-by: ARUN MANKUZHI Acked-by: Kamil Debski Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index 130f4ac8649e..3afe879d54d7 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -381,11 +381,8 @@ static void s5p_mfc_handle_frame(struct s5p_mfc_ctx *ctx, ctx->consumed_stream += s5p_mfc_hw_call(dev->mfc_ops, get_consumed_stream, dev); if (ctx->codec_mode != S5P_MFC_CODEC_H264_DEC && - s5p_mfc_hw_call(dev->mfc_ops, - get_dec_frame_type, dev) == - S5P_FIMV_DECODE_FRAME_P_FRAME - && ctx->consumed_stream + STUFF_BYTE < - src_buf->b->v4l2_planes[0].bytesused) { + ctx->consumed_stream + STUFF_BYTE < + src_buf->b->v4l2_planes[0].bytesused) { /* Run MFC again on the same buffer */ mfc_debug(2, "Running again the same buffer\n"); ctx->after_packed_pb = 1; From c9faaa09e2a1335678f09c70a0d0eda095564bab Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 21 Nov 2012 22:43:59 +0100 Subject: [PATCH 053/112] can: peak_usb: fix hwtstamp assignment The skb->tstamp is set to the hardware timestamp when available in the USB urb message. This leads to user visible timestamps which contain the 'uptime' of the USB adapter - and not the usual system generated timestamp. Fix this wrong assignment by applying the available hardware timestamp to the skb_shared_hwtstamps data structure - which is intended for this purpose. Cc: linux-stable Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 8 ++++++-- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 86f26a1ede4c..25723d8ee201 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -519,8 +519,10 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, mc->pdev->dev.can.state = new_state; if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) { + struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb); + peak_usb_get_ts_tv(&mc->pdev->time_ref, mc->ts16, &tv); - skb->tstamp = timeval_to_ktime(tv); + hwts->hwtstamp = timeval_to_ktime(tv); } netif_rx(skb); @@ -605,6 +607,7 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) struct sk_buff *skb; struct can_frame *cf; struct timeval tv; + struct skb_shared_hwtstamps *hwts; skb = alloc_can_skb(mc->netdev, &cf); if (!skb) @@ -652,7 +655,8 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) /* convert timestamp into kernel time */ peak_usb_get_ts_tv(&mc->pdev->time_ref, mc->ts16, &tv); - skb->tstamp = timeval_to_ktime(tv); + hwts = skb_hwtstamps(skb); + hwts->hwtstamp = timeval_to_ktime(tv); /* push the skb */ netif_rx(skb); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index e1626d92511a..30d79bfa5b10 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -532,6 +532,7 @@ static int pcan_usb_pro_handle_canmsg(struct pcan_usb_pro_interface *usb_if, struct can_frame *can_frame; struct sk_buff *skb; struct timeval tv; + struct skb_shared_hwtstamps *hwts; skb = alloc_can_skb(netdev, &can_frame); if (!skb) @@ -549,7 +550,8 @@ static int pcan_usb_pro_handle_canmsg(struct pcan_usb_pro_interface *usb_if, memcpy(can_frame->data, rx->data, can_frame->can_dlc); peak_usb_get_ts_tv(&usb_if->time_ref, le32_to_cpu(rx->ts32), &tv); - skb->tstamp = timeval_to_ktime(tv); + hwts = skb_hwtstamps(skb); + hwts->hwtstamp = timeval_to_ktime(tv); netif_rx(skb); netdev->stats.rx_packets++; @@ -570,6 +572,7 @@ static int pcan_usb_pro_handle_error(struct pcan_usb_pro_interface *usb_if, u8 err_mask = 0; struct sk_buff *skb; struct timeval tv; + struct skb_shared_hwtstamps *hwts; /* nothing should be sent while in BUS_OFF state */ if (dev->can.state == CAN_STATE_BUS_OFF) @@ -664,7 +667,8 @@ static int pcan_usb_pro_handle_error(struct pcan_usb_pro_interface *usb_if, dev->can.state = new_state; peak_usb_get_ts_tv(&usb_if->time_ref, le32_to_cpu(er->ts32), &tv); - skb->tstamp = timeval_to_ktime(tv); + hwts = skb_hwtstamps(skb); + hwts->hwtstamp = timeval_to_ktime(tv); netif_rx(skb); netdev->stats.rx_packets++; netdev->stats.rx_bytes += can_frame->can_dlc; From 81b401100c01d2357031e874689f89bd788d13cd Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 26 Nov 2012 22:24:23 +0100 Subject: [PATCH 054/112] can: bcm: initialize ifindex for timeouts without previous frame reception Set in the rx_ifindex to pass the correct interface index in the case of a message timeout detection. Usually the rx_ifindex value is set at receive time. But when no CAN frame has been received the RX_TIMEOUT notification did not contain a valid value. Cc: linux-stable Reported-by: Andre Naujoks Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/bcm.c b/net/can/bcm.c index 6f747582718e..969b7cdff59d 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1084,6 +1084,9 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->sk = sk; op->ifindex = ifindex; + /* ifindex for timeout events w/o previous frame reception */ + op->rx_ifindex = ifindex; + /* initialize uninitialized (kzalloc) structure */ hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->timer.function = bcm_rx_timeout_handler; From e1a676424c290b1c8d757e3860170ac7ecd89af4 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 24 Nov 2012 18:54:37 +0000 Subject: [PATCH 055/112] ipv4: avoid passing NULL to inet_putpeer() in icmpv4_xrlim_allow() inet_getpeer_v4() can return NULL under OOM conditions, and while inet_peer_xrlim_allow() is OK with a NULL peer, inet_putpeer() will crash. This code path now uses the same idiom as the others from: 1d861aa4b3fb08822055345f480850205ffe6170 ("inet: Minimize use of cached route inetpeer."). Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f2eccd531746..17ff9fd7cdda 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -257,7 +257,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); - inet_putpeer(peer); + if (peer) + inet_putpeer(peer); } out: return rc; From b49d3c1e1c948d76d64790abe9acffa9fa747d19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Nov 2012 09:44:29 +0000 Subject: [PATCH 056/112] net: ipmr: limit MRT_TABLE identifiers Name of pimreg devices are built from following format : char name[IFNAMSIZ]; // IFNAMSIZ == 16 sprintf(name, "pimreg%u", mrt->id); We must therefore limit mrt->id to 9 decimal digits or risk a buffer overflow and a crash. Restrict table identifiers in [0 ... 999999999] interval. Reported-by: Chen Gang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6168c4dc58b1..3eab2b2ffd34 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1318,6 +1318,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi if (get_user(v, (u32 __user *)optval)) return -EFAULT; + /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (v != RT_TABLE_DEFAULT && v >= 1000000000) + return -EINVAL; + rtnl_lock(); ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { From 9d34340ebd00eaadbd414e02acc881c5fae903d3 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 26 Nov 2012 18:06:13 -0500 Subject: [PATCH 057/112] c6x: run do_notify_resume with interrupts enabled C6x was mistakenly running do_notify_resume with interrupts disabled. This would triggerlead to a warning in local_bh_enable() because interrupts were disabled: ------------[ cut here ]------------ WARNING: at /es/linux/linux-next/kernel/softirq.c:160 local_bh_enable+0x5c/0x10c() Modules linked in: e02f384d e002cda8 e02f3469 e02f384d 000000a0 e00363fc e01cce58 e5005c00 e0327986 00000000 e63c0aec 00000164 e00363fc 00000000 fffffffe e5005c00 e61fde00 e0268184 00000134 e01c91dc 00000001 fffffffe 00000000 10000100 e01c80e4 e5005c00 00000000 00000000 00000000 e63c0aec e526ce00 10000100 e628f920 e63c0a88 e6010410 e6449750 e5005c20 00000000 00000000 e63c0a80 e5005c20 e01c8590 e63c0a80 e5005c20 e63c0aec e00a0554 e009c758 e639e860 irq_spurious_proc_fops+0x6ad/0x3438 warn_slowpath_common+0x8c/0xb8 irq_spurious_proc_fops+0x2c9/0x3438 irq_spurious_proc_fops+0x6ad/0x3438 local_bh_enable+0x5c/0x10c sk_alloc+0x34/0xa4 local_bh_enable+0x5c/0x10c unix_release_sock+0x5c/0x2a0 sys_connect+0x94/0xd4 sock_release+0x38/0x104 sock_close+0x3c/0x54 __fput+0x154/0x2ec filp_close+0xc0/0xe4 task_work_run+0xdc/0x12c sys_close+0x2c/0x74 resume_userspace+0x0/0x30 ---[ end trace a70cbd610ae1f6b4 ]--- This patch enables interrupts before calling do_notify_resume(). Signed-off-by: Mark Salter --- arch/c6x/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S index 5449c36018fe..0c164c2c22be 100644 --- a/arch/c6x/kernel/entry.S +++ b/arch/c6x/kernel/entry.S @@ -277,6 +277,8 @@ work_rescheduled: [A1] BNOP .S1 work_resched,5 work_notifysig: + ;; enable interrupts for do_notify_resume() + UNMASK_INT B2 B .S2 do_notify_resume LDW .D2T1 *+SP(REGS__END+8),A6 ; syscall flag ADDKPC .S2 resume_userspace,B3,1 From 9c0603f487b6b0cfa0b2bbcab7f06333e78db652 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 26 Nov 2012 18:14:00 -0500 Subject: [PATCH 058/112] c6x: fix misleading comment A comment in entry.S incorrectly stated that interrupt vectors called __do_IRQ() and that int6 vector was used for syscalls. Both statements are incorrect for the current kernel, so this patch cleans up the wording to reflect current reality. Signed-off-by: Mark Salter --- arch/c6x/kernel/entry.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S index 0c164c2c22be..0ed6157dd256 100644 --- a/arch/c6x/kernel/entry.S +++ b/arch/c6x/kernel/entry.S @@ -429,8 +429,7 @@ ENTRY(ret_from_kernel_execve) ENDPROC(ret_from_kernel_execve) ;; - ;; These are the interrupt handlers, responsible for calling __do_IRQ() - ;; int6 is used for syscalls (see _system_call entry) + ;; These are the interrupt handlers, responsible for calling c6x_do_IRQ() ;; .macro SAVE_ALL_INT SAVE_ALL IRP,ITSR From cb7cb2864e758a1b040040bc55e404c677c911cb Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 21 Nov 2012 14:41:21 -0800 Subject: [PATCH 059/112] x86, kvm: Remove incorrect redundant assembly constraint In __emulate_1op_rax_rdx, we use "+a" and "+d" which are input/output constraints, and *then* use "a" and "d" as input constraints. This is incorrect, but happens to work on some versions of gcc. However, it breaks gcc with -O0 and icc, and may break on future versions of gcc. Reported-and-tested-by: Melanie Blower Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/B3584E72CFEBED439A3ECA9BCE67A4EF1B17AF90@FMSMSX107.amr.corp.intel.com Reviewed-by: Paolo Bonzini Acked-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 39171cb307ea..bba39bfa1c4b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -426,8 +426,7 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) _ASM_EXTABLE(1b, 3b) \ : "=m" ((ctxt)->eflags), "=&r" (_tmp), \ "+a" (*rax), "+d" (*rdx), "+qm"(_ex) \ - : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val), \ - "a" (*rax), "d" (*rdx)); \ + : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val)); \ } while (0) /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ From 874807a83139abc094f939e93623c5623573d543 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 27 Nov 2012 12:14:40 +1100 Subject: [PATCH 060/112] md/raid1{,0}: fix deadlock in bitmap_unplug. If the raid1 or raid10 unplug function gets called from a make_request function (which is very possible) when there are bios on the current->bio_list list, then it will not be able to successfully call bitmap_unplug() and it could need to submit more bios and wait for them to complete. But they won't complete while current->bio_list is non-empty. So detect that case and handle the unplugging off to another thread just like we already do when called from within the scheduler. RAID1 version of bug was introduced in 3.6, so that part of fix is suitable for 3.6.y. RAID10 part won't apply. Cc: stable@vger.kernel.org Reported-by: Torsten Kaiser Reported-by: Peter Maloney Signed-off-by: NeilBrown --- drivers/md/raid1.c | 2 +- drivers/md/raid10.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 636bae0405e8..a0f73092176e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -963,7 +963,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) struct r1conf *conf = mddev->private; struct bio *bio; - if (from_schedule) { + if (from_schedule || current->bio_list) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->pending_bio_list, &plug->pending); conf->pending_count += plug->pending_cnt; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0d5d0ff2c0f7..c9acbd717131 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1069,7 +1069,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) struct r10conf *conf = mddev->private; struct bio *bio; - if (from_schedule) { + if (from_schedule || current->bio_list) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->pending_bio_list, &plug->pending); conf->pending_count += plug->pending_cnt; From 6662c34fa9c60a48aaa5879cb229cd9a84de9c22 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 27 Nov 2012 08:54:36 -0800 Subject: [PATCH 061/112] x86-32: Unbreak booting on some 486 clones There appear to have been some 486 clones, including the "enhanced" version of Am486, which have CPUID but not CR4. These 486 clones had only the FPU flag, if any, unlike the Intel 486s with CPUID, which also had VME and therefore needed CR4. Therefore, look at the basic CPUID flags and require at least one bit other than bit 0 before we modify CR4. Thanks to Christian Ludloff of sandpile.org for confirming this as a problem. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 957a47aec64e..4dac2f68ed4a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -292,8 +292,8 @@ default_entry: * be using the global pages. * * NOTE! If we are on a 486 we may have no cr4 at all! - * Specifically, cr4 exists if and only if CPUID exists, - * which in turn exists if and only if EFLAGS.ID exists. + * Specifically, cr4 exists if and only if CPUID exists + * and has flags other than the FPU flag set. */ movl $X86_EFLAGS_ID,%ecx pushl %ecx @@ -308,6 +308,11 @@ default_entry: testl %ecx,%eax jz 6f # No ID flag = no CPUID = no CR4 + movl $1,%eax + cpuid + andl $~1,%edx # Ignore CPUID.FPU + jz 6f # No flags or only CPUID.FPU = no CR4 + movl pa(mmu_cr4_features),%eax movl %eax,%cr4 From 3a98b8614312026d489e56c1d0e294a68e2aad77 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Nov 2012 09:48:41 -0500 Subject: [PATCH 062/112] cifs: fix writeback race with file that is growing Commit eddb079deb4 created a regression in the writepages codepath. Previously, whenever it needed to check the size of the file, it did so by consulting the inode->i_size field directly. With that patch, the i_size was fetched once on entry into the writepages code and that value was used henceforth. If the file is changing size though (for instance, if someone is writing to it or has truncated it), then that value is likely to be wrong. This can lead to data corruption. Pages past the EOF at the time that the writepages call was issued may be silently dropped and ignored because cifs_writepages wrongly assumes that the file must have been truncated in the interim. Fix cifs_writepages to properly fetch the size from the inode->i_size field instead to properly account for this possibility. Original bug report is here: https://bugzilla.kernel.org/show_bug.cgi?id=50991 Reported-and-Tested-by: Maxim Britov Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index edb25b4bbb95..70b6f4c3a0c1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1794,7 +1794,6 @@ static int cifs_writepages(struct address_space *mapping, struct TCP_Server_Info *server; struct page *page; int rc = 0; - loff_t isize = i_size_read(mapping->host); /* * If wsize is smaller than the page cache size, default to writing @@ -1899,7 +1898,7 @@ retry: */ set_page_writeback(page); - if (page_offset(page) >= isize) { + if (page_offset(page) >= i_size_read(mapping->host)) { done = true; unlock_page(page); end_page_writeback(page); @@ -1932,7 +1931,8 @@ retry: wdata->offset = page_offset(wdata->pages[0]); wdata->pagesz = PAGE_CACHE_SIZE; wdata->tailsz = - min(isize - page_offset(wdata->pages[nr_pages - 1]), + min(i_size_read(mapping->host) - + page_offset(wdata->pages[nr_pages - 1]), (loff_t)PAGE_CACHE_SIZE); wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz; From fc58acdbf153f12783b80cb19c04cc9de121b518 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 27 Nov 2012 16:12:29 -0500 Subject: [PATCH 063/112] radeon: fix pll/ctrc mapping on dce2 and dce3 hardware This fix black screen on resume issue that some people are experiencing. There is a bug in the atombios code regarding pll/crtc mapping. The atombios code reverse the logic for the pll and crtc mapping. agd5f: drop unnecessary crtc id check, cc stable in case we miss 3.7. This fixes the root cause that was worked around by commits: drm/radeon: allocate PPLLs from low to high drm/radeon/dce3: switch back to old pll allocation order for discrete Signed-off-by: Jerome Glisse Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_crtc.c | 48 ++++++++------------------ 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 3bce0299f64a..24d932f53203 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1696,42 +1696,22 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) return ATOM_PPLL2; DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; - } else if (ASIC_IS_AVIVO(rdev)) { - /* in DP mode, the DP ref clock can come from either PPLL - * depending on the asic: - * DCE3: PPLL1 or PPLL2 - */ - if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(radeon_crtc->encoder))) { - /* use the same PPLL for all DP monitors */ - pll = radeon_get_shared_dp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } else { - /* use the same PPLL for all monitors with the same clock */ - pll = radeon_get_shared_nondp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } - /* all other cases */ - pll_in_use = radeon_get_pll_use_mask(crtc); - /* the order shouldn't matter here, but we probably - * need this until we have atomic modeset - */ - if (rdev->flags & RADEON_IS_IGP) { - if (!(pll_in_use & (1 << ATOM_PPLL1))) - return ATOM_PPLL1; - if (!(pll_in_use & (1 << ATOM_PPLL2))) - return ATOM_PPLL2; - } else { - if (!(pll_in_use & (1 << ATOM_PPLL2))) - return ATOM_PPLL2; - if (!(pll_in_use & (1 << ATOM_PPLL1))) - return ATOM_PPLL1; - } - DRM_ERROR("unable to allocate a PPLL\n"); - return ATOM_PPLL_INVALID; } else { /* on pre-R5xx asics, the crtc to pll mapping is hardcoded */ + /* some atombios (observed in some DCE2/DCE3) code have a bug, + * the matching btw pll and crtc is done through + * PCLK_CRTC[1|2]_CNTL (0x480/0x484) but atombios code use the + * pll (1 or 2) to select which register to write. ie if using + * pll1 it will use PCLK_CRTC1_CNTL (0x480) and if using pll2 + * it will use PCLK_CRTC2_CNTL (0x484), it then use crtc id to + * choose which value to write. Which is reverse order from + * register logic. So only case that works is when pllid is + * same as crtcid or when both pll and crtc are enabled and + * both use same clock. + * + * So just return crtc id as if crtc and pll were hard linked + * together even if they aren't + */ return radeon_crtc->crtc_id; } } From 42709efb3a47524c6252e1bdc85e205f7bc356fb Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 16 Oct 2012 09:02:27 -0400 Subject: [PATCH 064/112] i7core_edac: fix panic when accessing sysfs files The i7core_edac addrmatch_dev and chancounts_dev have sysfs files associated with them. The sysfs files, however, are coded so that the parent device is is the mci device. This is incorrect and the mci struct should be obtained through the addrmatch_dev and chancounts_dev device's private data field which is populated in i7core_create_sysfs_devices(). Signed-off-by: Prarit Bhargava Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 3672101023bd..10c8c00d6469 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -816,7 +816,7 @@ static ssize_t i7core_inject_store_##param( \ struct device_attribute *mattr, \ const char *data, size_t count) \ { \ - struct mem_ctl_info *mci = to_mci(dev); \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt; \ long value; \ int rc; \ @@ -845,7 +845,7 @@ static ssize_t i7core_inject_show_##param( \ struct device_attribute *mattr, \ char *data) \ { \ - struct mem_ctl_info *mci = to_mci(dev); \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt; \ \ pvt = mci->pvt_info; \ @@ -1052,7 +1052,7 @@ static ssize_t i7core_show_counter_##param( \ struct device_attribute *mattr, \ char *data) \ { \ - struct mem_ctl_info *mci = to_mci(dev); \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt = mci->pvt_info; \ \ edac_dbg(1, "\n"); \ From ea3aacf5f22c1002cac97985354658072232e280 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 23 Nov 2012 18:55:30 +0100 Subject: [PATCH 065/112] ARM: 7586/1: sp804: set cpumask to cpu_possible_mask for clock event device The SP804 driver statically initialises the cpumask of the clock event device to be cpu_all_mask, which is derived from the compile-time constant NR_CPUS. This breaks SMP_ON_UP systems where the interrupt controller handling the sp804 doesn't have the irq_set_affinity callback on the irq_chip, because the common timer code fails to identify the device as cpu-local and ends up treating it as a broadcast device instead. This patch fixes the problem by using cpu_possible_mask at runtime, which will correctly represent the possible CPUs when SMP_ON_UP is being used. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/common/timer-sp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index df13a3ffff35..9d2d3ba339ff 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -162,7 +162,6 @@ static struct clock_event_device sp804_clockevent = { .set_mode = sp804_set_mode, .set_next_event = sp804_set_next_event, .rating = 300, - .cpumask = cpu_all_mask, }; static struct irqaction sp804_timer_irq = { @@ -185,6 +184,7 @@ void __init sp804_clockevents_init(void __iomem *base, unsigned int irq, clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ); evt->name = name; evt->irq = irq; + evt->cpumask = cpu_possible_mask; setup_irq(irq, &sp804_timer_irq); clockevents_config_and_register(evt, rate, 0xf, 0xffffffff); From 4b05a1c74d1cfae37cf6ff293ee928350f031418 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 27 Nov 2012 22:59:52 -0500 Subject: [PATCH 066/112] percpu-rwsem: use synchronize_sched_expedited Use synchronize_sched_expedited() instead of synchronize_sched() to improve mount speed. This patch improves mount time from 0.500s to 0.013s for Jeff's test-case. Signed-off-by: Mikulas Patocka Reported-and-tested-by: Jeff Chua Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 250a4acddb2b..bd1e86071e57 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -13,7 +13,7 @@ struct percpu_rw_semaphore { }; #define light_mb() barrier() -#define heavy_mb() synchronize_sched() +#define heavy_mb() synchronize_sched_expedited() static inline void percpu_down_read(struct percpu_rw_semaphore *p) { @@ -51,7 +51,7 @@ static inline void percpu_down_write(struct percpu_rw_semaphore *p) { mutex_lock(&p->mtx); p->locked = true; - synchronize_sched(); /* make sure that all readers exit the rcu_read_lock_sched region */ + synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */ while (__percpu_count(p->counters)) msleep(1); heavy_mb(); /* C, between read of p->counter and write to data, paired with B */ From c772aa92b6deb2857d4b39a5cc3bd3679cc5f4a6 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 28 Nov 2012 15:27:54 +0400 Subject: [PATCH 067/112] CIFS: Fix wrong buffer pointer usage in smb_set_file_info Commit 6bdf6dbd662176c0da5c3ac8ed10ac94e7776c85 caused a regression in setattr codepath that leads to files with wrong attributes. Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/smb1ops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 56cc4be87807..34cea2798333 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -766,7 +766,6 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; - FILE_BASIC_INFO info_buf; /* if the file is already open for write, just use that fileid */ open_file = find_writable_file(cinode, true); @@ -817,7 +816,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, netpid = current->tgid; set_via_filehandle: - rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid); + rc = CIFSSMBSetFileInfo(xid, tcon, buf, netfid, netpid); if (!rc) cinode->cifsAttrs = le32_to_cpu(buf->Attributes); From be364c8c0f17a3dd42707b5a090b318028538eb9 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Tue, 27 Nov 2012 04:01:46 +0000 Subject: [PATCH 068/112] sctp: fix memory leak in sctp_datamsg_from_user() when copy from user space fails Trinity (the syscall fuzzer) discovered a memory leak in SCTP, reproducible e.g. with the sendto() syscall by passing invalid user space pointer in the second argument: #include #include #include int main(void) { int fd; struct sockaddr_in sa; fd = socket(AF_INET, SOCK_STREAM, 132 /*IPPROTO_SCTP*/); if (fd < 0) return 1; memset(&sa, 0, sizeof(sa)); sa.sin_family = AF_INET; sa.sin_addr.s_addr = inet_addr("127.0.0.1"); sa.sin_port = htons(11111); sendto(fd, NULL, 1, 0, (struct sockaddr *)&sa, sizeof(sa)); return 0; } As far as I can tell, the leak has been around since ~2003. Signed-off-by: Tommi Rantala Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/chunk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7c2df9c33df3..f2aebdbb1eda 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -284,7 +284,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, goto errout; err = sctp_user_addto_chunk(chunk, offset, len, msgh->msg_iov); if (err < 0) - goto errout; + goto errout_chunk_free; offset += len; @@ -324,7 +324,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr - (__u8 *)chunk->skb->data); if (err < 0) - goto errout; + goto errout_chunk_free; sctp_datamsg_assign(msg, chunk); list_add_tail(&chunk->frag_list, &msg->chunks); @@ -332,6 +332,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, return msg; +errout_chunk_free: + sctp_chunk_free(chunk); + errout: list_for_each_safe(pos, temp, &msg->chunks) { list_del_init(pos); From 6e51fe7572590d8d86e93b547fab6693d305fd0d Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 22 Nov 2012 03:23:16 +0000 Subject: [PATCH 069/112] sctp: fix -ENOMEM result with invalid user space pointer in sendto() syscall Consider the following program, that sets the second argument to the sendto() syscall incorrectly: #include #include #include int main(void) { int fd; struct sockaddr_in sa; fd = socket(AF_INET, SOCK_STREAM, 132 /*IPPROTO_SCTP*/); if (fd < 0) return 1; memset(&sa, 0, sizeof(sa)); sa.sin_family = AF_INET; sa.sin_addr.s_addr = inet_addr("127.0.0.1"); sa.sin_port = htons(11111); sendto(fd, NULL, 1, 0, (struct sockaddr *)&sa, sizeof(sa)); return 0; } We get -ENOMEM: $ strace -e sendto ./demo sendto(3, NULL, 1, 0, {sa_family=AF_INET, sin_port=htons(11111), sin_addr=inet_addr("127.0.0.1")}, 16) = -1 ENOMEM (Cannot allocate memory) Propagate the error code from sctp_user_addto_chunk(), so that we will tell user space what actually went wrong: $ strace -e sendto ./demo sendto(3, NULL, 1, 0, {sa_family=AF_INET, sin_port=htons(11111), sin_addr=inet_addr("127.0.0.1")}, 16) = -1 EFAULT (Bad address) Noticed while running Trinity (the syscall fuzzer). Signed-off-by: Tommi Rantala Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/chunk.c | 13 +++++++++---- net/sctp/socket.c | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index f2aebdbb1eda..69ce21e3716f 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -183,7 +183,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg = sctp_datamsg_new(GFP_KERNEL); if (!msg) - return NULL; + return ERR_PTR(-ENOMEM); /* Note: Calculate this outside of the loop, so that all fragments * have the same expiration. @@ -280,8 +280,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0); - if (!chunk) + if (!chunk) { + err = -ENOMEM; goto errout; + } + err = sctp_user_addto_chunk(chunk, offset, len, msgh->msg_iov); if (err < 0) goto errout_chunk_free; @@ -315,8 +318,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0); - if (!chunk) + if (!chunk) { + err = -ENOMEM; goto errout; + } err = sctp_user_addto_chunk(chunk, offset, over,msgh->msg_iov); @@ -342,7 +347,7 @@ errout: sctp_chunk_free(chunk); } sctp_datamsg_put(msg); - return NULL; + return ERR_PTR(err); } /* Check whether this message has expired. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a60d1f8b41c5..406d957d08fb 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1915,8 +1915,8 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Break the message into multiple chunks of maximum size. */ datamsg = sctp_datamsg_from_user(asoc, sinfo, msg, msg_len); - if (!datamsg) { - err = -ENOMEM; + if (IS_ERR(datamsg)) { + err = PTR_ERR(datamsg); goto out_free; } From 92d64c261e77cb2a6117887617e2a629fea6b67c Mon Sep 17 00:00:00 2001 From: Schoch Christian Date: Wed, 28 Nov 2012 05:18:29 +0000 Subject: [PATCH 070/112] sctp: Error in calculation of RTTvar The calculation of RTTVAR involves the subtraction of two unsigned numbers which may causes rollover and results in very high values of RTTVAR when RTT > SRTT. With this patch it is possible to set RTOmin = 1 to get the minimum of RTO at 4 times the clock granularity. Change Notes: v2) *Replaced abs() by abs64() and long by __s64, changed patch description. Signed-off-by: Christian Schoch CC: Vlad Yasevich CC: Sridhar Samudrala CC: Neil Horman CC: linux-sctp@vger.kernel.org Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 953c21e4af97..206cf5238fd3 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -331,7 +331,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * 1/8, rto_alpha would be expressed as 3. */ tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) - + ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta); + + (((__u32)abs64((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + (rtt >> net->sctp.rto_alpha); } else { From 29bb8f4a8d94d017700d09ee643483b0708cbd7b Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 28 Nov 2012 13:43:15 +0200 Subject: [PATCH 071/112] net/mlx4_en: Can set maxrate only for TC0 Had a typo in memcpy. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 5d36795877cb..b799ab12a291 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -237,7 +237,7 @@ static int mlx4_en_dcbnl_ieee_setmaxrate(struct net_device *dev, if (err) return err; - memcpy(priv->maxrate, tmp, sizeof(*priv->maxrate)); + memcpy(priv->maxrate, tmp, sizeof(priv->maxrate)); return 0; } From ba695af067f9cadfec84457ac06b44e3fa849b15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sun, 25 Nov 2012 06:03:59 +0000 Subject: [PATCH 072/112] net: qmi_wwan: add Huawei E173 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Huawei E173 is a QMI/wwan device which normally appear as 12d1:1436 in Linux. The descriptors displayed in that mode will be picked up by cdc_ether. But the modem has another mode with a different device ID and a slightly different set of descriptors. This is the mode used by Windows like this: 3Modem: USB\VID_12D1&PID_140C&MI_00\6&3A1D2012&0&0000 Networkcard: USB\VID_12D1&PID_140C&MI_01\6&3A1D2012&0&0001 Appli.Inter: USB\VID_12D1&PID_140C&MI_02\6&3A1D2012&0&0002 PC UI Inter: USB\VID_12D1&PID_140C&MI_03\6&3A1D2012&0&0003 Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3b566fa0f8e6..1ea91f4237f0 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -385,6 +385,7 @@ static const struct usb_device_id products[] = { }, /* 3. Combined interface devices matching on interface number */ + {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x19d2, 0x0002, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0012, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0017, 3)}, From c3b2c2581988d304127c2aa218430b5fca88ce56 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Mon, 26 Nov 2012 04:16:44 +0000 Subject: [PATCH 073/112] irda: irttp: fix memory leak in irttp_open_tsap() error path Cleanup the memory we allocated earlier in irttp_open_tsap() when we hit this error path. The leak goes back to at least 1da177e4 ("Linux-2.6.12-rc2"). Discovered with Trinity (the syscall fuzzer). Signed-off-by: Tommi Rantala Signed-off-by: David S. Miller --- net/irda/irttp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 1002e3396f72..ae43c62f9045 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -441,6 +441,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0); if (lsap == NULL) { IRDA_DEBUG(0, "%s: unable to allocate LSAP!!\n", __func__); + __irttp_close_tsap(self); return NULL; } From bd97120fc3d1a11f3124c7c9ba1d91f51829eb85 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 26 Nov 2012 05:57:27 +0000 Subject: [PATCH 074/112] vhost: fix length for cross region descriptor If a single descriptor crosses a region, the second chunk length should be decremented by size translated so far, instead it includes the full descriptor length. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 99ac2cb08b43..dedaf81d8f36 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1076,7 +1076,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, } _iov = iov + ret; size = reg->memory_size - addr + reg->guest_phys_addr; - _iov->iov_len = min((u64)len, size); + _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) (reg->userspace_addr + addr - reg->guest_phys_addr); s += size; From fcdc90b025e69a38f9ec2742df099c7ddaa331fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 27 Nov 2012 07:29:35 +0000 Subject: [PATCH 075/112] atm: forever loop loading ambassador firmware There was a forever loop introduced here when we converted this to request_firmware() back in 2008. Signed-off-by: Dan Carpenter Acked-by: Chas Williams Signed-off-by: David S. Miller --- drivers/atm/ambassador.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 89b30f32ba68..ff7bb8a42ed6 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -1961,6 +1961,7 @@ static int __devinit ucode_init (loader_block * lb, amb_dev * dev) { res = loader_verify(lb, dev, rec); if (res) break; + rec = ihex_next_binrec(rec); } release_firmware(fw); if (!res) From 3ed7147189d2fbe8ac6da95db2fd9d6d52f53ce9 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 28 Nov 2012 06:13:10 +0000 Subject: [PATCH 076/112] team: fix hw_features setup Do this in the same way bonding does. This fixed setup resolves performance issues when using some cards with certain offloading. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index d44cca327588..ad86660fb8f9 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1794,10 +1794,12 @@ static void team_setup(struct net_device *dev) dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GRO; - dev->hw_features = NETIF_F_HW_VLAN_TX | + dev->hw_features = TEAM_VLAN_FEATURES | + NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); dev->features |= dev->hw_features; } From de0eed2813ce42b8e0320a86cbe373266c904044 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Wed, 28 Nov 2012 14:16:05 -0500 Subject: [PATCH 077/112] c6x: remove internal kernel symbols from exported setup.h Some internal kernel symbols were referenced in the exported setup.h. This splits out the internal bits from the exported uapi bits. Signed-off-by: Mark Salter --- arch/c6x/include/asm/setup.h | 33 +++++++++++++++++++++++++++++++ arch/c6x/include/uapi/asm/setup.h | 33 +++---------------------------- 2 files changed, 36 insertions(+), 30 deletions(-) create mode 100644 arch/c6x/include/asm/setup.h diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h new file mode 100644 index 000000000000..ecead15872a6 --- /dev/null +++ b/arch/c6x/include/asm/setup.h @@ -0,0 +1,33 @@ +/* + * Port on Texas Instruments TMS320C6x architecture + * + * Copyright (C) 2004, 2009, 2010 2011 Texas Instruments Incorporated + * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_C6X_SETUP_H +#define _ASM_C6X_SETUP_H + +#include + +#ifndef __ASSEMBLY__ +extern char c6x_command_line[COMMAND_LINE_SIZE]; + +extern int c6x_add_memory(phys_addr_t start, unsigned long size); + +extern unsigned long ram_start; +extern unsigned long ram_end; + +extern int c6x_num_cores; +extern unsigned int c6x_silicon_rev; +extern unsigned int c6x_devstat; +extern unsigned char c6x_fuse_mac[6]; + +extern void machine_init(unsigned long dt_ptr); +extern void time_init(void); + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_C6X_SETUP_H */ diff --git a/arch/c6x/include/uapi/asm/setup.h b/arch/c6x/include/uapi/asm/setup.h index a01e31896fa9..ad9ac97a8dad 100644 --- a/arch/c6x/include/uapi/asm/setup.h +++ b/arch/c6x/include/uapi/asm/setup.h @@ -1,33 +1,6 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010 2011 Texas Instruments Incorporated - * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _ASM_C6X_SETUP_H -#define _ASM_C6X_SETUP_H +#ifndef _UAPI_ASM_C6X_SETUP_H +#define _UAPI_ASM_C6X_SETUP_H #define COMMAND_LINE_SIZE 1024 -#ifndef __ASSEMBLY__ -extern char c6x_command_line[COMMAND_LINE_SIZE]; - -extern int c6x_add_memory(phys_addr_t start, unsigned long size); - -extern unsigned long ram_start; -extern unsigned long ram_end; - -extern int c6x_num_cores; -extern unsigned int c6x_silicon_rev; -extern unsigned int c6x_devstat; -extern unsigned char c6x_fuse_mac[6]; - -extern void machine_init(unsigned long dt_ptr); -extern void time_init(void); - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_C6X_SETUP_H */ +#endif /* _UAPI_ASM_C6X_SETUP_H */ From 93bbd0c087eb299e0fe11c59d340932180c082b5 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Wed, 28 Nov 2012 14:22:41 -0500 Subject: [PATCH 078/112] c6x: use generic kvm_para.h Signed-off-by: Mark Salter --- arch/c6x/include/uapi/asm/Kbuild | 2 ++ arch/c6x/include/uapi/asm/kvm_para.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) delete mode 100644 arch/c6x/include/uapi/asm/kvm_para.h diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index c312b424c433..e9bc2b2b8147 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -1,6 +1,8 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += kvm_para.h + header-y += byteorder.h header-y += kvm_para.h header-y += ptrace.h diff --git a/arch/c6x/include/uapi/asm/kvm_para.h b/arch/c6x/include/uapi/asm/kvm_para.h deleted file mode 100644 index 14fab8f0b957..000000000000 --- a/arch/c6x/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1 +0,0 @@ -#include From 9436d5c32b1ac8d4efdbc933611b5e699908854e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Nov 2012 01:15:42 -0500 Subject: [PATCH 079/112] um: get_safe_registers() should be done in flush_thread(), not start_thread() ... or we'll end up buggering the results of ELF_PLAT_INIT() Signed-off-by: Al Viro --- arch/um/kernel/exec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 3a8ece7d09ca..0d7103c9eff3 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -32,13 +32,14 @@ void flush_thread(void) "err = %d\n", ret); force_sig(SIGKILL, current); } + get_safe_registers(current_pt_regs()->regs.gp, + current_pt_regs()->regs.fp); __switch_mm(¤t->mm->context.id); } void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { - get_safe_registers(regs->regs.gp, regs->regs.fp); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; current->ptrace &= ~PT_DTRACE; From d26654e5f9cd27815ab5bc8148c826f3b960396c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Sep 2012 15:49:09 -0400 Subject: [PATCH 080/112] openrisk: fix altstack switching on sigreturn do_sigaltstack() expects _userland_ address, TYVM... Signed-off-by: Al Viro --- arch/openrisc/kernel/signal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 30110297f4f9..ddedc8a77861 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -84,7 +84,6 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe *frame = (struct rt_sigframe __user *)regs->sp; sigset_t set; - stack_t st; /* * Since we stacked the signal on a dword boundary, @@ -104,11 +103,10 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; - if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) - goto badframe; /* It is more difficult to avoid calling this function than to call it and ignore errors. */ - do_sigaltstack(&st, NULL, regs->sp); + if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) + goto badframe; return regs->gpr[11]; From 9dc87c7b5b0a2db407e086f45cf08ca78208cc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Sep 2012 16:38:34 -0400 Subject: [PATCH 081/112] sh64: fix altstack switching on sigreturn incidentally, declaring a local variable as __user (!) to make sparse STFU is really sick. Especially since sparse had been 100% right - it *is* a bug. Signed-off-by: Al Viro --- arch/sh/kernel/signal_64.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index 23853814bd17..d867cd95a622 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -347,7 +347,6 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3, { struct rt_sigframe __user *frame = (struct rt_sigframe __user *) (long) REF_REG_SP; sigset_t set; - stack_t __user st; long long ret; /* Always make any pending restarted system calls return -EINTR */ @@ -365,11 +364,10 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3, goto badframe; regs->pc -= 4; - if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) - goto badframe; /* It is more difficult to avoid calling this function than to call it and ignore errors. */ - do_sigaltstack(&st, NULL, REF_REG_SP); + if (do_sigaltstack(&frame->uc.uc_stack, NULL, REF_REG_SP) == -EFAULT) + goto badframe; return (int) ret; From afd3f96dc34357584763c2dc453192c78a9dbce0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Nov 2012 10:36:17 -0500 Subject: [PATCH 082/112] score: do_sigaltstack() expects a userland pointer... Signed-off-by: Al Viro --- arch/score/kernel/signal.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c index c268bbf8b410..02353bde92d8 100644 --- a/arch/score/kernel/signal.c +++ b/arch/score/kernel/signal.c @@ -148,7 +148,6 @@ score_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; sigset_t set; - stack_t st; int sig; /* Always make any pending restarted system calls return -EINTR */ @@ -168,12 +167,10 @@ score_rt_sigreturn(struct pt_regs *regs) else if (sig) force_sig(sig, current); - if (__copy_from_user(&st, &frame->rs_uc.uc_stack, sizeof(st))) - goto badframe; - /* It is more difficult to avoid calling this function than to call it and ignore errors. */ - do_sigaltstack((stack_t __user *)&st, NULL, regs->regs[0]); + if (do_sigaltstack(&frame->rs_uc.uc_stack, NULL, regs->regs[0]) == -EFAULT) + goto badframe; regs->is_syscall = 0; __asm__ __volatile__( From 02232f8d2b22708f0651dc515544f4a7ef1e0224 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Nov 2012 10:41:11 -0500 Subject: [PATCH 083/112] microblaze: rt_sigreturn is too trigger-happy about sigaltstack errors Signed-off-by: Al Viro --- arch/microblaze/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 3847e5b9c601..3903e3d11f5a 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -111,7 +111,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) /* It is more difficult to avoid calling this function than to call it and ignore errors. */ - if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->r1)) + if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->r1) == -EFAULT) goto badframe; return rval; From dab55bbafdb491ce2c3f2d7136e54101e948b911 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Mon, 12 Nov 2012 11:13:51 +0200 Subject: [PATCH 084/112] remoteproc: fix error path of ->find_vqs Eliminate an erroneous invocation of rproc_shutdown inside the error path of rproc_virtio_find_vqs. Reported-by: Ido Yariv Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/remoteproc_virtio.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index e7a4780e93db..9e198e590675 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -120,15 +120,11 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, return vq; } -static void rproc_virtio_del_vqs(struct virtio_device *vdev) +static void __rproc_virtio_del_vqs(struct virtio_device *vdev) { struct virtqueue *vq, *n; - struct rproc *rproc = vdev_to_rproc(vdev); struct rproc_vring *rvring; - /* power down the remote processor before deleting vqs */ - rproc_shutdown(rproc); - list_for_each_entry_safe(vq, n, &vdev->vqs, list) { rvring = vq->priv; rvring->vq = NULL; @@ -137,6 +133,16 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) } } +static void rproc_virtio_del_vqs(struct virtio_device *vdev) +{ + struct rproc *rproc = vdev_to_rproc(vdev); + + /* power down the remote processor before deleting vqs */ + rproc_shutdown(rproc); + + __rproc_virtio_del_vqs(vdev); +} + static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], @@ -163,7 +169,7 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, return 0; error: - rproc_virtio_del_vqs(vdev); + __rproc_virtio_del_vqs(vdev); return ret; } From fbb0c41b814d497c656fc7be9e35456f139cb2fb Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 29 Nov 2012 01:31:31 +0000 Subject: [PATCH 085/112] bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 88 ++++++++++---------------------- drivers/net/bonding/bond_sysfs.c | 34 ++++-------- 2 files changed, 36 insertions(+), 86 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5f5b69f37d2e..1445c7ddb26c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3459,6 +3459,28 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) /*-------------------------- Device entry points ----------------------------*/ +static void bond_work_init_all(struct bonding *bond) +{ + INIT_DELAYED_WORK(&bond->mcast_work, + bond_resend_igmp_join_requests_delayed); + INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); + INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); + if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) + INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon); + else + INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); + INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); +} + +static void bond_work_cancel_all(struct bonding *bond) +{ + cancel_delayed_work_sync(&bond->mii_work); + cancel_delayed_work_sync(&bond->arp_work); + cancel_delayed_work_sync(&bond->alb_work); + cancel_delayed_work_sync(&bond->ad_work); + cancel_delayed_work_sync(&bond->mcast_work); +} + static int bond_open(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); @@ -3481,41 +3503,27 @@ static int bond_open(struct net_device *bond_dev) } read_unlock(&bond->lock); - INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed); + bond_work_init_all(bond); if (bond_is_lb(bond)) { /* bond_alb_initialize must be called before the timer * is started. */ - if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { - /* something went wrong - fail the open operation */ + if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) return -ENOMEM; - } - - INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); queue_delayed_work(bond->wq, &bond->alb_work, 0); } - if (bond->params.miimon) { /* link check interval, in milliseconds. */ - INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); + if (bond->params.miimon) /* link check interval, in milliseconds. */ queue_delayed_work(bond->wq, &bond->mii_work, 0); - } if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ - if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) - INIT_DELAYED_WORK(&bond->arp_work, - bond_activebackup_arp_mon); - else - INIT_DELAYED_WORK(&bond->arp_work, - bond_loadbalance_arp_mon); - queue_delayed_work(bond->wq, &bond->arp_work, 0); if (bond->params.arp_validate) bond->recv_probe = bond_arp_rcv; } if (bond->params.mode == BOND_MODE_8023AD) { - INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); queue_delayed_work(bond->wq, &bond->ad_work, 0); /* register to receive LACPDUs */ bond->recv_probe = bond_3ad_lacpdu_recv; @@ -3530,34 +3538,10 @@ static int bond_close(struct net_device *bond_dev) struct bonding *bond = netdev_priv(bond_dev); write_lock_bh(&bond->lock); - bond->send_peer_notif = 0; - write_unlock_bh(&bond->lock); - if (bond->params.miimon) { /* link check interval, in milliseconds. */ - cancel_delayed_work_sync(&bond->mii_work); - } - - if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ - cancel_delayed_work_sync(&bond->arp_work); - } - - switch (bond->params.mode) { - case BOND_MODE_8023AD: - cancel_delayed_work_sync(&bond->ad_work); - break; - case BOND_MODE_TLB: - case BOND_MODE_ALB: - cancel_delayed_work_sync(&bond->alb_work); - break; - default: - break; - } - - if (delayed_work_pending(&bond->mcast_work)) - cancel_delayed_work_sync(&bond->mcast_work); - + bond_work_cancel_all(bond); if (bond_is_lb(bond)) { /* Must be called only after all * slaves have been released @@ -4436,26 +4420,6 @@ static void bond_setup(struct net_device *bond_dev) bond_dev->features |= bond_dev->hw_features; } -static void bond_work_cancel_all(struct bonding *bond) -{ - if (bond->params.miimon && delayed_work_pending(&bond->mii_work)) - cancel_delayed_work_sync(&bond->mii_work); - - if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work)) - cancel_delayed_work_sync(&bond->arp_work); - - if (bond->params.mode == BOND_MODE_ALB && - delayed_work_pending(&bond->alb_work)) - cancel_delayed_work_sync(&bond->alb_work); - - if (bond->params.mode == BOND_MODE_8023AD && - delayed_work_pending(&bond->ad_work)) - cancel_delayed_work_sync(&bond->ad_work); - - if (delayed_work_pending(&bond->mcast_work)) - cancel_delayed_work_sync(&bond->mcast_work); -} - /* * Destroy a bonding device. * Must be under rtnl_lock when this function is called. diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index ef8d2a080d17..3327a072e224 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -513,6 +513,8 @@ static ssize_t bonding_store_arp_interval(struct device *d, int new_value, ret = count; struct bonding *bond = to_bond(d); + if (!rtnl_trylock()) + return restart_syscall(); if (sscanf(buf, "%d", &new_value) != 1) { pr_err("%s: no arp_interval value specified.\n", bond->dev->name); @@ -539,10 +541,6 @@ static ssize_t bonding_store_arp_interval(struct device *d, pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring.\n", bond->dev->name, bond->dev->name); bond->params.miimon = 0; - if (delayed_work_pending(&bond->mii_work)) { - cancel_delayed_work(&bond->mii_work); - flush_workqueue(bond->wq); - } } if (!bond->params.arp_targets[0]) { pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified.\n", @@ -554,19 +552,12 @@ static ssize_t bonding_store_arp_interval(struct device *d, * timer will get fired off when the open function * is called. */ - if (!delayed_work_pending(&bond->arp_work)) { - if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) - INIT_DELAYED_WORK(&bond->arp_work, - bond_activebackup_arp_mon); - else - INIT_DELAYED_WORK(&bond->arp_work, - bond_loadbalance_arp_mon); - - queue_delayed_work(bond->wq, &bond->arp_work, 0); - } + cancel_delayed_work_sync(&bond->mii_work); + queue_delayed_work(bond->wq, &bond->arp_work, 0); } out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR, @@ -962,6 +953,8 @@ static ssize_t bonding_store_miimon(struct device *d, int new_value, ret = count; struct bonding *bond = to_bond(d); + if (!rtnl_trylock()) + return restart_syscall(); if (sscanf(buf, "%d", &new_value) != 1) { pr_err("%s: no miimon value specified.\n", bond->dev->name); @@ -993,10 +986,6 @@ static ssize_t bonding_store_miimon(struct device *d, bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; } - if (delayed_work_pending(&bond->arp_work)) { - cancel_delayed_work(&bond->arp_work); - flush_workqueue(bond->wq); - } } if (bond->dev->flags & IFF_UP) { @@ -1005,15 +994,12 @@ static ssize_t bonding_store_miimon(struct device *d, * timer will get fired off when the open function * is called. */ - if (!delayed_work_pending(&bond->mii_work)) { - INIT_DELAYED_WORK(&bond->mii_work, - bond_mii_monitor); - queue_delayed_work(bond->wq, - &bond->mii_work, 0); - } + cancel_delayed_work_sync(&bond->arp_work); + queue_delayed_work(bond->wq, &bond->mii_work, 0); } } out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR, From 90fb6250c509cabd425b7ae4524053dba2e27e2c Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 29 Nov 2012 01:34:06 +0000 Subject: [PATCH 086/112] bonding: make arp_ip_target parameter checks consistent with sysfs The module can be loaded with arp_ip_target="255.255.255.255" which makes it impossible to remove as the function in sysfs checks for that value, so we make the parameter checks consistent with sysfs. v2: Fix formatting v3: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1445c7ddb26c..a7d47350ea4b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4670,12 +4670,13 @@ static int bond_check_params(struct bond_params *params) arp_ip_count++) { /* not complete check, but should be good enough to catch mistakes */ - if (!isdigit(arp_ip_target[arp_ip_count][0])) { + __be32 ip = in_aton(arp_ip_target[arp_ip_count]); + if (!isdigit(arp_ip_target[arp_ip_count][0]) || + ip == 0 || ip == htonl(INADDR_BROADCAST)) { pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", arp_ip_target[arp_ip_count]); arp_interval = 0; } else { - __be32 ip = in_aton(arp_ip_target[arp_ip_count]); arp_target[arp_ip_count] = ip; } } From e196c0e579902f42cf72414461fb034e5a1ffbf7 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 29 Nov 2012 01:37:59 +0000 Subject: [PATCH 087/112] bonding: fix race condition in bonding_store_slaves_active Race between bonding_store_slaves_active() and slave manipulation functions. The bond_for_each_slave use in bonding_store_slaves_active() is not protected by any synchronization mechanism. NULL pointer dereference is easy to reach. Fixed by acquiring the bond->lock for the slave walk. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 3327a072e224..1877ed7ca086 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1568,6 +1568,7 @@ static ssize_t bonding_store_slaves_active(struct device *d, goto out; } + read_lock(&bond->lock); bond_for_each_slave(bond, slave, i) { if (!bond_is_active_slave(slave)) { if (new_value) @@ -1576,6 +1577,7 @@ static ssize_t bonding_store_slaves_active(struct device *d, slave->inactive = 1; } } + read_unlock(&bond->lock); out: return ret; } From 45bce8f3e3436bbe2e03dd2b076abdce79ffabb7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Nov 2012 10:21:43 -0800 Subject: [PATCH 088/112] fs/buffer.c: make block-size be per-page and protected by the page lock This makes the buffer size handling be a per-page thing, which allows us to not have to worry about locking too much when changing the buffer size. If a page doesn't have buffers, we still need to read the block size from the inode, but we can do that with ACCESS_ONCE(), so that even if the size is changing, we get a consistent value. This doesn't convert all functions - many of the buffer functions are used purely by filesystems, which in turn results in the buffer size being fixed at mount-time. So they don't have the same consistency issues that the raw device access can have. Signed-off-by: Linus Torvalds --- fs/buffer.c | 79 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index b5f044283edb..28a74ff5324b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1552,6 +1552,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) } EXPORT_SYMBOL(unmap_underlying_metadata); +/* + * Size is a power-of-two in the range 512..PAGE_SIZE, + * and the case we care about most is PAGE_SIZE. + * + * So this *could* possibly be written with those + * constraints in mind (relevant mostly if some + * architecture has a slow bit-scan instruction) + */ +static inline int block_size_bits(unsigned int blocksize) +{ + return ilog2(blocksize); +} + +static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state) +{ + BUG_ON(!PageLocked(page)); + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); + return page_buffers(page); +} + /* * NOTE! All mapped/uptodate combinations are valid: * @@ -1589,19 +1611,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, sector_t block; sector_t last_block; struct buffer_head *bh, *head; - const unsigned blocksize = 1 << inode->i_blkbits; + unsigned int blocksize, bbits; int nr_underway = 0; int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); - BUG_ON(!PageLocked(page)); - - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, blocksize, + head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); - } /* * Be very careful. We have no exclusion from __set_page_dirty_buffers @@ -1613,9 +1629,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page, * handle that here by just cleaning them. */ - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - head = page_buffers(page); bh = head; + blocksize = bh->b_size; + bbits = block_size_bits(blocksize); + + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + last_block = (i_size_read(inode) - 1) >> bbits; /* * Get all the dirty buffers mapped to disk addresses and @@ -1806,12 +1825,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, BUG_ON(to > PAGE_CACHE_SIZE); BUG_ON(from > to); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); + head = create_page_buffers(page, inode, 0); + blocksize = head->b_size; + bbits = block_size_bits(blocksize); - bbits = inode->i_blkbits; block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); for(bh = head, block_start = 0; bh != head || !block_start; @@ -1881,11 +1898,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, unsigned blocksize; struct buffer_head *bh, *head; - blocksize = 1 << inode->i_blkbits; + bh = head = page_buffers(page); + blocksize = bh->b_size; - for(bh = head = page_buffers(page), block_start = 0; - bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) { + block_start = 0; + do { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (!buffer_uptodate(bh)) @@ -1895,7 +1912,10 @@ static int __block_commit_write(struct inode *inode, struct page *page, mark_buffer_dirty(bh); } clear_buffer_new(bh); - } + + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); /* * If this is a partial write which happened to make all buffers @@ -2020,7 +2040,6 @@ EXPORT_SYMBOL(generic_write_end); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, unsigned long from) { - struct inode *inode = page->mapping->host; unsigned block_start, block_end, blocksize; unsigned to; struct buffer_head *bh, *head; @@ -2029,13 +2048,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, if (!page_has_buffers(page)) return 0; - blocksize = 1 << inode->i_blkbits; + head = page_buffers(page); + blocksize = head->b_size; to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); to = from + to; if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) return 0; - head = page_buffers(page); bh = head; block_start = 0; do { @@ -2068,18 +2087,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block) struct inode *inode = page->mapping->host; sector_t iblock, lblock; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; - unsigned int blocksize; + unsigned int blocksize, bbits; int nr, i; int fully_mapped = 1; - BUG_ON(!PageLocked(page)); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); + head = create_page_buffers(page, inode, 0); + blocksize = head->b_size; + bbits = block_size_bits(blocksize); - iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; + iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + lblock = (i_size_read(inode)+blocksize-1) >> bbits; bh = head; nr = 0; i = 0; From 1e8b33328a5407b447ff80953655a47014a6dcb9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Nov 2012 10:49:50 -0800 Subject: [PATCH 089/112] blockdev: remove bd_block_size_semaphore again This reverts the block-device direct access code to the previous unlocked code, now that fs/buffer.c no longer needs external locking. With this, fs/block_dev.c is back to the original version, apart from a whitespace cleanup that I didn't want to revert. Signed-off-by: Linus Torvalds --- drivers/char/raw.c | 2 +- fs/block_dev.c | 105 ++------------------------------------------- include/linux/fs.h | 4 -- 3 files changed, 5 insertions(+), 106 deletions(-) diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 0bb207eaef2f..54a3a6d09819 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd, static const struct file_operations raw_fops = { .read = do_sync_read, - .aio_read = blkdev_aio_read, + .aio_read = generic_file_aio_read, .write = do_sync_write, .aio_write = blkdev_aio_write, .fsync = blkdev_fsync, diff --git a/fs/block_dev.c b/fs/block_dev.c index 1a1e5e3b1eaf..47a949d8a07e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -116,8 +116,6 @@ EXPORT_SYMBOL(invalidate_bdev); int set_blocksize(struct block_device *bdev, int size) { - struct address_space *mapping; - /* Size must be a power of two, and between 512 and PAGE_SIZE */ if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) return -EINVAL; @@ -126,19 +124,6 @@ int set_blocksize(struct block_device *bdev, int size) if (size < bdev_logical_block_size(bdev)) return -EINVAL; - /* Prevent starting I/O or mapping the device */ - percpu_down_write(&bdev->bd_block_size_semaphore); - - /* Check that the block device is not memory mapped */ - mapping = bdev->bd_inode->i_mapping; - mutex_lock(&mapping->i_mmap_mutex); - if (mapping_mapped(mapping)) { - mutex_unlock(&mapping->i_mmap_mutex); - percpu_up_write(&bdev->bd_block_size_semaphore); - return -EBUSY; - } - mutex_unlock(&mapping->i_mmap_mutex); - /* Don't change the size if it is same as current */ if (bdev->bd_block_size != size) { sync_blockdev(bdev); @@ -146,9 +131,6 @@ int set_blocksize(struct block_device *bdev, int size) bdev->bd_inode->i_blkbits = blksize_bits(size); kill_bdev(bdev); } - - percpu_up_write(&bdev->bd_block_size_semaphore); - return 0; } @@ -459,12 +441,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; - - if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) { - kmem_cache_free(bdev_cachep, ei); - return NULL; - } - return &ei->vfs_inode; } @@ -473,8 +449,6 @@ static void bdev_i_callback(struct rcu_head *head) struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); - percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore); - kmem_cache_free(bdev_cachep, bdi); } @@ -1593,22 +1567,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) return blkdev_ioctl(bdev, mode, cmd, arg); } -ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - ssize_t ret; - struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_aio_read(iocb, iov, nr_segs, pos); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} -EXPORT_SYMBOL_GPL(blkdev_aio_read); - /* * Write data to the block device. Only intended for the block device itself * and the raw driver which basically is a fake block device. @@ -1620,16 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; - struct block_device *bdev = I_BDEV(file->f_mapping->host); struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); blk_start_plug(&plug); - - percpu_down_read(&bdev->bd_block_size_semaphore); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; @@ -1638,62 +1592,11 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } - - percpu_up_read(&bdev->bd_block_size_semaphore); - blk_finish_plug(&plug); - return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); -static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) -{ - int ret; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_mmap(file, vma); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} - -static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - ssize_t ret; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_splice_read(file, ppos, pipe, len, flags); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} - -static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe, - struct file *file, loff_t *ppos, size_t len, - unsigned int flags) -{ - ssize_t ret; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_splice_write(pipe, file, ppos, len, flags); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} - - /* * Try to release a page associated with block device when the system * is under memory pressure. @@ -1724,16 +1627,16 @@ const struct file_operations def_blk_fops = { .llseek = block_llseek, .read = do_sync_read, .write = do_sync_write, - .aio_read = blkdev_aio_read, + .aio_read = generic_file_aio_read, .aio_write = blkdev_aio_write, - .mmap = blkdev_mmap, + .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif - .splice_read = blkdev_splice_read, - .splice_write = blkdev_splice_write, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) diff --git a/include/linux/fs.h b/include/linux/fs.h index b33cfc97b9ca..44f288e9726d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -462,8 +462,6 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; - /* A semaphore that prevents I/O while block size is being changed */ - struct percpu_rw_semaphore bd_block_size_semaphore; }; /* @@ -2379,8 +2377,6 @@ extern int generic_segment_checks(const struct iovec *iov, unsigned long *nr_segs, size_t *count, int access_flags); /* fs/block_dev.c */ -extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, From ab73857e354ab9e317613cba7db714e2c12c6547 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Nov 2012 12:27:00 -0800 Subject: [PATCH 090/112] direct-io: don't read inode->i_blkbits multiple times Since directio can work on a raw block device, and the block size of the device can change under it, we need to do the same thing that fs/buffer.c now does: read the block size a single time, using ACCESS_ONCE(). Reading it multiple times can get different results, which will then confuse the code because it actually encodes the i_blksize in relationship to the underlying logical blocksize. Signed-off-by: Linus Torvalds --- fs/direct-io.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index f86c720dba0e..cf5b44b10c67 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -540,6 +540,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ unsigned long fs_count; /* Number of filesystem-sized blocks */ int create; + unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; /* * If there was a memory error and we've overwritten all the @@ -554,7 +555,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, fs_count = fs_endblk - fs_startblk + 1; map_bh->b_state = 0; - map_bh->b_size = fs_count << dio->inode->i_blkbits; + map_bh->b_size = fs_count << i_blkbits; /* * For writes inside i_size on a DIO_SKIP_HOLES filesystem we @@ -1053,7 +1054,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int seg; size_t size; unsigned long addr; - unsigned blkbits = inode->i_blkbits; + unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); + unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; loff_t end = offset; @@ -1149,7 +1151,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, dio->inode = inode; dio->rw = rw; sdio.blkbits = blkbits; - sdio.blkfactor = inode->i_blkbits - blkbits; + sdio.blkfactor = i_blkbits - blkbits; sdio.block_in_file = offset >> blkbits; sdio.get_block = get_block; From bbec0270bdd887f96377065ee38b8848b5afa395 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Nov 2012 12:31:52 -0800 Subject: [PATCH 091/112] blkdev_max_block: make private to fs/buffer.c We really don't want to look at the block size for the raw block device accesses in fs/block-dev.c, because it may be changing from under us. So get rid of the max_block logic entirely, since the caller should already have done it anyway. That leaves the only user of this function in fs/buffer.c, so move the whole function there and make it static. Signed-off-by: Linus Torvalds --- fs/block_dev.c | 55 +--------------------------------------------- fs/buffer.c | 14 +++++++++++- include/linux/fs.h | 1 - 3 files changed, 14 insertions(+), 56 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 47a949d8a07e..a1e09b4fe1ba 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode, spin_unlock(&dst->wb.list_lock); } -sector_t blkdev_max_block(struct block_device *bdev) -{ - sector_t retval = ~((sector_t)0); - loff_t sz = i_size_read(bdev->bd_inode); - - if (sz) { - unsigned int size = block_size(bdev); - unsigned int sizebits = blksize_bits(size); - retval = (sz >> sizebits); - } - return retval; -} - /* Kill _all_ buffers and pagecache , dirty or not.. */ void kill_bdev(struct block_device *bdev) { @@ -163,52 +150,12 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= blkdev_max_block(I_BDEV(inode))) { - if (create) - return -EIO; - - /* - * for reads, we're just trying to fill a partial page. - * return a hole, they will have to call get_block again - * before they can fill it, and they will get -EIO at that - * time - */ - return 0; - } bh->b_bdev = I_BDEV(inode); bh->b_blocknr = iblock; set_buffer_mapped(bh); return 0; } -static int -blkdev_get_blocks(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) -{ - sector_t end_block = blkdev_max_block(I_BDEV(inode)); - unsigned long max_blocks = bh->b_size >> inode->i_blkbits; - - if ((iblock + max_blocks) > end_block) { - max_blocks = end_block - iblock; - if ((long)max_blocks <= 0) { - if (create) - return -EIO; /* write fully beyond EOF */ - /* - * It is a read which is fully beyond EOF. We return - * a !buffer_mapped buffer - */ - max_blocks = 0; - } - } - - bh->b_bdev = I_BDEV(inode); - bh->b_blocknr = iblock; - bh->b_size = max_blocks << inode->i_blkbits; - if (max_blocks) - set_buffer_mapped(bh); - return 0; -} - static ssize_t blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -217,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct inode *inode = file->f_mapping->host; return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, - nr_segs, blkdev_get_blocks, NULL, NULL, 0); + nr_segs, blkdev_get_block, NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) diff --git a/fs/buffer.c b/fs/buffer.c index 28a74ff5324b..3586fb05c8ce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head) attach_page_buffers(page, head); } +static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) +{ + sector_t retval = ~((sector_t)0); + loff_t sz = i_size_read(bdev->bd_inode); + + if (sz) { + unsigned int sizebits = blksize_bits(size); + retval = (sz >> sizebits); + } + return retval; +} + /* * Initialise the state of a blockdev page's buffers. */ @@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, struct buffer_head *head = page_buffers(page); struct buffer_head *bh = head; int uptodate = PageUptodate(page); - sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); + sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size); do { if (!buffer_mapped(bh)) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 44f288e9726d..75fe9a134803 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2047,7 +2047,6 @@ extern void unregister_blkdev(unsigned int, const char *); extern struct block_device *bdget(dev_t); extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); -extern sector_t blkdev_max_block(struct block_device *bdev); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); From 696199f8ccf7fc6d17ef89c296ad3b6c78c52d9c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:00:51 -0500 Subject: [PATCH 092/112] don't do blind d_drop() in nfs_prime_dcache() Signed-off-by: Al Viro --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ce8cb926526b..99489cfca24d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -450,7 +450,8 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) nfs_refresh_inode(dentry->d_inode, entry->fattr); goto out; } else { - d_drop(dentry); + if (d_invalidate(dentry) != 0) + goto out; dput(dentry); } } From c44600c9d1de64314c2bd58103f15acb53e10073 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:04:36 -0500 Subject: [PATCH 093/112] nfs_lookup_revalidate(): fix a leak We are leaking fattr and fhandle if we decide that dentry is not to be invalidated, after all (e.g. happens to be a mountpoint). Just free both before that... Signed-off-by: Al Viro --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 99489cfca24d..b9e66b7e0c14 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1101,6 +1101,8 @@ out_set_verifier: out_zap_parent: nfs_zap_caches(dir); out_bad: + nfs_free_fattr(fattr); + nfs_free_fhandle(fhandle); nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ @@ -1113,8 +1115,6 @@ out_zap_parent: shrink_dcache_parent(dentry); } d_drop(dentry); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", __func__, dentry->d_parent->d_name.name, From 0903a0c8491c1e987dfc6eb294199a36760398bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:11:06 -0500 Subject: [PATCH 094/112] cifs: get rid of blind d_drop() in readdir Signed-off-by: Al Viro --- fs/cifs/readdir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f9b5d3d6cf33..1c576e871366 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -86,14 +86,17 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { + int err; inode = dentry->d_inode; /* update inode in place if i_ino didn't change */ if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { cifs_fattr_to_inode(inode, fattr); return dentry; } - d_drop(dentry); + err = d_invalidate(dentry); dput(dentry); + if (err) + return NULL; } dentry = d_alloc(parent, name); From 21d8a15ac333b05f1fecdf9fdc30996be2e11d60 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:17:21 -0500 Subject: [PATCH 095/112] lookup_one_len: don't accept . and .. Signed-off-by: Al Viro --- fs/namei.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index 937f9d50c84b..5f4cdf3ad913 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2131,6 +2131,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (!len) return ERR_PTR(-EACCES); + if (unlikely(name[0] == '.')) { + if (len < 2 || (len == 2 && name[1] == '.')) + return ERR_PTR(-EACCES); + } + while (len--) { c = *(const unsigned char *)name++; if (c == '/' || c == '\0') From a77cfcb429ed98845a4e4df72473b8f37acd890b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:57:33 -0500 Subject: [PATCH 096/112] fix off-by-one in argument passed by iterate_fd() to callbacks Noticed by Pavel Roskin; the thing in his patch I disagree with was compensating for that shite in callbacks instead of fixing it once in the iterator itself. Signed-off-by: Al Viro --- fs/file.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/file.c b/fs/file.c index 7cb71b992603..eff23162485f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -994,16 +994,18 @@ int iterate_fd(struct files_struct *files, unsigned n, const void *p) { struct fdtable *fdt; - struct file *file; int res = 0; if (!files) return 0; spin_lock(&files->file_lock); - fdt = files_fdtable(files); - while (!res && n < fdt->max_fds) { - file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); - if (file) - res = f(p, file, n); + for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { + struct file *file; + file = rcu_dereference_check_fdtable(files, fdt->fd[n]); + if (!file) + continue; + res = f(p, file, n); + if (res) + break; } spin_unlock(&files->file_lock); return res; From 58d002097b98664e2a39cc708f30d11549d870b2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 29 Nov 2012 13:54:20 -0800 Subject: [PATCH 097/112] mm: compaction: fix return value of capture_free_page() Commit ef6c5be658f6 ("fix incorrect NR_FREE_PAGES accounting (appears like memory leak)") fixes a NR_FREE_PAGE accounting leak but missed the return value which was also missed by this reviewer until today. That return value is used by compaction when adding pages to a list of isolated free pages and without this follow-up fix, there is a risk of free list corruption. Signed-off-by: Mel Gorman Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 92871579cbee..7e208f0ad68c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1422,7 +1422,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) } } - return 1UL << order; + return 1UL << alloc_order; } /* From ae64ffcac35de0db628ba9631edf8ff34c5cd7ac Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 29 Nov 2012 13:54:21 -0800 Subject: [PATCH 098/112] mm/vmemmap: fix wrong use of virt_to_page I enable CONFIG_DEBUG_VIRTUAL and CONFIG_SPARSEMEM_VMEMMAP, when doing memory hotremove, there is a kernel BUG at arch/x86/mm/physaddr.c:20. It is caused by free_section_usemap()->virt_to_page(), virt_to_page() is only used for kernel direct mapping address, but sparse-vmemmap uses vmemmap address, so it is going wrong here. ------------[ cut here ]------------ kernel BUG at arch/x86/mm/physaddr.c:20! invalid opcode: 0000 [#1] SMP Modules linked in: acpihp_drv acpihp_slot edd cpufreq_conservative cpufreq_userspace cpufreq_powersave acpi_cpufreq mperf fuse vfat fat loop dm_mod coretemp kvm crc32c_intel ipv6 ixgbe igb iTCO_wdt i7core_edac edac_core pcspkr iTCO_vendor_support ioatdma microcode joydev sr_mod i2c_i801 dca lpc_ich mfd_core mdio tpm_tis i2c_core hid_generic tpm cdrom sg tpm_bios rtc_cmos button ext3 jbd mbcache usbhid hid uhci_hcd ehci_hcd usbcore usb_common sd_mod crc_t10dif processor thermal_sys hwmon scsi_dh_alua scsi_dh_hp_sw scsi_dh_rdac scsi_dh_emc scsi_dh ata_generic ata_piix libata megaraid_sas scsi_mod CPU 39 Pid: 6454, comm: sh Not tainted 3.7.0-rc1-acpihp-final+ #45 QCI QSSC-S4R/QSSC-S4R RIP: 0010:[] [] __phys_addr+0x88/0x90 RSP: 0018:ffff8804440d7c08 EFLAGS: 00010006 RAX: 0000000000000006 RBX: ffffea0012000000 RCX: 000000000000002c ... Signed-off-by: Jianguo Wu Signed-off-by: Jiang Liu Reviewd-by: Wen Congyang Acked-by: Johannes Weiner Reviewed-by: Yasuaki Ishimatsu Reviewed-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mm/sparse.c b/mm/sparse.c index fac95f2888f2..a83de2f72b30 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -617,7 +617,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { return; /* XXX: Not implemented yet */ } -static void free_map_bootmem(struct page *page, unsigned long nr_pages) +static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { } #else @@ -658,10 +658,11 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) get_order(sizeof(struct page) * nr_pages)); } -static void free_map_bootmem(struct page *page, unsigned long nr_pages) +static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { unsigned long maps_section_nr, removing_section_nr, i; unsigned long magic; + struct page *page = virt_to_page(memmap); for (i = 0; i < nr_pages; i++, page++) { magic = (unsigned long) page->lru.next; @@ -710,13 +711,10 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) */ if (memmap) { - struct page *memmap_page; - memmap_page = virt_to_page(memmap); - nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) >> PAGE_SHIFT; - free_map_bootmem(memmap_page, nr_pages); + free_map_bootmem(memmap, nr_pages); } } From 60cefed485a02bd99b6299dad70666fe49245da7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 29 Nov 2012 13:54:23 -0800 Subject: [PATCH 099/112] mm: vmscan: fix endless loop in kswapd balancing Kswapd does not in all places have the same criteria for a balanced zone. Zones are only being reclaimed when their high watermark is breached, but compaction checks loop over the zonelist again when the zone does not meet the low watermark plus two times the size of the allocation. This gets kswapd stuck in an endless loop over a small zone, like the DMA zone, where the high watermark is smaller than the compaction requirement. Add a function, zone_balanced(), that checks the watermark, and, for higher order allocations, if compaction has enough free memory. Then use it uniformly to check for balanced zones. This makes sure that when the compaction watermark is not met, at least reclaim happens and progress is made - or the zone is declared unreclaimable at some point and skipped entirely. Signed-off-by: Johannes Weiner Reported-by: George Spelvin Reported-by: Johannes Hirte Reported-by: Tomas Racek Tested-by: Johannes Hirte Reviewed-by: Rik van Riel Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index cbf84e152f04..83f4d0e85601 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2414,6 +2414,19 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc) } while (memcg); } +static bool zone_balanced(struct zone *zone, int order, + unsigned long balance_gap, int classzone_idx) +{ + if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) + + balance_gap, classzone_idx, 0)) + return false; + + if (COMPACTION_BUILD && order && !compaction_suitable(zone, order)) + return false; + + return true; +} + /* * pgdat_balanced is used when checking if a node is balanced for high-order * allocations. Only zones that meet watermarks and are in a zone allowed @@ -2492,8 +2505,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, continue; } - if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), - i, 0)) + if (!zone_balanced(zone, order, 0, i)) all_zones_ok = false; else balanced += zone->present_pages; @@ -2602,8 +2614,7 @@ loop_again: break; } - if (!zone_watermark_ok_safe(zone, order, - high_wmark_pages(zone), 0, 0)) { + if (!zone_balanced(zone, order, 0, 0)) { end_zone = i; break; } else { @@ -2679,9 +2690,8 @@ loop_again: testorder = 0; if ((buffer_heads_over_limit && is_highmem_idx(i)) || - !zone_watermark_ok_safe(zone, testorder, - high_wmark_pages(zone) + balance_gap, - end_zone, 0)) { + !zone_balanced(zone, testorder, + balance_gap, end_zone)) { shrink_zone(zone, &sc); reclaim_state->reclaimed_slab = 0; @@ -2708,8 +2718,7 @@ loop_again: continue; } - if (!zone_watermark_ok_safe(zone, testorder, - high_wmark_pages(zone), end_zone, 0)) { + if (!zone_balanced(zone, testorder, 0, end_zone)) { all_zones_ok = 0; /* * We are still under min water mark. This From a50915394f1fc02c2861d3b7ce7014788aa5066e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 29 Nov 2012 13:54:27 -0800 Subject: [PATCH 100/112] revert "Revert "mm: remove __GFP_NO_KSWAPD"" It apepars that this patch was innocent, and we hope that "mm: avoid waking kswapd for THP allocations when compaction is deferred or contended" will fix the final kswapd-spinning cause. Cc: Zdenek Kabelac Cc: Seth Jennings Cc: Valdis Kletnieks Cc: Jiri Slaby Cc: Rik van Riel Cc: Robert Jennings Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mtd/mtdcore.c | 6 ++---- include/linux/gfp.h | 13 +++++-------- include/trace/events/gfpflags.h | 1 - mm/page_alloc.c | 7 +++---- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index ec794a72975d..374c46dff7dd 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1077,8 +1077,7 @@ EXPORT_SYMBOL_GPL(mtd_writev); * until the request succeeds or until the allocation size falls below * the system page size. This attempts to make sure it does not adversely * impact system performance, so when allocating more than one page, we - * ask the memory allocator to avoid re-trying, swapping, writing back - * or performing I/O. + * ask the memory allocator to avoid re-trying. * * Note, this function also makes sure that the allocated buffer is aligned to * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. @@ -1092,8 +1091,7 @@ EXPORT_SYMBOL_GPL(mtd_writev); */ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) { - gfp_t flags = __GFP_NOWARN | __GFP_WAIT | - __GFP_NORETRY | __GFP_NO_KSWAPD; + gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY; size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); void *kbuf; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index d0a79678f169..76e1aa206f57 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,10 +30,9 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u -#define ___GFP_NOTRACK 0x200000u -#define ___GFP_NO_KSWAPD 0x400000u -#define ___GFP_OTHER_NODE 0x800000u -#define ___GFP_WRITE 0x1000000u +#define ___GFP_NOTRACK 0x100000u +#define ___GFP_OTHER_NODE 0x200000u +#define ___GFP_WRITE 0x400000u /* * GFP bitmasks.. @@ -86,7 +85,6 @@ struct vm_area_struct; #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ -#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ @@ -96,7 +94,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 23 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ @@ -116,8 +114,7 @@ struct vm_area_struct; __GFP_MOVABLE) #define GFP_IOFS (__GFP_IO | __GFP_FS) #define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ - __GFP_NO_KSWAPD) + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index d6fd8e5b14b7..9391706e9254 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -36,7 +36,6 @@ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ - {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ ) : "GFP_NOWAIT" diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e208f0ad68c..8193809f3de0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2416,9 +2416,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; restart: - if (!(gfp_mask & __GFP_NO_KSWAPD)) - wake_all_kswapd(order, zonelist, high_zoneidx, - zone_idx(preferred_zone)); + wake_all_kswapd(order, zonelist, high_zoneidx, + zone_idx(preferred_zone)); /* * OK, we're below the kswapd watermark and have kicked background @@ -2495,7 +2494,7 @@ rebalance: * system then fail the allocation instead of entering direct reclaim. */ if ((deferred_compaction || contended_compaction) && - (gfp_mask & __GFP_NO_KSWAPD)) + (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) goto nopage; /* Try direct reclaim and then allocating */ From 782fd30406ecb9d9b082816abe0c6008fc72a7b0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 29 Nov 2012 13:54:30 -0800 Subject: [PATCH 101/112] mm: avoid waking kswapd for THP allocations when compaction is deferred or contended With "mm: vmscan: scale number of pages reclaimed by reclaim/compaction based on failures" reverted, Zdenek Kabelac reported the following Hmm, so it's just took longer to hit the problem and observe kswapd0 spinning on my CPU again - it's not as endless like before - but still it easily eats minutes - it helps to turn off Firefox or TB (memory hungry apps) so kswapd0 stops soon - and restart those apps again. (And I still have like >1GB of cached memory) kswapd0 R running task 0 30 2 0x00000000 Call Trace: preempt_schedule+0x42/0x60 _raw_spin_unlock+0x55/0x60 put_super+0x31/0x40 drop_super+0x22/0x30 prune_super+0x149/0x1b0 shrink_slab+0xba/0x510 The sysrq+m indicates the system has no swap so it'll never reclaim anonymous pages as part of reclaim/compaction. That is one part of the problem but not the root cause as file-backed pages could also be reclaimed. The likely underlying problem is that kswapd is woken up or kept awake for each THP allocation request in the page allocator slow path. If compaction fails for the requesting process then compaction will be deferred for a time and direct reclaim is avoided. However, if there are a storm of THP requests that are simply rejected, it will still be the the case that kswapd is awake for a prolonged period of time as pgdat->kswapd_max_order is updated each time. This is noticed by the main kswapd() loop and it will not call kswapd_try_to_sleep(). Instead it will loopp, shrinking a small number of pages and calling shrink_slab() on each iteration. This patch defers when kswapd gets woken up for THP allocations. For !THP allocations, kswapd is always woken up. For THP allocations, kswapd is woken up iff the process is willing to enter into direct reclaim/compaction. [akpm@linux-foundation.org: fix typo in comment] Signed-off-by: Mel Gorman Cc: Zdenek Kabelac Cc: Seth Jennings Cc: Jiri Slaby Cc: Rik van Riel Cc: Robert Jennings Cc: Valdis Kletnieks Cc: Glauber Costa Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8193809f3de0..a8f2c87792c3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2378,6 +2378,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS); } +/* Returns true if the allocation is likely for THP */ +static bool is_thp_alloc(gfp_t gfp_mask, unsigned int order) +{ + if (order == pageblock_order && + (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) + return true; + return false; +} + static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, @@ -2416,7 +2425,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; restart: - wake_all_kswapd(order, zonelist, high_zoneidx, + /* The decision whether to wake kswapd for THP is made later */ + if (!is_thp_alloc(gfp_mask, order)) + wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(preferred_zone)); /* @@ -2487,15 +2498,21 @@ rebalance: goto got_pg; sync_migration = true; - /* - * If compaction is deferred for high-order allocations, it is because - * sync compaction recently failed. In this is the case and the caller - * requested a movable allocation that does not heavily disrupt the - * system then fail the allocation instead of entering direct reclaim. - */ - if ((deferred_compaction || contended_compaction) && - (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) - goto nopage; + if (is_thp_alloc(gfp_mask, order)) { + /* + * If compaction is deferred for high-order allocations, it is + * because sync compaction recently failed. If this is the case + * and the caller requested a movable allocation that does not + * heavily disrupt the system then fail the allocation instead + * of entering direct reclaim. + */ + if (deferred_compaction || contended_compaction) + goto nopage; + + /* If process is willing to reclaim/compact then wake kswapd */ + wake_all_kswapd(order, zonelist, high_zoneidx, + zone_idx(preferred_zone)); + } /* Try direct reclaim and then allocating */ page = __alloc_pages_direct_reclaim(gfp_mask, order, From 783657a7dc20e5c0efbc9a09a9dd38e238a723da Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 29 Nov 2012 13:54:34 -0800 Subject: [PATCH 102/112] mm: soft offline: split thp at the beginning of soft_offline_page() When we try to soft-offline a thp tail page, put_page() is called on the tail page unthinkingly and VM_BUG_ON is triggered in put_compound_page(). This patch splits thp before going into the main body of soft-offlining. Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: Tony Luck Cc: Andi Kleen Cc: Wu Fengguang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6c5899b9034a..8b20278be6a6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1476,9 +1476,17 @@ int soft_offline_page(struct page *page, int flags) { int ret; unsigned long pfn = page_to_pfn(page); + struct page *hpage = compound_trans_head(page); if (PageHuge(page)) return soft_offline_huge_page(page, flags); + if (PageTransHuge(hpage)) { + if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { + pr_info("soft offline: %#lx: failed to split THP\n", + pfn); + return -EBUSY; + } + } ret = get_any_page(page, pfn, flags); if (ret < 0) From 1430e17844e2cd15bc15f3c21b5349e762f6ba41 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 29 Nov 2012 13:54:36 -0800 Subject: [PATCH 103/112] drivers/rtc/rtc-tps65910.c: fix invalid pointer access on _remove() The tps65910_rtc data is registered as the platform driver data in _probe(= ). Therefore the tps65910_rtc should be used on unregistering the rtc device. And device pointer should be retrieved from the platform_device structure. This patch fixes the below oops: Unable to handle kernel NULL pointer dereference at virtual address 00000008 Modules linked in: rtc_tps65910(-) CPU: 0 Not tainted (3.7.0-rc7-next-20121128-g6b1f974-dirty #7) PC is at tps65910_rtc_alarm_irq_enable+0x20/0x2c [rtc_tps65910] (tps65910_rtc_alarm_irq_enable+0x20/0x2c [rtc_tps65910]) (tps65910_rtc_remove+0x18/0x28 [rtc_tps65910]) (platform_drv_remove+0x18/0x1c) (__device_release_driver+0x70/0xcc) (driver_detach+0xb4/0xb8) (bus_remove_driver+0x7c/0xc0) (sys_delete_module+0x148/0x21c) Signed-off-by: Milo(Woogyom) Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-tps65910.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index 7a82337e4dee..073108dcf9e7 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -288,11 +288,11 @@ static int __devinit tps65910_rtc_probe(struct platform_device *pdev) static int __devexit tps65910_rtc_remove(struct platform_device *pdev) { /* leave rtc running, but disable irqs */ - struct rtc_device *rtc = platform_get_drvdata(pdev); + struct tps65910_rtc *tps_rtc = platform_get_drvdata(pdev); - tps65910_rtc_alarm_irq_enable(&rtc->dev, 0); + tps65910_rtc_alarm_irq_enable(&pdev->dev, 0); - rtc_device_unregister(rtc); + rtc_device_unregister(tps_rtc->rtc); return 0; } From 644c154186386bb1fa6446bc5e037b9ed098db46 Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Fri, 30 Nov 2012 12:15:32 -0800 Subject: [PATCH 104/112] x86, fpu: Avoid FPU lazy restore after suspend When a cpu enters S3 state, the FPU state is lost. After resuming for S3, if we try to lazy restore the FPU for a process running on the same CPU, this will result in a corrupted FPU context. Ensure that "fpu_owner_task" is properly invalided when (re-)initializing a CPU, so nobody will try to lazy restore a state which doesn't exist in the hardware. Tested with a 64-bit kernel on a 4-core Ivybridge CPU with eagerfpu=off, by doing thousands of suspend/resume cycles with 4 processes doing FPU operations running. Without the patch, a process is killed after a few hundreds cycles by a SIGFPE. Cc: Duncan Laurie Cc: Olof Johansson Cc: v3.4+ # for 3.4 need to replace this_cpu_write by percpu_write Signed-off-by: Vincent Palatin Link: http://lkml.kernel.org/r/1354306532-1014-1-git-send-email-vpalatin@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 15 +++++++++------ arch/x86/kernel/smpboot.c | 5 +++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 831dbb9c6c02..41ab26ea6564 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -399,14 +399,17 @@ static inline void drop_init_fpu(struct task_struct *tsk) typedef struct { int preload; } fpu_switch_t; /* - * FIXME! We could do a totally lazy restore, but we need to - * add a per-cpu "this was the task that last touched the FPU - * on this CPU" variable, and the task needs to have a "I last - * touched the FPU on this CPU" and check them. + * Must be run with preemption disabled: this clears the fpu_owner_task, + * on this CPU. * - * We don't do that yet, so "fpu_lazy_restore()" always returns - * false, but some day.. + * This will disable any lazy FPU state restore of the current FPU state, + * but if the current thread owns the FPU, it will still be saved by. */ +static inline void __cpu_disable_lazy_restore(unsigned int cpu) +{ + per_cpu(fpu_owner_task, cpu) = NULL; +} + static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) { return new == this_cpu_read_stable(fpu_owner_task) && diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c80a33bc528b..f3e2ec878b8c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -68,6 +68,8 @@ #include #include #include +#include +#include #include #include #include @@ -818,6 +820,9 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* the FPU context is blank, nobody can own it */ + __cpu_disable_lazy_restore(cpu); + err = do_boot_cpu(apicid, cpu, tidle); if (err) { pr_debug("do_boot_cpu failed %d\n", err); From 412d32e6c98527078779e5b515823b2810e40324 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 28 Nov 2012 07:17:18 +0100 Subject: [PATCH 105/112] workqueue: exit rescuer_thread() as TASK_RUNNING A rescue thread exiting TASK_INTERRUPTIBLE can lead to a task scheduling off, never to be seen again. In the case where this occurred, an exiting thread hit reiserfs homebrew conditional resched while holding a mutex, bringing the box to its knees. PID: 18105 TASK: ffff8807fd412180 CPU: 5 COMMAND: "kdmflush" #0 [ffff8808157e7670] schedule at ffffffff8143f489 #1 [ffff8808157e77b8] reiserfs_get_block at ffffffffa038ab2d [reiserfs] #2 [ffff8808157e79a8] __block_write_begin at ffffffff8117fb14 #3 [ffff8808157e7a98] reiserfs_write_begin at ffffffffa0388695 [reiserfs] #4 [ffff8808157e7ad8] generic_perform_write at ffffffff810ee9e2 #5 [ffff8808157e7b58] generic_file_buffered_write at ffffffff810eeb41 #6 [ffff8808157e7ba8] __generic_file_aio_write at ffffffff810f1a3a #7 [ffff8808157e7c58] generic_file_aio_write at ffffffff810f1c88 #8 [ffff8808157e7cc8] do_sync_write at ffffffff8114f850 #9 [ffff8808157e7dd8] do_acct_process at ffffffff810a268f [exception RIP: kernel_thread_helper] RIP: ffffffff8144a5c0 RSP: ffff8808157e7f58 RFLAGS: 00000202 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8107af60 RDI: ffff8803ee491d18 RBP: 0000000000000000 R8: 0000000000000000 R9: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 Signed-off-by: Mike Galbraith Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/workqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 042d221d33cc..ac25db111f48 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2407,8 +2407,10 @@ static int rescuer_thread(void *__wq) repeat: set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (kthread_should_stop()) { + __set_current_state(TASK_RUNNING); return 0; + } /* * See whether any cpu is asking for help. Unbounded From 8852aac25e79e38cc6529f20298eed154f60b574 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 1 Dec 2012 16:23:42 -0800 Subject: [PATCH 106/112] workqueue: mod_delayed_work_on() shouldn't queue timer on 0 delay 8376fe22c7 ("workqueue: implement mod_delayed_work[_on]()") implemented mod_delayed_work[_on]() using the improved try_to_grab_pending(). The function is later used, among others, to replace [__]candel_delayed_work() + queue_delayed_work() combinations. Unfortunately, a delayed_work item w/ zero @delay is handled slightly differently by mod_delayed_work_on() compared to queue_delayed_work_on(). The latter skips timer altogether and directly queues it using queue_work_on() while the former schedules timer which will expire on the closest tick. This means, when @delay is zero, that [__]cancel_delayed_work() + queue_delayed_work_on() makes the target item immediately executable while mod_delayed_work_on() may induce delay of upto a full tick. This somewhat subtle difference breaks some of the converted users. e.g. block queue plugging uses delayed_work for deferred processing and uses mod_delayed_work_on() when the queue needs to be immediately unplugged. The above problem manifested as noticeably higher number of context switches under certain circumstances. The difference in behavior was caused by missing special case handling for 0 delay in mod_delayed_work_on() compared to queue_delayed_work_on(). Joonsoo Kim posted a patch to add it - ("workqueue: optimize mod_delayed_work_on() when @delay == 0")[1]. The patch was queued for 3.8 but it was described as optimization and I missed that it was a correctness issue. As both queue_delayed_work_on() and mod_delayed_work_on() use __queue_delayed_work() for queueing, it seems that the better approach is to move the 0 delay special handling to the function instead of duplicating it in mod_delayed_work_on(). Fix the problem by moving 0 delay special case handling from queue_delayed_work_on() to __queue_delayed_work(). This replaces Joonsoo's patch. [1] http://thread.gmane.org/gmane.linux.kernel/1379011/focus=1379012 Signed-off-by: Tejun Heo Reported-and-tested-by: Anders Kaseorg Reported-and-tested-by: Zlatko Calusic LKML-Reference: LKML-Reference: <50A78AA9.5040904@iskon.hr> Cc: Joonsoo Kim --- kernel/workqueue.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ac25db111f48..084aa47bac82 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1364,6 +1364,17 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); + /* + * If @delay is 0, queue @dwork->work immediately. This is for + * both optimization and correctness. The earliest @timer can + * expire is on the closest next tick and delayed_work users depend + * on that there's no such delay when @delay is 0. + */ + if (!delay) { + __queue_work(cpu, wq, &dwork->work); + return; + } + timer_stats_timer_set_start_info(&dwork->timer); /* @@ -1417,9 +1428,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, bool ret = false; unsigned long flags; - if (!delay) - return queue_work_on(cpu, wq, &dwork->work); - /* read the comment in __queue_work() */ local_irq_save(flags); From 64022d0b4e93ea432e95db55a72b8a1c5775f3c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Dec 2012 13:07:02 +0000 Subject: [PATCH 107/112] tcp: fix crashes in do_tcp_sendpages() Recent network changes allowed high order pages being used for skb fragments. This uncovered a bug in do_tcp_sendpages() which was assuming its caller provided an array of order-0 page pointers. We only have to deal with a single page in this function, and its order is irrelevant. Reported-by: Willy Tarreau Tested-by: Willy Tarreau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 083092e3aed6..e457c7ab2e28 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -830,8 +830,8 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, - size_t psize, int flags) +static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { struct tcp_sock *tp = tcp_sk(sk); int mss_now, size_goal; @@ -858,12 +858,9 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; - while (psize > 0) { + while (size > 0) { struct sk_buff *skb = tcp_write_queue_tail(sk); - struct page *page = pages[poffset / PAGE_SIZE]; int copy, i; - int offset = poffset % PAGE_SIZE; - int size = min_t(size_t, psize, PAGE_SIZE - offset); bool can_coalesce; if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { @@ -912,8 +909,8 @@ new_segment: TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; copied += copy; - poffset += copy; - if (!(psize -= copy)) + offset += copy; + if (!(size -= copy)) goto out; if (skb->len < size_goal || (flags & MSG_OOB)) @@ -960,7 +957,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); - res = do_tcp_sendpages(sk, &page, offset, size, flags); + res = do_tcp_sendpages(sk, page, offset, size, flags); release_sock(sk); return res; } From 892a925e42adb8192a3c832ad29cbc780fc466f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fran=C3=A7ois=20romieu?= Date: Sat, 1 Dec 2012 13:08:50 +0000 Subject: [PATCH 108/112] 8139cp: fix coherent mapping leak in error path. cp_open [...] rc = cp_alloc_rings(cp); if (rc) return rc; cp_alloc_rings [...] mem = dma_alloc_coherent(&cp->pdev->dev, CP_RING_BYTES, &cp->ring_dma, GFP_KERNEL); - cp_alloc_rings never frees the coherent mapping it allocates - neither do cp_open when cp_alloc_rings fails Signed-off-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/8139cp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index b01f83a044c4..609125a249d9 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1060,17 +1060,22 @@ static int cp_init_rings (struct cp_private *cp) static int cp_alloc_rings (struct cp_private *cp) { + struct device *d = &cp->pdev->dev; void *mem; + int rc; - mem = dma_alloc_coherent(&cp->pdev->dev, CP_RING_BYTES, - &cp->ring_dma, GFP_KERNEL); + mem = dma_alloc_coherent(d, CP_RING_BYTES, &cp->ring_dma, GFP_KERNEL); if (!mem) return -ENOMEM; cp->rx_ring = mem; cp->tx_ring = &cp->rx_ring[CP_RX_RING_SIZE]; - return cp_init_rings(cp); + rc = cp_init_rings(cp); + if (rc < 0) + dma_free_coherent(d, CP_RING_BYTES, cp->rx_ring, cp->ring_dma); + + return rc; } static void cp_clean_rings (struct cp_private *cp) From 9d73fc2d641f8831c9dc803177fe47c02120cc36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Dec 2012 17:55:07 +0000 Subject: [PATCH 109/112] open*(2) compat fixes (s390, arm64) The usual rules for open()/openat()/open_by_handle_at() are 1) native 32bit - don't force O_LARGEFILE in flags 2) native 64bit - force O_LARGEFILE in flags 3) compat on 64bit host - as for native 32bit 4) native 32bit ABI for 64bit system (mips/n32, x86/x32) - as for native 64bit There are only two exceptions - s390 compat has open() forcing O_LARGEFILE and arm64 compat has open_by_handle_at() doing the same thing. The same binaries on native host (s390/31 and arm resp.) will *not* force O_LARGEFILE, so IMO both are emulation bugs. Objections? The fix is obvious... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/unistd32.h | 2 +- arch/s390/kernel/compat_wrapper.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 6d909faebf28..656a6f291a35 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -392,7 +392,7 @@ __SYSCALL(367, sys_fanotify_init) __SYSCALL(368, compat_sys_fanotify_mark_wrapper) __SYSCALL(369, sys_prlimit64) __SYSCALL(370, sys_name_to_handle_at) -__SYSCALL(371, sys_open_by_handle_at) +__SYSCALL(371, compat_sys_open_by_handle_at) __SYSCALL(372, sys_clock_adjtime) __SYSCALL(373, sys_syncfs) diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index ad79b846535c..827e094a2f49 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -28,7 +28,7 @@ ENTRY(sys32_open_wrapper) llgtr %r2,%r2 # const char * lgfr %r3,%r3 # int lgfr %r4,%r4 # int - jg sys_open # branch to system call + jg compat_sys_open # branch to system call ENTRY(sys32_close_wrapper) llgfr %r2,%r2 # unsigned int From fd8ef11730f1d03d5d6555aa53126e9e34f52f12 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 3 Dec 2012 06:25:25 +0100 Subject: [PATCH 110/112] Revert "sched, autogroup: Stop going ahead if autogroup is disabled" This reverts commit 800d4d30c8f20bd728e5741a3b77c4859a613f7c. Between commits 8323f26ce342 ("sched: Fix race in task_group()") and 800d4d30c8f2 ("sched, autogroup: Stop going ahead if autogroup is disabled"), autogroup is a wreck. With both applied, all you have to do to crash a box is disable autogroup during boot up, then reboot.. boom, NULL pointer dereference due to commit 800d4d30c8f2 not allowing autogroup to move things, and commit 8323f26ce342 making that the only way to switch runqueues: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] effective_load.isra.43+0x50/0x90 Pid: 7047, comm: systemd-user-se Not tainted 3.6.8-smp #7 MEDIONPC MS-7502/MS-7502 RIP: effective_load.isra.43+0x50/0x90 Process systemd-user-se (pid: 7047, threadinfo ffff880221dde000, task ffff88022618b3a0) Call Trace: select_task_rq_fair+0x255/0x780 try_to_wake_up+0x156/0x2c0 wake_up_state+0xb/0x10 signal_wake_up+0x28/0x40 complete_signal+0x1d6/0x250 __send_signal+0x170/0x310 send_signal+0x40/0x80 do_send_sig_info+0x47/0x90 group_send_sig_info+0x4a/0x70 kill_pid_info+0x3a/0x60 sys_kill+0x97/0x1a0 ? vfs_read+0x120/0x160 ? sys_read+0x45/0x90 system_call_fastpath+0x16/0x1b Code: 49 0f af 41 50 31 d2 49 f7 f0 48 83 f8 01 48 0f 46 c6 48 2b 07 48 8b bf 40 01 00 00 48 85 ff 74 3a 45 31 c0 48 8b 8f 50 01 00 00 <48> 8b 11 4c 8b 89 80 00 00 00 49 89 d2 48 01 d0 45 8b 59 58 4c RIP [] effective_load.isra.43+0x50/0x90 RSP CR2: 0000000000000000 Signed-off-by: Mike Galbraith Acked-by: Ingo Molnar Cc: Yong Zhang Cc: Peter Zijlstra Cc: stable@vger.kernel.org # 2.6.39+ Signed-off-by: Linus Torvalds --- kernel/sched/auto_group.c | 4 ---- kernel/sched/auto_group.h | 5 ----- 2 files changed, 9 deletions(-) diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 0984a21076a3..15f60d01198b 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -143,15 +143,11 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) p->signal->autogroup = autogroup_kref_get(ag); - if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled)) - goto out; - t = p; do { sched_move_task(t); } while_each_thread(p, t); -out: unlock_task_sighand(p, &flags); autogroup_kref_put(prev); } diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h index 8bd047142816..443232ebbb53 100644 --- a/kernel/sched/auto_group.h +++ b/kernel/sched/auto_group.h @@ -4,11 +4,6 @@ #include struct autogroup { - /* - * reference doesn't mean how many thread attach to this - * autogroup now. It just stands for the number of task - * could use this autogroup. - */ struct kref kref; struct task_group *tg; struct rw_semaphore lock; From 25a3bc6bd1ca03ab504b8c55c98f8d0135644d53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Dec 2012 18:15:30 +0000 Subject: [PATCH 111/112] [parisc] open(2) compat bug In commit 9d73fc2d641f ("open*(2) compat fixes (s390, arm64)") I said: > > The usual rules for open()/openat()/open_by_handle_at() are > 1) native 32bit - don't force O_LARGEFILE in flags > 2) native 64bit - force O_LARGEFILE in flags > 3) compat on 64bit host - as for native 32bit > 4) native 32bit ABI for 64bit system (mips/n32, x86/x32) - as for native 64bit > > There are only two exceptions - s390 compat has open() forcing O_LARGEFILE and > arm64 compat has open_by_handle_at() doing the same thing. The same binaries > on native host (s390/31 and arm resp.) will *not* force O_LARGEFILE, so IMO > both are emulation bugs. Three exceptions, actually - parisc open() is another case like that. Native 32bit won't force O_LARGEFILE, the same binary on parisc64 will. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/parisc/kernel/syscall_table.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 3735abd7f8f6..cbf5d59d5d6a 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -60,7 +60,7 @@ ENTRY_SAME(fork_wrapper) ENTRY_SAME(read) ENTRY_SAME(write) - ENTRY_SAME(open) /* 5 */ + ENTRY_COMP(open) /* 5 */ ENTRY_SAME(close) ENTRY_SAME(waitpid) ENTRY_SAME(creat) From b69f0859dc8e633c5d8c06845811588fe17e68b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Dec 2012 11:22:37 -0800 Subject: [PATCH 112/112] Linux 3.7-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6cab75b74365..346e28f5de52 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Terrified Chipmunk # *DOCUMENTATION*