Merge branches 'core-urgent-for-linus' and 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author    Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 6 Dec 2011 00:51:21 +0000 (16:51 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 6 Dec 2011 00:51:21 +0000 (16:51 -0800)
* 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  slab, lockdep: Fix silly bug

* 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  genirq: Fix race condition when stopping the irq thread

20 files changed:
arch/s390/include/asm/pgtable.h
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/signal.c
arch/x86/include/asm/timer.h
drivers/gpio/Makefile
drivers/s390/cio/chsc.c
drivers/s390/cio/cio.h
drivers/s390/cio/css.c
drivers/s390/cio/device.c
drivers/s390/cio/device_fsm.c
drivers/s390/cio/device_ops.c
drivers/s390/cio/io_sch.h
drivers/s390/crypto/ap_bus.c
include/linux/init_task.h
kernel/irq/manage.c
kernel/sched.c
kernel/sched_fair.c
kernel/sched_features.h
kernel/sched_rt.c

arch/s390/include/asm/pgtable.h
index 524d23b8610ceb65c42661a97e79226125dbadf3..4f289ff0b7fe27b7de54d8886b6aee0b8890066f 100644
@@ -599,10 +599,10 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
        skey = page_get_storage_key(address);
        bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
        /* Clear page changed & referenced bit in the storage key */
-       if (bits) {
-               skey ^= bits;
-               page_set_storage_key(address, skey, 1);
-       }
+       if (bits & _PAGE_CHANGED)
+               page_set_storage_key(address, skey ^ bits, 1);
+       else if (bits)
+               page_reset_referenced(address);
        /* Transfer page changed & referenced bit to guest bits in pgste */
        pgste_val(pgste) |= bits << 48;         /* RCP_GR_BIT & RCP_GC_BIT */
        /* Get host changed & referenced bits from pgste */
arch/s390/kernel/ptrace.c
index 450931a45b684b2044106921a66a4702e1eb93f1..573bc29551ef471fee58b89d02df0ea0ff956503 100644
@@ -296,13 +296,6 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
                     ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))))
                        /* Invalid psw mask. */
                        return -EINVAL;
-               if (addr == (addr_t) &dummy->regs.psw.addr)
-                       /*
-                        * The debugger changed the instruction address,
-                        * reset system call restart, see signal.c:do_signal
-                        */
-                       task_thread_info(child)->system_call = 0;
-
                *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
 
        } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
@@ -614,11 +607,6 @@ static int __poke_user_compat(struct task_struct *child,
                        /* Transfer 31 bit amode bit to psw mask. */
                        regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
                                (__u64)(tmp & PSW32_ADDR_AMODE);
-                       /*
-                        * The debugger changed the instruction address,
-                        * reset system call restart, see signal.c:do_signal
-                        */
-                       task_thread_info(child)->system_call = 0;
                } else {
                        /* gpr 0-15 */
                        *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
@@ -905,6 +893,14 @@ static int s390_last_break_get(struct task_struct *target,
        return 0;
 }
 
+static int s390_last_break_set(struct task_struct *target,
+                              const struct user_regset *regset,
+                              unsigned int pos, unsigned int count,
+                              const void *kbuf, const void __user *ubuf)
+{
+       return 0;
+}
+
 #endif
 
 static int s390_system_call_get(struct task_struct *target,
@@ -951,6 +947,7 @@ static const struct user_regset s390_regsets[] = {
                .size = sizeof(long),
                .align = sizeof(long),
                .get = s390_last_break_get,
+               .set = s390_last_break_set,
        },
 #endif
        [REGSET_SYSTEM_CALL] = {
@@ -1116,6 +1113,14 @@ static int s390_compat_last_break_get(struct task_struct *target,
        return 0;
 }
 
+static int s390_compat_last_break_set(struct task_struct *target,
+                                     const struct user_regset *regset,
+                                     unsigned int pos, unsigned int count,
+                                     const void *kbuf, const void __user *ubuf)
+{
+       return 0;
+}
+
 static const struct user_regset s390_compat_regsets[] = {
        [REGSET_GENERAL] = {
                .core_note_type = NT_PRSTATUS,
@@ -1139,6 +1144,7 @@ static const struct user_regset s390_compat_regsets[] = {
                .size = sizeof(long),
                .align = sizeof(long),
                .get = s390_compat_last_break_get,
+               .set = s390_compat_last_break_set,
        },
        [REGSET_SYSTEM_CALL] = {
                .core_note_type = NT_S390_SYSTEM_CALL,
arch/s390/kernel/setup.c
index e58a462949b164ea90c5697696b9ccec47bb7ff4..e54c4ff8abaaa3d1a34efd34decd0d12c713e4ef 100644
@@ -579,7 +579,7 @@ static unsigned long __init find_crash_base(unsigned long crash_size,
                *msg = "first memory chunk must be at least crashkernel size";
                return 0;
        }
-       if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE))
+       if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
                return OLDMEM_BASE;
 
        for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
arch/s390/kernel/signal.c
index 05a85bc14c98a2556e86bf40cec7e76de2dc2423..7f6f9f35454518f091e4fb86e3d39e46c8aaf391 100644
@@ -460,9 +460,9 @@ void do_signal(struct pt_regs *regs)
                                                     regs->svc_code >> 16);
                                break;
                        }
-                       /* No longer in a system call */
-                       clear_thread_flag(TIF_SYSCALL);
                }
+               /* No longer in a system call */
+               clear_thread_flag(TIF_SYSCALL);
 
                if ((is_compat_task() ?
                     handle_signal32(signr, &ka, &info, oldset, regs) :
@@ -486,6 +486,7 @@ void do_signal(struct pt_regs *regs)
        }
 
        /* No handlers present - check for system call restart */
+       clear_thread_flag(TIF_SYSCALL);
        if (current_thread_info()->system_call) {
                regs->svc_code = current_thread_info()->system_call;
                switch (regs->gprs[2]) {
@@ -500,9 +501,6 @@ void do_signal(struct pt_regs *regs)
                        regs->gprs[2] = regs->orig_gpr2;
                        set_thread_flag(TIF_SYSCALL);
                        break;
-               default:
-                       clear_thread_flag(TIF_SYSCALL);
-                       break;
                }
        }
 
arch/x86/include/asm/timer.h
index fa7b9176b76cb33820034403fd8f4a50dc49709c..431793e5d4846f23bf5947f933fdaf9f5ae1987a 100644
@@ -32,6 +32,22 @@ extern int no_timer_check;
  *  (mathieu.desnoyers@polymtl.ca)
  *
  *                     -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ *
+ * In:
+ *
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * Although we may still have enough bits to store the value of ns,
+ * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
+ * leading to an incorrect result.
+ *
+ * To avoid this, we can decompose 'cycles' into quotient and remainder
+ * of division by SC.  Then,
+ *
+ * ns = (quot * SC + rem) * cyc2ns_scale / SC
+ *    = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
+ *
+ *                     - sqazi@google.com
  */
 
 DECLARE_PER_CPU(unsigned long, cyc2ns);
@@ -41,9 +57,14 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
+       unsigned long long quot;
+       unsigned long long rem;
        int cpu = smp_processor_id();
        unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-       ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR;
+       quot = (cyc >> CYC2NS_SCALE_FACTOR);
+       rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
+       ns += quot * per_cpu(cyc2ns, cpu) +
+               ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
        return ns;
 }
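
The quotient/remainder trick above is easy to sanity-check outside the kernel. Below is a minimal userspace sketch, assuming CYC2NS_SCALE_FACTOR is 10 (SC = 2^10) as in this header, with a made-up scale of 341 for a ~3 GHz TSC (roughly (10^6 << 10) / cpu_khz); it illustrates the overflow avoidance and is not kernel code.

#include <stdio.h>
#include <stdint.h>

#define CYC2NS_SCALE_FACTOR 10			/* SC = 2^10 */

static uint64_t cycles_2_ns(uint64_t cyc, uint64_t cyc2ns_scale)
{
	/* cyc = quot * 2^SC + rem, with rem < 2^SC */
	uint64_t quot = cyc >> CYC2NS_SCALE_FACTOR;
	uint64_t rem  = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);

	/* quot * scale stays inside 64 bits long after cyc * scale
	 * would have wrapped; rem * scale fits trivially. */
	return quot * cyc2ns_scale +
	       ((rem * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
}

int main(void)
{
	uint64_t scale = 341;		/* hypothetical ~3 GHz TSC */
	uint64_t cyc = 1ULL << 59;	/* cyc * scale would overflow 64 bits */

	printf("ns = %llu\n", (unsigned long long)cycles_2_ns(cyc, scale));
	return 0;
}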
 
drivers/gpio/Makefile
index dbcb0bcfd8dadf49ed156311594e704f81a9b9df..4e018d6a763996127cd370a6a3908273024f37a8 100644
@@ -18,7 +18,7 @@ obj-$(CONFIG_ARCH_DAVINCI)    += gpio-davinci.o
 obj-$(CONFIG_GPIO_EP93XX)      += gpio-ep93xx.o
 obj-$(CONFIG_GPIO_IT8761E)     += gpio-it8761e.o
 obj-$(CONFIG_GPIO_JANZ_TTL)    += gpio-janz-ttl.o
-obj-$(CONFIG_MACH_KS8695)      += gpio-ks8695.o
+obj-$(CONFIG_ARCH_KS8695)      += gpio-ks8695.o
 obj-$(CONFIG_GPIO_LANGWELL)    += gpio-langwell.o
 obj-$(CONFIG_ARCH_LPC32XX)     += gpio-lpc32xx.o
 obj-$(CONFIG_GPIO_MAX730X)     += gpio-max730x.o
drivers/s390/cio/chsc.c
index 75c3f1f8fd434301c3ba4a07a632e0ffefa6aac3..a84631a7391d3ed50c680b939129d159fc25daa3 100644
@@ -529,10 +529,7 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data)
 int chsc_chp_vary(struct chp_id chpid, int on)
 {
        struct channel_path *chp = chpid_to_chp(chpid);
-       struct chp_link link;
 
-       memset(&link, 0, sizeof(struct chp_link));
-       link.chpid = chpid;
        /* Wait until previous actions have settled. */
        css_wait_for_slow_path();
        /*
@@ -542,10 +539,10 @@ int chsc_chp_vary(struct chp_id chpid, int on)
                /* Try to update the channel path descriptor. */
                chsc_determine_base_channel_path_desc(chpid, &chp->desc);
                for_each_subchannel_staged(s390_subchannel_vary_chpid_on,
-                                          __s390_vary_chpid_on, &link);
+                                          __s390_vary_chpid_on, &chpid);
        } else
                for_each_subchannel_staged(s390_subchannel_vary_chpid_off,
-                                          NULL, &link);
+                                          NULL, &chpid);
 
        return 0;
 }
drivers/s390/cio/cio.h
index 155a82bcb9e545e2888430337d0b1c2b50239acd..4a1ff5c2eb881355204ffe8e047cfbe3bb8e3706 100644
@@ -68,8 +68,13 @@ struct schib {
        __u8 mda[4];             /* model dependent area */
 } __attribute__ ((packed,aligned(4)));
 
+/*
+ * When rescheduled, todo's with higher values will overwrite those
+ * with lower values.
+ */
 enum sch_todo {
        SCH_TODO_NOTHING,
+       SCH_TODO_EVAL,
        SCH_TODO_UNREG,
 };
 
drivers/s390/cio/css.c
index 92d7324acb1c78fbab348a2ea190c8901351df9d..21908e67bf6745d8dc91f791347a9d9cee538aa1 100644
@@ -195,51 +195,6 @@ void css_sch_device_unregister(struct subchannel *sch)
 }
 EXPORT_SYMBOL_GPL(css_sch_device_unregister);
 
-static void css_sch_todo(struct work_struct *work)
-{
-       struct subchannel *sch;
-       enum sch_todo todo;
-
-       sch = container_of(work, struct subchannel, todo_work);
-       /* Find out todo. */
-       spin_lock_irq(sch->lock);
-       todo = sch->todo;
-       CIO_MSG_EVENT(4, "sch_todo: sch=0.%x.%04x, todo=%d\n", sch->schid.ssid,
-                     sch->schid.sch_no, todo);
-       sch->todo = SCH_TODO_NOTHING;
-       spin_unlock_irq(sch->lock);
-       /* Perform todo. */
-       if (todo == SCH_TODO_UNREG)
-               css_sch_device_unregister(sch);
-       /* Release workqueue ref. */
-       put_device(&sch->dev);
-}
-
-/**
- * css_sched_sch_todo - schedule a subchannel operation
- * @sch: subchannel
- * @todo: todo
- *
- * Schedule the operation identified by @todo to be performed on the slow path
- * workqueue. Do nothing if another operation with higher priority is already
- * scheduled. Needs to be called with subchannel lock held.
- */
-void css_sched_sch_todo(struct subchannel *sch, enum sch_todo todo)
-{
-       CIO_MSG_EVENT(4, "sch_todo: sched sch=0.%x.%04x todo=%d\n",
-                     sch->schid.ssid, sch->schid.sch_no, todo);
-       if (sch->todo >= todo)
-               return;
-       /* Get workqueue ref. */
-       if (!get_device(&sch->dev))
-               return;
-       sch->todo = todo;
-       if (!queue_work(cio_work_q, &sch->todo_work)) {
-               /* Already queued, release workqueue ref. */
-               put_device(&sch->dev);
-       }
-}
-
 static void ssd_from_pmcw(struct chsc_ssd_info *ssd, struct pmcw *pmcw)
 {
        int i;
@@ -466,6 +421,65 @@ static void css_evaluate_subchannel(struct subchannel_id schid, int slow)
                css_schedule_eval(schid);
 }
 
+/**
+ * css_sched_sch_todo - schedule a subchannel operation
+ * @sch: subchannel
+ * @todo: todo
+ *
+ * Schedule the operation identified by @todo to be performed on the slow path
+ * workqueue. Do nothing if another operation with higher priority is already
+ * scheduled. Needs to be called with subchannel lock held.
+ */
+void css_sched_sch_todo(struct subchannel *sch, enum sch_todo todo)
+{
+       CIO_MSG_EVENT(4, "sch_todo: sched sch=0.%x.%04x todo=%d\n",
+                     sch->schid.ssid, sch->schid.sch_no, todo);
+       if (sch->todo >= todo)
+               return;
+       /* Get workqueue ref. */
+       if (!get_device(&sch->dev))
+               return;
+       sch->todo = todo;
+       if (!queue_work(cio_work_q, &sch->todo_work)) {
+               /* Already queued, release workqueue ref. */
+               put_device(&sch->dev);
+       }
+}
+
+static void css_sch_todo(struct work_struct *work)
+{
+       struct subchannel *sch;
+       enum sch_todo todo;
+       int ret;
+
+       sch = container_of(work, struct subchannel, todo_work);
+       /* Find out todo. */
+       spin_lock_irq(sch->lock);
+       todo = sch->todo;
+       CIO_MSG_EVENT(4, "sch_todo: sch=0.%x.%04x, todo=%d\n", sch->schid.ssid,
+                     sch->schid.sch_no, todo);
+       sch->todo = SCH_TODO_NOTHING;
+       spin_unlock_irq(sch->lock);
+       /* Perform todo. */
+       switch (todo) {
+       case SCH_TODO_NOTHING:
+               break;
+       case SCH_TODO_EVAL:
+               ret = css_evaluate_known_subchannel(sch, 1);
+               if (ret == -EAGAIN) {
+                       spin_lock_irq(sch->lock);
+                       css_sched_sch_todo(sch, todo);
+                       spin_unlock_irq(sch->lock);
+               }
+               break;
+       case SCH_TODO_UNREG:
+               css_sch_device_unregister(sch);
+               break;
+       }
+       /* Release workqueue ref. */
+       put_device(&sch->dev);
+}
+
 static struct idset *slow_subchannel_set;
 static spinlock_t slow_subchannel_lock;
 static wait_queue_head_t css_eval_wq;
drivers/s390/cio/device.c
index d734f4a0ecac23cea1d821b316a563b71087ccc6..47269858ecb662af862c38a5e96fc4f9aacfe2ca 100644
@@ -1868,9 +1868,9 @@ static void __ccw_device_pm_restore(struct ccw_device *cdev)
         */
        cdev->private->flags.resuming = 1;
        cdev->private->path_new_mask = LPM_ANYPATH;
-       css_schedule_eval(sch->schid);
+       css_sched_sch_todo(sch, SCH_TODO_EVAL);
        spin_unlock_irq(sch->lock);
-       css_complete_work();
+       css_wait_for_slow_path();
 
        /* cdev may have been moved to a different subchannel. */
        sch = to_subchannel(cdev->dev.parent);
drivers/s390/cio/device_fsm.c
index 52c233fa2b1281d14a2881618465606447bef365..1b853513c891ca2f010f8703b110f49c6f65afb3 100644
@@ -496,8 +496,26 @@ static void ccw_device_reset_path_events(struct ccw_device *cdev)
        cdev->private->pgid_reset_mask = 0;
 }
 
-void
-ccw_device_verify_done(struct ccw_device *cdev, int err)
+static void create_fake_irb(struct irb *irb, int type)
+{
+       memset(irb, 0, sizeof(*irb));
+       if (type == FAKE_CMD_IRB) {
+               struct cmd_scsw *scsw = &irb->scsw.cmd;
+               scsw->cc = 1;
+               scsw->fctl = SCSW_FCTL_START_FUNC;
+               scsw->actl = SCSW_ACTL_START_PEND;
+               scsw->stctl = SCSW_STCTL_STATUS_PEND;
+       } else if (type == FAKE_TM_IRB) {
+               struct tm_scsw *scsw = &irb->scsw.tm;
+               scsw->x = 1;
+               scsw->cc = 1;
+               scsw->fctl = SCSW_FCTL_START_FUNC;
+               scsw->actl = SCSW_ACTL_START_PEND;
+               scsw->stctl = SCSW_STCTL_STATUS_PEND;
+       }
+}
+
+void ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
        struct subchannel *sch;
 
@@ -520,12 +538,8 @@ callback:
                ccw_device_done(cdev, DEV_STATE_ONLINE);
                /* Deliver fake irb to device driver, if needed. */
                if (cdev->private->flags.fake_irb) {
-                       memset(&cdev->private->irb, 0, sizeof(struct irb));
-                       cdev->private->irb.scsw.cmd.cc = 1;
-                       cdev->private->irb.scsw.cmd.fctl = SCSW_FCTL_START_FUNC;
-                       cdev->private->irb.scsw.cmd.actl = SCSW_ACTL_START_PEND;
-                       cdev->private->irb.scsw.cmd.stctl =
-                               SCSW_STCTL_STATUS_PEND;
+                       create_fake_irb(&cdev->private->irb,
+                                       cdev->private->flags.fake_irb);
                        cdev->private->flags.fake_irb = 0;
                        if (cdev->handler)
                                cdev->handler(cdev, cdev->private->intparm,
drivers/s390/cio/device_ops.c
index f98698d5735e887e0fb6cc46f00a63012ecdccb5..ec7fb6d3b479a25a32bfad67ecc36a3539782b39 100644
@@ -198,7 +198,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
        if (cdev->private->state == DEV_STATE_VERIFY) {
                /* Remember to fake irb when finished. */
                if (!cdev->private->flags.fake_irb) {
-                       cdev->private->flags.fake_irb = 1;
+                       cdev->private->flags.fake_irb = FAKE_CMD_IRB;
                        cdev->private->intparm = intparm;
                        return 0;
                } else
@@ -213,9 +213,9 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
        ret = cio_set_options (sch, flags);
        if (ret)
                return ret;
-       /* Adjust requested path mask to excluded varied off paths. */
+       /* Adjust requested path mask to exclude unusable paths. */
        if (lpm) {
-               lpm &= sch->opm;
+               lpm &= sch->lpm;
                if (lpm == 0)
                        return -EACCES;
        }
@@ -605,11 +605,21 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
        sch = to_subchannel(cdev->dev.parent);
        if (!sch->schib.pmcw.ena)
                return -EINVAL;
+       if (cdev->private->state == DEV_STATE_VERIFY) {
+               /* Remember to fake irb when finished. */
+               if (!cdev->private->flags.fake_irb) {
+                       cdev->private->flags.fake_irb = FAKE_TM_IRB;
+                       cdev->private->intparm = intparm;
+                       return 0;
+               } else
+                       /* There's already a fake I/O around. */
+                       return -EBUSY;
+       }
        if (cdev->private->state != DEV_STATE_ONLINE)
                return -EIO;
-       /* Adjust requested path mask to excluded varied off paths. */
+       /* Adjust requested path mask to exclude unusable paths. */
        if (lpm) {
-               lpm &= sch->opm;
+               lpm &= sch->lpm;
                if (lpm == 0)
                        return -EACCES;
        }
drivers/s390/cio/io_sch.h
index 2ebb492a5c17dcb8a9e05fb23209162edb43d849..76253dfcc1be86a18eba7ac7ea4d6f53c134bc7e 100644
@@ -111,6 +111,9 @@ enum cdev_todo {
        CDEV_TODO_UNREG_EVAL,
 };
 
+#define FAKE_CMD_IRB   1
+#define FAKE_TM_IRB    2
+
 struct ccw_device_private {
        struct ccw_device *cdev;
        struct subchannel *sch;
@@ -138,7 +141,7 @@ struct ccw_device_private {
                unsigned int doverify:1;    /* delayed path verification */
                unsigned int donotify:1;    /* call notify function */
                unsigned int recog_done:1;  /* dev. recog. complete */
-               unsigned int fake_irb:1;    /* deliver faked irb */
+               unsigned int fake_irb:2;    /* deliver faked irb */
                unsigned int resuming:1;    /* recognition while resume */
                unsigned int pgroup:1;      /* pathgroup is set up */
                unsigned int mpath:1;       /* multipathing is set up */
drivers/s390/crypto/ap_bus.c
index ec94f049e99543849ed56c90c665102c40c5b87e..96bbe9d12a79fbef17fa9b4df331adf5a24a96c3 100644
@@ -1552,6 +1552,8 @@ static void ap_reset(struct ap_device *ap_dev)
        rc = ap_init_queue(ap_dev->qid);
        if (rc == -ENODEV)
                ap_dev->unregistered = 1;
+       else
+               __ap_schedule_poll_timer();
 }
 
 static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
include/linux/init_task.h
index 94b1e356c02ab4fa808b5dd43d1ed9f57afbe50f..32574eef93941bab73a9b43138cd8a67511101ec 100644
@@ -126,6 +126,8 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#define INIT_TASK_COMM "swapper"
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -162,7 +164,7 @@ extern struct cred init_cred;
        .group_leader   = &tsk,                                         \
        RCU_INIT_POINTER(.real_cred, &init_cred),                       \
        RCU_INIT_POINTER(.cred, &init_cred),                            \
-       .comm           = "swapper",                                    \
+       .comm           = INIT_TASK_COMM,                               \
        .thread         = INIT_THREAD,                                  \
        .fs             = &init_fs,                                     \
        .files          = &init_files,                                  \
kernel/irq/manage.c
index 0e2b179bc7b371ea8aa60f52899ae60ff1cb5ec2..1da999f5e746caedacb8b40d0015e7c3f273ad04 100644
@@ -623,8 +623,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id)
 
 static int irq_wait_for_interrupt(struct irqaction *action)
 {
+       set_current_state(TASK_INTERRUPTIBLE);
+
        while (!kthread_should_stop()) {
-               set_current_state(TASK_INTERRUPTIBLE);
 
                if (test_and_clear_bit(IRQTF_RUNTHREAD,
                                       &action->thread_flags)) {
@@ -632,7 +633,9 @@ static int irq_wait_for_interrupt(struct irqaction *action)
                        return 0;
                }
                schedule();
+               set_current_state(TASK_INTERRUPTIBLE);
        }
+       __set_current_state(TASK_RUNNING);
        return -1;
 }
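
The hunk above closes a lost-wakeup window: kthread_stop() sets the stop flag and then calls wake_up_process(), and previously that wakeup could land between the kthread_should_stop() check and set_current_state(TASK_INTERRUPTIBLE), leaving the irq thread asleep with the stop request pending. Arming the sleep state before every check means a late wakeup flips the task back to TASK_RUNNING, so the subsequent schedule() returns immediately and the loop re-checks. The general shape of the fixed loop, with a hypothetical work bit standing in for IRQTF_RUNTHREAD, is sketched below.

	set_current_state(TASK_INTERRUPTIBLE);	/* arm before checking */
	while (!kthread_should_stop()) {
		if (test_and_clear_bit(MY_WORK_BIT, &flags)) {	/* hypothetical */
			__set_current_state(TASK_RUNNING);
			return 0;		/* go run the handler */
		}
		schedule();			/* returns at once if already woken */
		set_current_state(TASK_INTERRUPTIBLE);	/* re-arm, then re-check */
	}
	__set_current_state(TASK_RUNNING);	/* never return marked asleep */
	return -1;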
 
kernel/sched.c
index 0e9344a71be33f6335bf55fd3a7bfe0f52418162..d6b149ccf925c320841e8a42f31fd23b6ee64dc6 100644
@@ -71,6 +71,7 @@
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
+#include <linux/init_task.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -4810,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion);
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. The timeout is in jiffies. It is not
  * interruptible.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
  */
 unsigned long __sched
 wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -4824,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout);
  *
  * This waits for completion of a specific task to be signaled. It is
  * interruptible.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
  */
 int __sched wait_for_completion_interruptible(struct completion *x)
 {
@@ -4841,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
  *
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
  */
 long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
@@ -4856,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
  *
  * This waits to be signaled for completion of a specific task. It can be
  * interrupted by a kill signal.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
  */
 int __sched wait_for_completion_killable(struct completion *x)
 {
@@ -4874,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable);
  * This waits for either a completion of a specific task to be
  * signaled or for a specified timeout to expire. It can be
  * interrupted by a kill signal. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
  */
 long __sched
 wait_for_completion_killable_timeout(struct completion *x,
@@ -6099,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
         */
        idle->sched_class = &idle_sched_class;
        ftrace_graph_init_idle_task(idle, cpu);
+#if defined(CONFIG_SMP)
+       sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+#endif
 }
 
 /*
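
The comments added above pin down one convention for the whole wait_for_completion_*() family: 0 for a timeout, the positive number of jiffies remaining on completion, and -ERESTARTSYS when interrupted or killed. A hedged caller sketch (hypothetical fw_done completion and 500 ms budget, not part of this commit) shows how those values are typically consumed:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

static DECLARE_COMPLETION(fw_done);	/* hypothetical event */

static int wait_for_firmware(void)
{
	unsigned long left;

	left = wait_for_completion_timeout(&fw_done,
					   msecs_to_jiffies(500));
	if (!left)
		return -ETIMEDOUT;	/* 0 => the timeout expired */
	/* positive => completed with 'left' jiffies to spare */
	return 0;
}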
kernel/sched_fair.c
index 5c9e67923b7cfd7826903c17322c3f0c55de5d74..a78ed2736ba79f02a201d8256bd9e0a56d57981e 100644
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
                list_del_leaf_cfs_rq(cfs_rq);
 }
 
+static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+{
+       long tg_weight;
+
+       /*
+        * Use this CPU's actual weight instead of the last load_contribution
+        * to gain a more accurate current total weight. See
+        * update_cfs_rq_load_contribution().
+        */
+       tg_weight = atomic_read(&tg->load_weight);
+       tg_weight -= cfs_rq->load_contribution;
+       tg_weight += cfs_rq->load.weight;
+
+       return tg_weight;
+}
+
 static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
-       long load_weight, load, shares;
+       long tg_weight, load, shares;
 
+       tg_weight = calc_tg_weight(tg, cfs_rq);
        load = cfs_rq->load.weight;
 
-       load_weight = atomic_read(&tg->load_weight);
-       load_weight += load;
-       load_weight -= cfs_rq->load_contribution;
-
        shares = (tg->shares * load);
-       if (load_weight)
-               shares /= load_weight;
+       if (tg_weight)
+               shares /= tg_weight;
 
        if (shares < MIN_SHARES)
                shares = MIN_SHARES;
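
For concreteness, with hypothetical numbers tg->shares = 1024, a local queue load of 2048 and calc_tg_weight() returning 8192, this yields shares = 1024 * 2048 / 8192 = 256: a CPU holding a quarter of the group's total weight receives a quarter of its shares. Folding the instantaneous load.weight into calc_tg_weight() rather than the stale load_contribution keeps that ratio honest right after the local queue's weight changes.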
@@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-       if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+       if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
                return;
 
        __return_cfs_rq_runtime(cfs_rq);
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
  * can calculate the shift in shares.
+ *
+ * Calculate the effective load difference if @wl is added (subtracted) to @tg
+ * on this @cpu and results in a total addition (subtraction) of @wg to the
+ * total group weight.
+ *
+ * Given a runqueue weight distribution (rw_i) we can compute a shares
+ * distribution (s_i) using:
+ *
+ *   s_i = rw_i / \Sum rw_j                                            (1)
+ *
+ * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
+ * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
+ * shares distribution (s_i):
+ *
+ *   rw_i = {   2,   4,   1,   0 }
+ *   s_i  = { 2/7, 4/7, 1/7,   0 }
+ *
+ * As per wake_affine() we're interested in the load of two CPUs (the CPU the
+ * task used to run on and the CPU the waker is running on), we need to
+ * compute the effect of waking a task on either CPU and, in case of a sync
+ * wakeup, compute the effect of the current task going to sleep.
+ *
+ * So for a change of @wl to the local @cpu with an overall group weight change
+ * of @wl we can compute the new shares distribution (s'_i) using:
+ *
+ *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)                           (2)
+ *
+ * Suppose we're interested in CPUs 0 and 1, and want to compute the load
+ * differences in waking a task to CPU 0. The additional task changes the
+ * weight and shares distributions like:
+ *
+ *   rw'_i = {   3,   4,   1,   0 }
+ *   s'_i  = { 3/8, 4/8, 1/8,   0 }
+ *
+ * We can then compute the difference in effective weight by using:
+ *
+ *   dw_i = S * (s'_i - s_i)                                           (3)
+ *
+ * Where 'S' is the group weight as seen by its parent.
+ *
+ * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
+ * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
+ * 4/7) times the weight of the group.
  */
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
        struct sched_entity *se = tg->se[cpu];
 
-       if (!tg->parent)
+       if (!tg->parent)        /* the trivial, non-cgroup case */
                return wl;
 
        for_each_sched_entity(se) {
-               long lw, w;
+               long w, W;
 
                tg = se->my_q->tg;
-               w = se->my_q->load.weight;
 
-               /* use this cpu's instantaneous contribution */
-               lw = atomic_read(&tg->load_weight);
-               lw -= se->my_q->load_contribution;
-               lw += w + wg;
+               /*
+                * W = @wg + \Sum rw_j
+                */
+               W = wg + calc_tg_weight(tg, se->my_q);
 
-               wl += w;
+               /*
+                * w = rw_i + @wl
+                */
+               w = se->my_q->load.weight + wl;
 
-               if (lw > 0 && wl < lw)
-                       wl = (wl * tg->shares) / lw;
+               /*
+                * wl = S * s'_i; see (2)
+                */
+               if (W > 0 && w < W)
+                       wl = (w * tg->shares) / W;
                else
                        wl = tg->shares;
 
-               /* zero point is MIN_SHARES */
+               /*
+                * Per the above, wl is the new se->load.weight value; since
+                * those are clipped to [MIN_SHARES, ...) do so now. See
+                * calc_cfs_shares().
+                */
                if (wl < MIN_SHARES)
                        wl = MIN_SHARES;
+
+               /*
+                * wl = dw_i = S * (s'_i - s_i); see (3)
+                */
                wl -= se->load.weight;
+
+               /*
+                * Recursively apply this logic to all parent groups to compute
+                * the final effective load change on the root group. Since
+                * only the @tg group gets extra weight, all parent groups can
+                * only redistribute existing shares. @wl is the shift in shares
+                * resulting from this level per the above.
+                */
                wg = 0;
        }
 
@@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
        int cpu = smp_processor_id();
        int prev_cpu = task_cpu(p);
        struct sched_domain *sd;
-       int i;
+       struct sched_group *sg;
+       int i, smt = 0;
 
        /*
         * If the task is going to be woken-up on this cpu and if it is
@@ -2269,25 +2347,38 @@ static int select_idle_sibling(struct task_struct *p, int target)
         * Otherwise, iterate the domains and find an eligible idle cpu.
         */
        rcu_read_lock();
+again:
        for_each_domain(target, sd) {
-               if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
-                       break;
+               if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
+                       continue;
 
-               for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
-                       if (idle_cpu(i)) {
-                               target = i;
-                               break;
+               if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) {
+                       if (!smt) {
+                               smt = 1;
+                               goto again;
                        }
+                       break;
                }
 
-               /*
-                * Lets stop looking for an idle sibling when we reached
-                * the domain that spans the current cpu and prev_cpu.
-                */
-               if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
-                   cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
-                       break;
+               sg = sd->groups;
+               do {
+                       if (!cpumask_intersects(sched_group_cpus(sg),
+                                               tsk_cpus_allowed(p)))
+                               goto next;
+
+                       for_each_cpu(i, sched_group_cpus(sg)) {
+                               if (!idle_cpu(i))
+                                       goto next;
+                       }
+
+                       target = cpumask_first_and(sched_group_cpus(sg),
+                                       tsk_cpus_allowed(p));
+                       goto done;
+next:
+                       sg = sg->next;
+               } while (sg != sd->groups);
        }
+done:
        rcu_read_unlock();
 
        return target;
@@ -3511,7 +3602,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
 }
 
 /**
- * update_sd_lb_stats - Update sched_group's statistics for load balancing.
+ * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
kernel/sched_features.h
index efa0a7b75dde7408e89bd07e5b1a490c4f68ea95..84802245abd2562acad3c0eb734fe48cc5213b8e 100644
@@ -67,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1)
 SCHED_FEAT(TTWU_QUEUE, 1)
 
 SCHED_FEAT(FORCE_SD_OVERLAP, 0)
+SCHED_FEAT(RT_RUNTIME_SHARE, 1)
kernel/sched_rt.c
index 056cbd2e2a27fea8cb15e76bfc711fe32de03303..583a1368afe6ed7d96879d762f73553b2068e27c 100644
@@ -560,6 +560,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
 {
        int more = 0;
 
+       if (!sched_feat(RT_RUNTIME_SHARE))
+               return more;
+
        if (rt_rq->rt_time > rt_rq->rt_runtime) {
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                more = do_balance_runtime(rt_rq);