sched/tsc: replace my fix with two mainline commits

Drop the local patch "sched/clock: fix early boot splat on clock
transition to unstable" and instead import the mainline commits
cf15ca8deda8 ("sched/clock: Initialize all per-CPU state before
switching (back) to unstable") and b421b22b00b0 ("x86/tsc, sched/clock,
clocksource: Use clocksource watchdog to provide stable sync points").
With those applied, __clear_sched_clock_stable() no longer reads
this_scd() in preemptible context; the per-CPU timestamping moves into
the sched_clock work item, so the local preemption fix is no longer
needed.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
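
For reference, the combined effect of the two imported commits looks
roughly like this (a condensed paraphrase of the hunks below, not the
verbatim upstream code; all identifiers are taken from the patches):

  /* 1st commit: the stable->unstable transition runs from a work item */
  static void __sched_clock_work(struct work_struct *work)
  {
          struct sched_clock_data *scd;
          int cpu;

          /* stamp this CPU's state with preemption disabled ... */
          preempt_disable();
          scd = this_scd();
          __scd_stamp(scd);
          scd->clock = scd->tick_gtod + __gtod_offset;
          preempt_enable();

          /* ... then clone it to every CPU before flipping the key */
          for_each_possible_cpu(cpu)
                  per_cpu(sched_clock_data, cpu) = *scd;

          static_branch_disable(&__sched_clock_stable);
  }

  static void __clear_sched_clock_stable(void)
  {
          if (!sched_clock_stable())
                  return;

          /* an unstable sched_clock needs the periodic tick */
          tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
          schedule_work(&sched_clock_work);
  }

  /* 2nd commit: while the TSC is stable, __gtod_offset is refreshed
   * from the clocksource watchdog rather than from every tick */
  static void tsc_cs_tick_stable(struct clocksource *cs)
  {
          if (tsc_unstable)
                  return;

          if (using_native_sched_clock())
                  sched_clock_tick_stable();
  }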
diff --git a/patches/sched-clock-Initialize-all-per-CPU-state-before-swit.patch b/patches/sched-clock-Initialize-all-per-CPU-state-before-swit.patch
new file mode 100644
index 0000000..95d17ed
--- /dev/null
+++ b/patches/sched-clock-Initialize-all-per-CPU-state-before-swit.patch
@@ -0,0 +1,122 @@
+From cf15ca8deda86b27b66e27848b4b0fe58098fc0b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 21 Apr 2017 12:11:53 +0200
+Subject: [PATCH] sched/clock: Initialize all per-CPU state before switching
+ (back) to unstable
+
+commit cf15ca8deda86b27b66e27848b4b0fe58098fc0b upstream.
+
+In preparation for not keeping the sched_clock_tick() active for
+stable TSC, we need to explicitly initialize all per-CPU state
+before switching back to unstable.
+
+Note: this patch loses the __gtod_offset calculation; it will be
+restored in the next one.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
+index 00a45c45beca..dc650851935f 100644
+--- a/kernel/sched/clock.c
++++ b/kernel/sched/clock.c
+@@ -124,6 +124,12 @@ int sched_clock_stable(void)
+ 	return static_branch_likely(&__sched_clock_stable);
+ }
+ 
++static void __scd_stamp(struct sched_clock_data *scd)
++{
++	scd->tick_gtod = ktime_get_ns();
++	scd->tick_raw = sched_clock();
++}
++
+ static void __set_sched_clock_stable(void)
+ {
+ 	struct sched_clock_data *scd = this_scd();
+@@ -141,8 +147,37 @@ static void __set_sched_clock_stable(void)
+ 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
+ }
+ 
++/*
++ * If we ever get here, we're screwed, because we found out -- typically after
++ * the fact -- that TSC wasn't good. This means all our clocksources (including
++ * ktime) could have reported wrong values.
++ *
++ * What we do here is an attempt to fix up and continue sort of where we left
++ * off in a coherent manner.
++ *
++ * The only way to fully avoid random clock jumps is to boot with:
++ * "tsc=unstable".
++ */
+ static void __sched_clock_work(struct work_struct *work)
+ {
++	struct sched_clock_data *scd;
++	int cpu;
++
++	/* take a current timestamp and set 'now' */
++	preempt_disable();
++	scd = this_scd();
++	__scd_stamp(scd);
++	scd->clock = scd->tick_gtod + __gtod_offset;
++	preempt_enable();
++
++	/* clone to all CPUs */
++	for_each_possible_cpu(cpu)
++		per_cpu(sched_clock_data, cpu) = *scd;
++
++	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
++			scd->tick_gtod, __gtod_offset,
++			scd->tick_raw,  __sched_clock_offset);
++
+ 	static_branch_disable(&__sched_clock_stable);
+ }
+ 
+@@ -150,27 +185,11 @@ static DECLARE_WORK(sched_clock_work, __sched_clock_work);
+ 
+ static void __clear_sched_clock_stable(void)
+ {
+-	struct sched_clock_data *scd = this_scd();
+-
+-	/*
+-	 * Attempt to make the stable->unstable transition continuous.
+-	 *
+-	 * Trouble is, this is typically called from the TSC watchdog
+-	 * timer, which is late per definition. This means the tick
+-	 * values can already be screwy.
+-	 *
+-	 * Still do what we can.
+-	 */
+-	__gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
+-
+-	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
+-			scd->tick_gtod, __gtod_offset,
+-			scd->tick_raw,  __sched_clock_offset);
++	if (!sched_clock_stable())
++		return;
+ 
+ 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
+-
+-	if (sched_clock_stable())
+-		schedule_work(&sched_clock_work);
++	schedule_work(&sched_clock_work);
+ }
+ 
+ void clear_sched_clock_stable(void)
+@@ -357,8 +376,7 @@ void sched_clock_tick(void)
+ 	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+ 	 */
+ 	scd = this_scd();
+-	scd->tick_raw  = sched_clock();
+-	scd->tick_gtod = ktime_get_ns();
++	__scd_stamp(scd);
+ 
+ 	if (!sched_clock_stable() && likely(sched_clock_running))
+ 		sched_clock_local(scd);
+-- 
+2.1.4
+
diff --git a/patches/sched-clock-fix-early-boot-splat-on-clock-transition.patch b/patches/sched-clock-fix-early-boot-splat-on-clock-transition.patch
deleted file mode 100644
index 5d27cb1..0000000
--- a/patches/sched-clock-fix-early-boot-splat-on-clock-transition.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 9383285e87322b696eb48309a5ef29d421b84bad Mon Sep 17 00:00:00 2001
-From: Paul Gortmaker <paul.gortmaker@windriver.com>
-Date: Mon, 2 Oct 2017 21:59:48 -0400
-Subject: [PATCH rt-v4.11] sched/clock: fix early boot splat on clock transition to
- unstable
-
-On an older machine with a Pentium(R) Dual-Core E5300 I see the
-the following early (see time stamps) boot splat on clock transition
-due to TSC unstable (indicated in the last line):
-
-  [  2.487904] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
-  [  2.487909] caller is debug_smp_processor_id+0x17/0x20
-  [  2.487911] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.12-rt14-00451
-  [  2.487911] Hardware name: Dell Inc. OptiPlex 760                 /0M858N, BIOS A16 08/06/2013
-  [  2.487912] Call Trace:
-  [  2.487918]  dump_stack+0x4f/0x6a
-  [  2.487919]  check_preemption_disabled+0xda/0xe0
-  [  2.487921]  debug_smp_processor_id+0x17/0x20
-  [  2.487924]  clear_sched_clock_stable+0x28/0x80
-  [  2.487927]  mark_tsc_unstable+0x22/0x70
-  [  2.487930]  acpi_processor_get_power_info+0x3e3/0x6a0
-  [  2.487932]  acpi_processor_power_init+0x3a/0x1d0
-  [  2.487933]  __acpi_processor_start+0x162/0x1b0
-               ....
-  [  2.487950]  acpi_processor_driver_init+0x20/0x96
-  [  2.487951]  do_one_initcall+0x3f/0x170
-  [  2.487954]  kernel_init_freeable+0x18e/0x216
-  [  2.487955]  ? rest_init+0xd0/0xd0
-  [  2.487956]  kernel_init+0x9/0x100
-  [  2.487958]  ret_from_fork+0x22/0x30
-  [  2.487960] sched_clock: Marking unstable (2488005383, -223143)<-(2590866395, -103084155)
-  [  2.488004] tsc: Marking TSC unstable due to TSC halts in idle
-
-(gdb) list *clear_sched_clock_stable+0x28
-0xffffffff8108bbb8 is in clear_sched_clock_stable (kernel/sched/clock.c:114).
-
-[...]
-
-112     static inline struct sched_clock_data *this_scd(void)
-113     {
-114             return this_cpu_ptr(&sched_clock_data);
-115     }
-
-We now get this_scd with preemption disabled.  I also decided to pass
-in the scd to __clear_sched_clock_stable in the hope it made it more
-clear that the caller (currently only one) needs to get this_scd with
-preemption disabled, even though that wasn't strictly required.
-
-Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
-
-diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
-index 11ad4bd995e2..32dcda23c616 100644
---- a/kernel/sched/clock.c
-+++ b/kernel/sched/clock.c
-@@ -155,9 +155,8 @@ static void __sched_clock_work(struct work_struct *work)
- 
- static DECLARE_WORK(sched_clock_work, __sched_clock_work);
- 
--static void __clear_sched_clock_stable(void)
-+static void __clear_sched_clock_stable(struct sched_clock_data *scd)
- {
--	struct sched_clock_data *scd = this_scd();
- 
- 	/*
- 	 * Attempt to make the stable->unstable transition continuous.
-@@ -186,8 +185,14 @@ void clear_sched_clock_stable(void)
- 
- 	smp_mb(); /* matches sched_clock_init_late() */
- 
--	if (sched_clock_running == 2)
--		__clear_sched_clock_stable();
-+	if (sched_clock_running == 2) {
-+		struct sched_clock_data *scd;
-+
-+		preempt_disable();
-+		scd = this_scd();
-+		preempt_enable();
-+		__clear_sched_clock_stable(scd);
-+	}
- }
- 
- void sched_clock_init_late(void)
--- 
-2.1.4
-
diff --git a/patches/series b/patches/series
index 01d15e9..3d67fcd 100644
--- a/patches/series
+++ b/patches/series
@@ -19,6 +19,8 @@
 smp-hotplug-Move-unparking-of-percpu-threads-to-the-.patch
 
 # a few patches from tip's sched/core
+sched-clock-Initialize-all-per-CPU-state-before-swit.patch
+x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch
 0001-sched-clock-Fix-early-boot-preempt-assumption-in-__s.patch
 0001-init-Pin-init-task-to-the-boot-CPU-initially.patch
 0002-arm-Adjust-system_state-check.patch
@@ -229,7 +231,6 @@
 signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
 
 # SCHED
-sched-clock-fix-early-boot-splat-on-clock-transition.patch
 
 # GENERIC CMPXCHG
 
diff --git a/patches/x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch b/patches/x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch
new file mode 100644
index 0000000..6db62ad
--- /dev/null
+++ b/patches/x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch
@@ -0,0 +1,155 @@
+From b421b22b00b0011f6a2ce3561176c4e79e640c49 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 21 Apr 2017 12:14:13 +0200
+Subject: [PATCH] x86/tsc, sched/clock, clocksource: Use clocksource watchdog
+ to provide stable sync points
+
+commit b421b22b00b0011f6a2ce3561176c4e79e640c49 upstream.
+
+Currently we keep sched_clock_tick() active for stable TSC in order to
+keep the per-CPU state semi up-to-date. The (obvious) problem is that
+by the time we detect TSC is borked, our per-CPU state is also borked.
+
+So hook into the clocksource watchdog and call a method after we've
+found it to still be stable.
+
+There's the obvious race where the TSC goes wonky between finding it
+stable and us running the callback, but closing that is too much work
+and not really worth it, since we're already detecting TSC wobbles
+after the fact, so we cannot, by definition, fully avoid funny clock
+values.
+
+And since the watchdog runs less often than the tick, this is also an
+optimization.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
+index 66015195bd18..c1b16b328abe 100644
+--- a/arch/x86/kernel/tsc.c
++++ b/arch/x86/kernel/tsc.c
+@@ -1033,6 +1033,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
+ 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
+ }
+ 
++static void tsc_cs_tick_stable(struct clocksource *cs)
++{
++	if (tsc_unstable)
++		return;
++
++	if (using_native_sched_clock())
++		sched_clock_tick_stable();
++}
++
+ /*
+  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
+  */
+@@ -1046,6 +1055,7 @@ static struct clocksource clocksource_tsc = {
+ 	.archdata               = { .vclock_mode = VCLOCK_TSC },
+ 	.resume			= tsc_resume,
+ 	.mark_unstable		= tsc_cs_mark_unstable,
++	.tick_stable		= tsc_cs_tick_stable,
+ };
+ 
+ void mark_tsc_unstable(char *reason)
+diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
+index f2b10d9ebd04..81490456c242 100644
+--- a/include/linux/clocksource.h
++++ b/include/linux/clocksource.h
+@@ -96,6 +96,7 @@ struct clocksource {
+ 	void (*suspend)(struct clocksource *cs);
+ 	void (*resume)(struct clocksource *cs);
+ 	void (*mark_unstable)(struct clocksource *cs);
++	void (*tick_stable)(struct clocksource *cs);
+ 
+ 	/* private: */
+ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
+diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
+index 34fe92ce1ebd..978cbb0af5f3 100644
+--- a/include/linux/sched/clock.h
++++ b/include/linux/sched/clock.h
+@@ -63,8 +63,8 @@ extern void clear_sched_clock_stable(void);
+  */
+ extern u64 __sched_clock_offset;
+ 
+-
+ extern void sched_clock_tick(void);
++extern void sched_clock_tick_stable(void);
+ extern void sched_clock_idle_sleep_event(void);
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+ 
+diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
+index dc650851935f..f861637f7fdc 100644
+--- a/kernel/sched/clock.c
++++ b/kernel/sched/clock.c
+@@ -366,20 +366,38 @@ void sched_clock_tick(void)
+ {
+ 	struct sched_clock_data *scd;
+ 
++	if (sched_clock_stable())
++		return;
++
++	if (unlikely(!sched_clock_running))
++		return;
++
+ 	WARN_ON_ONCE(!irqs_disabled());
+ 
+-	/*
+-	 * Update these values even if sched_clock_stable(), because it can
+-	 * become unstable at any point in time at which point we need some
+-	 * values to fall back on.
+-	 *
+-	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+-	 */
+ 	scd = this_scd();
+ 	__scd_stamp(scd);
++	sched_clock_local(scd);
++}
++
++void sched_clock_tick_stable(void)
++{
++	u64 gtod, clock;
+ 
+-	if (!sched_clock_stable() && likely(sched_clock_running))
+-		sched_clock_local(scd);
++	if (!sched_clock_stable())
++		return;
++
++	/*
++	 * Called under watchdog_lock.
++	 *
++	 * The watchdog just found this TSC to (still) be stable, so now is a
++	 * good moment to update our __gtod_offset. Because once we find the
++	 * TSC to be unstable, any computation will be computing crap.
++	 */
++	local_irq_disable();
++	gtod = ktime_get_ns();
++	clock = sched_clock();
++	__gtod_offset = (clock + __sched_clock_offset) - gtod;
++	local_irq_enable();
+ }
+ 
+ /*
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index 93621ae718d3..03918a19cf2d 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -233,6 +233,9 @@ static void clocksource_watchdog(unsigned long data)
+ 			continue;
+ 		}
+ 
++		if (cs == curr_clocksource && cs->tick_stable)
++			cs->tick_stable(cs);
++
+ 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
+ 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
+ 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
+-- 
+2.1.4
+