
From: Zwane Mwaikambo <zwane@linuxpower.ca>

This patch adds support for notification of overheating conditions on Intel
x86_64 processors.  Tested on EM64T; boot-tested on AMD64.

Hardware courtesy of Intel Corporation

Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/x86_64/Kconfig            |    7 ++
 25-akpm/arch/x86_64/kernel/Makefile    |    1 
 25-akpm/arch/x86_64/kernel/entry.S     |    3 +
 25-akpm/arch/x86_64/kernel/i8259.c     |    2 
 25-akpm/arch/x86_64/kernel/mce.c       |   14 ++++
 25-akpm/arch/x86_64/kernel/mce_intel.c |   99 +++++++++++++++++++++++++++++++++
 25-akpm/arch/x86_64/kernel/traps.c     |    4 +
 25-akpm/include/asm-x86_64/mce.h       |   13 ++++
 8 files changed, 142 insertions(+), 1 deletion(-)

diff -puN arch/x86_64/Kconfig~intel-thermal-monitor-for-x86_64 arch/x86_64/Kconfig
--- 25/arch/x86_64/Kconfig~intel-thermal-monitor-for-x86_64	Fri Nov 19 15:17:32 2004
+++ 25-akpm/arch/x86_64/Kconfig	Fri Nov 19 15:17:32 2004
@@ -341,6 +341,13 @@ config X86_MCE
 	   machine check error logs. See
 	   ftp://ftp.x86-64.org/pub/linux/tools/mcelog
 
+config X86_MCE_INTEL
+	bool "Intel MCE features"
+	depends on X86_MCE && X86_LOCAL_APIC
+	default y
+	help
+	   Additional support for Intel-specific MCE features such as
+	   the thermal monitor.
 endmenu
 
 #
diff -puN arch/x86_64/kernel/entry.S~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/entry.S
--- 25/arch/x86_64/kernel/entry.S~intel-thermal-monitor-for-x86_64	Fri Nov 19 15:17:32 2004
+++ 25-akpm/arch/x86_64/kernel/entry.S	Fri Nov 19 15:17:32 2004
@@ -538,6 +538,9 @@ retint_kernel:	
 	CFI_ENDPROC
 	.endm
 
+ENTRY(thermal_interrupt)	/* entry stub for THERMAL_APIC_VECTOR */
+	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+
 #ifdef CONFIG_SMP	
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
diff -puN arch/x86_64/kernel/i8259.c~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/i8259.c
--- 25/arch/x86_64/kernel/i8259.c~intel-thermal-monitor-for-x86_64	Fri Nov 19 15:17:32 2004
+++ 25-akpm/arch/x86_64/kernel/i8259.c	Fri Nov 19 15:17:32 2004
@@ -476,6 +476,7 @@ void error_interrupt(void);
 void reschedule_interrupt(void);
 void call_function_interrupt(void);
 void invalidate_interrupt(void);
+void thermal_interrupt(void);
 
 static void setup_timer(void)
 {
@@ -550,6 +551,7 @@ void __init init_IRQ(void)
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 #endif	
+	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
diff -puN arch/x86_64/kernel/Makefile~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/Makefile
--- 25/arch/x86_64/kernel/Makefile~intel-thermal-monitor-for-x86_64	Fri Nov 19 15:17:32 2004
+++ 25-akpm/arch/x86_64/kernel/Makefile	Fri Nov 19 15:17:32 2004
@@ -10,6 +10,7 @@ obj-y	:= process.o semaphore.o signal.o 
 		setup64.o bootflag.o e820.o reboot.o warmreboot.o quirks.o
 
 obj-$(CONFIG_X86_MCE)         += mce.o
+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI_BOOT)		+= acpi/
 obj-$(CONFIG_X86_MSR)		+= msr.o
diff -puN arch/x86_64/kernel/mce.c~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/mce.c
--- 25/arch/x86_64/kernel/mce.c~intel-thermal-monitor-for-x86_64	Fri Nov 19 15:17:32 2004
+++ 25-akpm/arch/x86_64/kernel/mce.c	Fri Nov 19 15:17:32 2004
@@ -43,7 +43,7 @@ struct mce_log mcelog = { 
 	MCE_LOG_LEN,
 }; 
 
-static void mce_log(struct mce *mce)
+void mce_log(struct mce *mce)
 {
 	unsigned next, entry;
 	mce->finished = 0;
@@ -305,6 +305,17 @@ static void __init mce_cpu_quirks(struct
 	}
 }			
 
+/*
+ * Set up vendor specific MCE features for the CPU described by @c.
+ * Intel is the only vendor with a hook; all others are left untouched.
+ */
+static void __init mce_cpu_features(struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return;
+	mce_intel_feature_init(c);
+}
+
 /* 
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off. 
@@ -321,6 +332,7 @@ void __init mcheck_init(struct cpuinfo_x
 		return;
 
 	mce_init(NULL);
+	mce_cpu_features(c);
 }
 
 /*
diff -puN /dev/null arch/x86_64/kernel/mce_intel.c
--- /dev/null	Thu Apr 11 07:25:15 2002
+++ 25-akpm/arch/x86_64/kernel/mce_intel.c	Fri Nov 19 15:17:32 2004
@@ -0,0 +1,99 @@
+/*
+ * Intel specific MCE features.
+ * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/mce.h>
+#include <asm/hw_irq.h>
+
+static DEFINE_PER_CPU(unsigned long, next_check);
+
+/*
+ * Thermal LVT interrupt handler. Reads this CPU's thermal status and
+ * logs it, at most once every HZ*300 jiffies per CPU.
+ */
+asmlinkage void smp_thermal_interrupt(void)
+{
+	struct mce rec;
+
+	ack_APIC_irq();
+	irq_enter();
+	if (!time_before(jiffies, __get_cpu_var(next_check))) {
+		__get_cpu_var(next_check) = jiffies + HZ*300;
+		memset(&rec, 0, sizeof(rec));
+		rec.cpu = smp_processor_id();
+		rec.bank = MCE_THERMAL_BANK;
+		rdtscll(rec.tsc);
+		rdmsrl(MSR_IA32_THERM_STATUS, rec.status);
+		if (!(rec.status & 0x1)) {
+			printk(KERN_EMERG "CPU%d: Temperature/speed normal\n", rec.cpu);
+		} else {
+			printk(KERN_EMERG
+				"CPU%d: Temperature above threshold, cpu clock throttled\n", rec.cpu);
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+		mce_log(&rec);
+	}
+	irq_exit();
+}
+
+/*
+ * Route the thermal sensor interrupt of the current CPU to
+ * THERMAL_APIC_VECTOR, unless an SMI or another vector already owns it.
+ */
+static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	unsigned int cpu = smp_processor_id();
+	u32 lo, hi, lvt;
+	int tm2;
+
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/*
+	 * If the BIOS already enabled TM1 and the thermal LVT delivers an
+	 * SMI, some SMM code might be handling the event, so we must not
+	 * install a handler of our own.
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
+	lvt = apic_read(APIC_LVTTHMR);
+	if ((lo & (1 << 3)) && (lvt & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	tm2 = cpu_has(c, X86_FEATURE_TM2) && (lo & (1 << 13));
+
+	if (lvt & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already "
+		       "installed\n", cpu, (lvt & APIC_VECTOR_MASK));
+		return;
+	}
+
+	/* Program the LVT entry masked; it is unmasked last, below. */
+	lvt = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+	apic_write_around(APIC_LVTTHMR, lvt);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, lo, hi);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, lo | 0x03, hi);
+
+	rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
+	wrmsr(MSR_IA32_MISC_ENABLE, lo | (1 << 3), hi);
+
+	lvt = apic_read(APIC_LVTTHMR);
+	apic_write_around(APIC_LVTTHMR, lvt & ~APIC_LVT_MASKED);
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+		cpu, tm2 ? "TM2" : "TM1");
+}
+
+void __init mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+	intel_init_thermal(c);	/* only feature so far: thermal monitor */
+}
diff -puN arch/x86_64/kernel/traps.c~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/traps.c
--- 25/arch/x86_64/kernel/traps.c~intel-thermal-monitor-for-x86_64	Fri Nov 19 15:17:32 2004
+++ 25-akpm/arch/x86_64/kernel/traps.c	Fri Nov 19 15:17:32 2004
@@ -882,6 +882,10 @@ asmlinkage void do_spurious_interrupt_bu
 {
 }
 
+asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
+{	/* weak no-op default; a non-weak definition takes precedence at link time */
+}
+
 /*
  *  'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
diff -puN include/asm-x86_64/mce.h~intel-thermal-monitor-for-x86_64 include/asm-x86_64/mce.h
--- 25/include/asm-x86_64/mce.h~intel-thermal-monitor-for-x86_64	Fri Nov 19 15:17:32 2004
+++ 25-akpm/include/asm-x86_64/mce.h	Fri Nov 19 15:17:32 2004
@@ -64,4 +64,17 @@ struct mce_log { 
 #define MCE_GET_LOG_LEN      _IOR('M', 2, int)
 #define MCE_GETCLEAR_FLAGS   _IOR('M', 3, int)
 
+/* Software defined banks, numbered relative to MCE_EXTENDED_BANK */
+#define MCE_EXTENDED_BANK	128
+#define MCE_THERMAL_BANK	(MCE_EXTENDED_BANK + 0)
+
+void mce_log(struct mce *m);
+#ifdef CONFIG_X86_MCE_INTEL
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+#else	/* !CONFIG_X86_MCE_INTEL: empty stub so callers need no #ifdef */
+static inline void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+}
+#endif	/* CONFIG_X86_MCE_INTEL */
+
 #endif
_
