
From: Geoff Gustafson <geoff@linux.jf.intel.com>,
      "Chen, Kenneth W" <kenneth.w.chen@intel.com>,
      Ingo Molnar <mingo@elte.hu>,
      me.

The big-SMP guys are seeing high CPU load due to del_timer_sync()'s
inefficiencies.  The callers are fs/aio.c and schedule_timeout().

We note that neither of these callers' timer handlers actually re-add the
timer - they are single-shot.

So we don't need all that complexity in del_timer_sync() - we can just run
del_timer() and if that succeeded we know the timer is dead: a single-shot
handler cannot re-add it.  Only if del_timer() finds nothing pending (the
handler may still be running on another CPU) do we fall back to the full
del_timer_sync().

Add del_singleshot_timer_sync(), export it to modules and use it in AIO and
schedule_timeout().
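
For reference, both call sites follow the same single-shot pattern; here is a
minimal sketch using the 2.6 timer API (the handler and function names are
illustrative, not taken from the actual callers):

	#include <linux/timer.h>
	#include <linux/sched.h>

	static void my_timeout_handler(unsigned long data)
	{
		/* Single-shot: wake the sleeper; never re-adds the timer */
		wake_up_process((struct task_struct *)data);
	}

	static void sleep_with_timeout(unsigned long timeout)
	{
		struct timer_list timer;

		init_timer(&timer);
		timer.expires = jiffies + timeout;
		timer.data = (unsigned long)current;
		timer.function = my_timeout_handler;

		set_current_state(TASK_INTERRUPTIBLE);
		add_timer(&timer);
		schedule();

		/*
		 * If del_timer() removes a pending timer here, the handler
		 * never ran and never will, so no cross-CPU wait is needed;
		 * del_singleshot_timer_sync() only falls back to
		 * del_timer_sync() when the timer is no longer pending.
		 */
		del_singleshot_timer_sync(&timer);
	}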


(These numbers are for an earlier version of this patch, but they should be
close.)

Before:             32p     4p
     Warm cache   29,000    505
     Cold cache   37,800   1220

After:              32p     4p
     Warm cache       95     88
     Cold cache    1,800    140

[Measurements are CPU cycles spent in a call to del_timer_sync(), averaged
over 1000 calls.  32p is a 32-way, 16-node NUMA machine; 4p is a 4-way SMP
machine.]
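
(The measurement harness isn't part of this patch; the following is a rough
sketch of how such a per-call cycle count could be gathered, assuming
get_cycles() from <asm/timex.h> - the function name and loop are
illustrative:)

	#include <linux/types.h>
	#include <linux/timer.h>
	#include <asm/timex.h>
	#include <asm/div64.h>

	/* Average cost, in CPU cycles, of one del_timer_sync() call */
	static u64 time_del_timer_sync(struct timer_list *timer, int iters)
	{
		u64 total = 0;
		cycles_t start;
		int i;

		for (i = 0; i < iters; i++) {
			start = get_cycles();
			del_timer_sync(timer);	/* or del_singleshot_timer_sync() */
			total += get_cycles() - start;
		}
		do_div(total, iters);		/* total becomes the average */
		return total;
	}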



---

 25-akpm/fs/aio.c              |    2 +-
 25-akpm/include/linux/timer.h |    2 ++
 25-akpm/kernel/timer.c        |   31 ++++++++++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 2 deletions(-)

diff -puN fs/aio.c~del_singleshot_timer_sync fs/aio.c
--- 25/fs/aio.c~del_singleshot_timer_sync	Tue May 11 16:38:56 2004
+++ 25-akpm/fs/aio.c	Tue May 11 16:38:56 2004
@@ -788,7 +788,7 @@ static inline void set_timeout(long star
 
 static inline void clear_timeout(struct timeout *to)
 {
-	del_timer_sync(&to->timer);
+	del_singleshot_timer_sync(&to->timer);
 }
 
 static int read_events(struct kioctx *ctx,
diff -puN include/linux/timer.h~del_singleshot_timer_sync include/linux/timer.h
--- 25/include/linux/timer.h~del_singleshot_timer_sync	Tue May 11 16:38:56 2004
+++ 25-akpm/include/linux/timer.h	Tue May 11 16:52:55 2004
@@ -88,8 +88,10 @@ static inline void add_timer(struct time
 
 #ifdef CONFIG_SMP
   extern int del_timer_sync(struct timer_list * timer);
+  extern int del_singleshot_timer_sync(struct timer_list * timer);
 #else
 # define del_timer_sync(t) del_timer(t)
+# define del_singleshot_timer_sync(t) del_timer(t)
 #endif
 
 extern void init_timers(void);
diff -puN kernel/timer.c~del_singleshot_timer_sync kernel/timer.c
--- 25/kernel/timer.c~del_singleshot_timer_sync	Tue May 11 16:38:56 2004
+++ 25-akpm/kernel/timer.c	Tue May 11 16:52:55 2004
@@ -350,6 +350,35 @@ del_again:
 }
 
 EXPORT_SYMBOL(del_timer_sync);
+
+/***
+ * del_singleshot_timer_sync - deactivate a non-recursive timer
+ * @timer: the timer to be deactivated
+ *
+ * This function is an optimization of del_timer_sync for the case where the
+ * caller can guarantee the timer does not reschedule itself in its timer
+ * function.
+ *
+ * Synchronization rules: callers must prevent restarting of the timer,
+ * otherwise this function is meaningless. It must not be called from
+ * interrupt contexts. Upon exit the timer is not queued and the handler
+ * is not running on any CPU.
+ *
+ * The function returns whether it has deactivated a pending timer or not.
+ */
+int del_singleshot_timer_sync(struct timer_list *timer)
+{
+	int ret = del_timer(timer);
+
+	if (!ret) {
+		ret = del_timer_sync(timer);
+		BUG_ON(ret);
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(del_singleshot_timer_sync);
 #endif
 
 static int cascade(tvec_base_t *base, tvec_t *tv, int index)
@@ -1109,7 +1138,7 @@ fastcall signed long __sched schedule_ti
 
 	add_timer(&timer);
 	schedule();
-	del_timer_sync(&timer);
+	del_singleshot_timer_sync(&timer);
 
 	timeout = expire - jiffies;
 

_
