perf tools: Add front cache for dso data access

There's a high contention in dso_cache__find() due to the dso->lock
when dwarf unwinding is done with libunwind.  Add last accessed
pointers of dso_cache to lockless lookup.  It'll fallback to normal
tree search when it misses the last cache.

The size 16 is arbitrary and works best for my setting.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 6c1f561..d8ee1fd 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -466,12 +466,31 @@
 	pthread_mutex_unlock(&dso->lock);
 }
 
+static void update_last_cache(struct dso *dso, struct dso_cache *cache)
+{
+	int i;
+
+	for (i = DSO_LAST_CACHE_NR - 1; i > 0; i--)
+		dso->data.last[i] = dso->data.last[i-1];
+
+	dso->data.last[0] = cache;
+}
+
 static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset)
 {
 	const struct rb_root *root = &dso->data.cache;
 	struct rb_node * const *p = &root->rb_node;
 	const struct rb_node *parent = NULL;
 	struct dso_cache *cache;
+	int i;
+
+	for (i = 0; i < DSO_LAST_CACHE_NR; i++) {
+		cache = dso->data.last[i];
+
+		if (cache && cache->offset <= offset &&
+		    offset < cache->offset + DSO__DATA_CACHE_SIZE)
+			return cache;
+	}
 
 	pthread_mutex_lock(&dso->lock);
 	while (*p != NULL) {
@@ -485,8 +504,10 @@
 			p = &(*p)->rb_left;
 		else if (offset >= end)
 			p = &(*p)->rb_right;
-		else
+		else {
+			update_last_cache(dso, cache);
 			goto out;
+		}
 	}
 	cache = NULL;
 out:
@@ -515,13 +536,16 @@
 			p = &(*p)->rb_left;
 		else if (offset >= end)
 			p = &(*p)->rb_right;
-		else
+		else {
+			update_last_cache(dso, cache);
 			goto out;
+		}
 	}
 
 	rb_link_node(&new->rb_node, parent, p);
 	rb_insert_color(&new->rb_node, root);
 
+	update_last_cache(dso, new);
 	cache = NULL;
 out:
 	pthread_mutex_unlock(&dso->lock);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c18fcc0..28e8bb3 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -136,6 +136,8 @@
 	/* dso data file */
 	struct {
 		struct rb_root	 cache;
+#define DSO_LAST_CACHE_NR  16
+		struct dso_cache *last[DSO_LAST_CACHE_NR];
 		int		 fd;
 		int		 status;
 		u32		 status_seen;