diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h
index a1d803c2c..9aa8a49ed 100644
--- a/include/rocksdb/perf_context.h
+++ b/include/rocksdb/perf_context.h
@@ -223,6 +223,11 @@ struct PerfContext {
 
   std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
   bool per_level_perf_context_enabled = false;
+
+  uint64_t rangelock_remove = 0;
+  uint64_t rangelock_remove_rcu = 0;
+  uint64_t rangelock_acquire = 0;
+  uint64_t rangelock_acquire_rcu = 0;
 };
 
 // Get Thread-local PerfContext object pointer
diff --git a/utilities/transactions/range_locking/locktree/concurrent_tree.cc b/utilities/transactions/range_locking/locktree/concurrent_tree.cc
index 74d65f710..52e9a66f3 100644
--- a/utilities/transactions/range_locking/locktree/concurrent_tree.cc
+++ b/utilities/transactions/range_locking/locktree/concurrent_tree.cc
@@ -70,13 +70,24 @@ uint64_t concurrent_tree::get_insertion_memory_overhead(void) {
     return sizeof(treenode);
 }
 
-void concurrent_tree::locked_keyrange::prepare(concurrent_tree *tree) {
+void concurrent_tree::locked_keyrange::prepare_(concurrent_tree *tree) {
     // the first step in acquiring a locked keyrange is locking the root
     treenode *const root = &tree->m_root;
     m_tree = tree;
     m_subtree = root;
     m_range = keyrange::get_infinite_range();
     root->mutex_lock();
+    // Disable the RCU fast path and drain in-flight readers before mutating.
+    tree->rcu_cache_usable = false;
+    synchronize_rcu();
+}
+
+void concurrent_tree::locked_keyrange::prepare_no_lock(concurrent_tree *tree) {
+    // the first step in acquiring a locked keyrange is locking the root
+    treenode *const root = &tree->m_root;
+    m_tree = tree;
+    m_subtree = root;
+    m_range = keyrange::get_infinite_range();
 }
 
 void concurrent_tree::locked_keyrange::acquire(const keyrange &range) {
@@ -97,6 +108,29 @@ void concurrent_tree::locked_keyrange::acquire(const keyrange &range) {
     m_subtree = subtree;
 }
 
+
+bool concurrent_tree::locked_keyrange::acquire_under_rcu(const keyrange &range) {
+    treenode *const root = &m_tree->m_root;
+
+    if (root->is_empty() || root->range_overlaps(range)) {
+        return false;
+    }
+    // Callers hold the RCU read lock; the root is examined without its mutex.
+    treenode *child = root->find_child_under_rcu(range);
+    if (!child)
+        return false;
+
+    if (child->range_overlaps(range)) {
+        child->mutex_unlock();
+        return false;
+    }
+    // `child` is locked and does not itself overlap the range: descend from it.
+    m_subtree = child->find_node_with_overlapping_child(range, nullptr);
+    m_range = range;
+    return true;
+}
+
+
 void concurrent_tree::locked_keyrange::add_shared_owner(const keyrange &range,
                                                         TXNID new_owner)
 {
@@ -104,6 +138,8 @@ void concurrent_tree::locked_keyrange::add_shared_owner(const keyrange &range,
 }
 
 void concurrent_tree::locked_keyrange::release(void) {
+    if (&m_tree->m_root == m_subtree && !m_tree->is_empty())
+      m_tree->rcu_cache_usable = true; 
     m_subtree->mutex_unlock();
 }
 
diff --git a/utilities/transactions/range_locking/locktree/concurrent_tree.h b/utilities/transactions/range_locking/locktree/concurrent_tree.h
index fabda7294..d678afdb8 100644
--- a/utilities/transactions/range_locking/locktree/concurrent_tree.h
+++ b/utilities/transactions/range_locking/locktree/concurrent_tree.h
@@ -89,13 +89,15 @@ public:
         // There are two valid sequences of calls:
         //  - prepare, acquire, [operations], release
         //  - prepare, [operations],release
-        void prepare(concurrent_tree *tree);
+        void prepare_(concurrent_tree *tree);
+        void prepare_no_lock(concurrent_tree *tree);
 
         // requires: the locked keyrange was prepare()'d
         // effect: acquire a locked keyrange over the given concurrent_tree.
         //         the locked keyrange represents the range of keys overlapped
         //         by the given range
         void acquire(const keyrange &range);
+        bool acquire_under_rcu(const keyrange &range);
 
         // effect: releases a locked keyrange and the mutex it holds
         void release(void);
@@ -156,6 +158,12 @@ public:
     // returns: the memory overhead of a single insertion into the tree
     static uint64_t get_insertion_memory_overhead(void);
 
+    // True when RCU readers may safely traverse the tree lock-free.
+    // Set when the root lock is released over a non-empty tree; cleared
+    // (followed by synchronize_rcu) in prepare_() before modification.
+    // NOTE(review): no initializer -- every creation path must set it.
+    bool rcu_cache_usable;
+
 private:
     // the root needs to always exist so there's a lock to grab
     // even if the tree is empty. that's why we store a treenode
diff --git a/utilities/transactions/range_locking/locktree/locktree.cc b/utilities/transactions/range_locking/locktree/locktree.cc
index 3c32e1b77..bf62f28f6 100644
--- a/utilities/transactions/range_locking/locktree/locktree.cc
+++ b/utilities/transactions/range_locking/locktree/locktree.cc
@@ -64,6 +64,13 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 // the locktree source file instead of the header.
 #include "concurrent_tree.h"
 
+#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
+#define ROCKSDB_SUPPORT_THREAD_LOCAL
+#endif
+#include "rocksdb/perf_context.h"
+#include "monitoring/perf_level_imp.h"
+#include "monitoring/perf_context_imp.h"
+
 namespace toku {
 // A locktree represents the set of row locks owned by all transactions
 // over an open dictionary. Read and write ranges are represented as
@@ -86,6 +93,7 @@ void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id,
 
     XCALLOC(m_rangetree);
     m_rangetree->create(&m_cmp);
+    m_rangetree->rcu_cache_usable = false;
 
     m_sto_txnid = TXNID_NONE;
     m_sto_buffer.create();
@@ -93,6 +101,7 @@ void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id,
     m_sto_end_early_count = 0;
     m_sto_end_early_time = 0;
 
+
     m_lock_request_info.init(mutex_factory);
 }
 
@@ -282,7 +291,7 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
     range_buffer::iterator iter(&m_sto_buffer);
     range_buffer::iterator::record rec;
     while (iter.current(&rec)) {
-        sto_lkr.prepare(&sto_rangetree);
+        sto_lkr.prepare_(&sto_rangetree);
         int r = acquire_lock_consolidated(&sto_lkr, m_sto_txnid,
                                           rec.get_left_key(),
                                           rec.get_right_key(),
@@ -304,7 +313,7 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
         }
     } migrate_fn;
     migrate_fn.dst_lkr = static_cast<concurrent_tree::locked_keyrange *>(prepared_lkr);
-    sto_lkr.prepare(&sto_rangetree);
+    sto_lkr.prepare_(&sto_rangetree);
     sto_lkr.iterate(&migrate_fn);
     sto_lkr.remove_all();
     sto_lkr.release();
@@ -402,13 +411,31 @@ int locktree::acquire_lock_consolidated(void *prepared_lkr,
                                         const DBT *left_key, const DBT *right_key,
                                         bool is_write_request,
                                         txnid_set *conflicts) {
-    int r = 0;
     concurrent_tree::locked_keyrange *lkr;
 
     keyrange requested_range;
     requested_range.create(left_key, right_key);
     lkr = static_cast<concurrent_tree::locked_keyrange *>(prepared_lkr); 
     lkr->acquire(requested_range);
+    return acquire_lock_consolidated_part2(lkr, txnid,
+                                           left_key, right_key,
+                                           requested_range,
+                                           is_write_request,
+                                           conflicts);
+}
+
+int locktree::acquire_lock_consolidated_part2(
+      void *lkr_as_void,
+      TXNID txnid,
+      const DBT *left_key, const DBT *right_key,
+      keyrange& requested_range,
+      bool is_write_request,
+      txnid_set *conflicts) {
+
+    concurrent_tree::locked_keyrange *lkr;
+    lkr = static_cast<concurrent_tree::locked_keyrange *>(lkr_as_void); 
+
+    int r = 0;
 
     // copy out the set of overlapping row locks.
     GrowableArray<row_lock> overlapping_row_locks;
@@ -477,6 +504,11 @@ int locktree::acquire_lock_consolidated(void *prepared_lkr,
     return r;
 }
 
+
+using rocksdb::PerfLevel;
+using rocksdb::perf_level;
+using rocksdb::perf_context;
+
 // acquire a lock in the given key range, inclusive. if successful,
 // return 0. otherwise, populate the conflicts txnid_set with the set of
 // transactions that conflict with this request.
@@ -493,16 +525,52 @@ int locktree::acquire_lock(bool is_write_request,
     // prepare is a serialzation point, so we take the opportunity to
     // try the single txnid optimization first.
     concurrent_tree::locked_keyrange lkr;
-    lkr.prepare(m_rangetree);
 
-    bool acquired = sto_try_acquire(&lkr, txnid, left_key, right_key, 
-                                    is_write_request);
-    if (!acquired) {
-        r = acquire_lock_consolidated(&lkr, txnid, left_key, right_key,
-                                      is_write_request, conflicts);
+    //psergey-todo: this call will lock the root mutex:
+    // (SO, RCU should be used somewhere around here...)
+    bool used_rcu = false;
+    PERF_COUNTER_ADD(rangelock_acquire, 1);
+
+    if (m_rangetree->rcu_cache_usable)
+    {
+        lkr.prepare_no_lock(m_rangetree);
+        keyrange requested_range;
+        requested_range.create(left_key, right_key);
+        
+        bool acquired;
+        rcu_read_lock();
+
+        if (!m_rangetree->rcu_cache_usable)
+            acquired= false;
+        else
+            acquired = lkr.acquire_under_rcu(requested_range);
+
+        rcu_read_unlock();
+        if (acquired) {
+            PERF_COUNTER_ADD(rangelock_acquire_rcu, 1);
+            used_rcu = true;
+            r = acquire_lock_consolidated_part2(&lkr, txnid,
+                                                left_key, right_key,
+                                                requested_range,
+                                                is_write_request,
+                                                conflicts);
+            lkr.release();
+        }
+    }
+    
+    if (!used_rcu) {
+        // Locked slow path: prepare_() clears rcu_cache_usable and runs
+        // synchronize_rcu() itself, so no extra quiescing is needed here.
+        lkr.prepare_(m_rangetree);
+        bool acquired = sto_try_acquire(&lkr, txnid, left_key, right_key, 
+                                        is_write_request);
+        if (!acquired) {
+            r = acquire_lock_consolidated(&lkr, txnid, left_key, right_key,
+                                          is_write_request, conflicts);
+        }
+        lkr.release();
     }
 
-    lkr.release();
     return r;
 }
 
@@ -540,7 +608,7 @@ void locktree::dump_locks(void *cdata, dump_callback cb)
     range.create(toku_dbt_negative_infinity(),
                  toku_dbt_positive_infinity());
 
-    lkr.prepare(m_rangetree);
+    lkr.prepare_(m_rangetree);
     lkr.acquire(range);
 
     TXNID sto_txn;
@@ -588,7 +656,7 @@ void locktree::get_conflicts(bool is_write_request,
     keyrange range;
     range.create(left_key, right_key);
     concurrent_tree::locked_keyrange lkr;
-    lkr.prepare(m_rangetree);
+    lkr.prepare_(m_rangetree);
     lkr.acquire(range);
 
     // copy out the set of overlapping row locks and determine the conflicts
@@ -636,8 +704,30 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid,
 
     // acquire and prepare a locked keyrange over the release range
     concurrent_tree::locked_keyrange lkr;
-    lkr.prepare(m_rangetree);
-    lkr.acquire(release_range);
+    bool used_rcu = false;
+
+    // Fast path: try to locate and lock the subtree under RCU protection.
+    PERF_COUNTER_ADD(rangelock_remove, 1);
+    rcu_read_lock(); 
+    if (m_rangetree->rcu_cache_usable)
+    {
+        lkr.prepare_no_lock(m_rangetree);
+        used_rcu = lkr.acquire_under_rcu(release_range);
+        rcu_read_unlock();
+    }
+    else    
+    {
+        rcu_read_unlock();
+    }
+    
+    if (!used_rcu)
+    {
+        // Non-RCU path:
+        lkr.prepare_(m_rangetree);
+        lkr.acquire(release_range);
+    } else {
+      PERF_COUNTER_ADD(rangelock_remove_rcu, 1);
+    }
 
     // copy out the set of overlapping row locks.
     GrowableArray<row_lock> overlapping_row_locks;
@@ -676,7 +766,7 @@ bool locktree::sto_try_release(TXNID txnid) {
         // check the bit again with a prepared locked keyrange,
         // which protects the optimization bits and rangetree data
         concurrent_tree::locked_keyrange lkr;
-        lkr.prepare(m_rangetree);
+        lkr.prepare_(m_rangetree);
         if (m_sto_txnid != TXNID_NONE) {
             // this txnid better be the single txnid on this locktree,
             // or else we are in big trouble (meaning the logic is broken)
@@ -716,7 +806,7 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges,
             // check the bit again with a prepared locked keyrange,
             // which protects the optimization bits and rangetree data
             concurrent_tree::locked_keyrange lkr;
-            lkr.prepare(m_rangetree);
+            lkr.prepare_(m_rangetree);
             if (m_sto_txnid != TXNID_NONE) {
                 sto_end_early(&lkr);
             }
@@ -827,7 +917,7 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
     // prepare and acquire a locked keyrange on the entire locktree
     concurrent_tree::locked_keyrange lkr;
     keyrange infinite_range = keyrange::get_infinite_range();
-    lkr.prepare(m_rangetree);
+    lkr.prepare_(m_rangetree);
     lkr.acquire(infinite_range);
 
     // if we're in the single txnid optimization, simply call it off.
diff --git a/utilities/transactions/range_locking/locktree/locktree.h b/utilities/transactions/range_locking/locktree/locktree.h
index e7c909be0..c969c4e25 100644
--- a/utilities/transactions/range_locking/locktree/locktree.h
+++ b/utilities/transactions/range_locking/locktree/locktree.h
@@ -63,6 +63,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 // PORT: ft-status for LTM_STATUS:
 #include <ft/ft-status.h>
 
+// liburcu provides the RCU primitives used by the lock-free fast path.
+#include <urcu.h>
+
 struct DICTIONARY_ID {
     uint64_t dictid;
 };
@@ -73,6 +76,7 @@ struct DICTIONARY_ID {
 #include "wfg.h"
 #include "range_buffer.h"
 
+#include "keyrange.h"
 
 namespace toku {
 
@@ -344,7 +348,9 @@ namespace toku {
                                       TXNID txnid, bool is_shared,
                                       TxnidVector *owners);
         void dump_locks(void *cdata, dump_callback cb);
+
     private:
+
         locktree_manager *m_mgr;
         DICTIONARY_ID m_dict_id;
         uint32_t m_reference_count;
@@ -525,6 +531,13 @@ namespace toku {
         int acquire_lock_consolidated(void *prepared_lkr, TXNID txnid,
                                       const DBT *left_key, const DBT *right_key,
                                       bool is_write_request, txnid_set *conflicts);
+        // for RCU:
+        int acquire_lock_consolidated_part2(void *lkr_as_void,
+                                            TXNID txnid,
+                                            const DBT *left_key, const DBT *right_key,
+                                            keyrange& requested_range,
+                                            bool is_write_request,
+                                            txnid_set *conflicts);
 
         int acquire_lock(bool is_write_request, TXNID txnid,
                          const DBT *left_key, const DBT *right_key,
diff --git a/utilities/transactions/range_locking/locktree/treenode.cc b/utilities/transactions/range_locking/locktree/treenode.cc
index 725991f7d..b9f8365de 100644
--- a/utilities/transactions/range_locking/locktree/treenode.cc
+++ b/utilities/transactions/range_locking/locktree/treenode.cc
@@ -215,6 +215,21 @@ treenode *treenode::find_node_with_overlapping_child(const keyrange &range,
     }
 }
 
+treenode *treenode::find_child_under_rcu(const keyrange &range) {
+    keyrange::comparison c = range.compare(*m_cmp, m_range);
+    treenode *child;
+
+    if (c == keyrange::comparison::LESS_THAN) {
+      child = m_left_child.get_locked();
+    }
+    else if (c == keyrange::comparison::GREATER_THAN) {
+      child = m_right_child.get_locked();
+    }
+    else  // overlapping / equal: caller must fall back to the locked path
+      child = nullptr;
+    return child;
+}
+
 template <class F>
 void treenode::traverse_overlaps(const keyrange &range, F *function) {
     keyrange::comparison c = range.compare(*m_cmp, m_range);
diff --git a/utilities/transactions/range_locking/locktree/treenode.h b/utilities/transactions/range_locking/locktree/treenode.h
index f23324f03..3cca0c381 100644
--- a/utilities/transactions/range_locking/locktree/treenode.h
+++ b/utilities/transactions/range_locking/locktree/treenode.h
@@ -116,6 +116,8 @@ public:
     treenode *find_node_with_overlapping_child(const keyrange &range,
             const keyrange::comparison *cmp_hint);
 
+    treenode *find_child_under_rcu(const keyrange &range);
+
     // effect: performs an in-order traversal of the ranges that overlap the
     //         given range, calling function->fn() on each node that does
     // requires: function signature is: bool fn(const keyrange &range, TXNID txnid)
