#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/slab.h>
2021
22+ /* global SRCU for all MMs */
23+ struct srcu_struct srcu ;
24+
/*
 * This function can't run concurrently against mmu_notifier_register
 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with
 * the mmu_notifier_mm->lock in addition to SRCU and it serializes
 * against the other mmu notifiers with SRCU. struct mmu_notifier_mm
 * can't go away from under us as exit_mmap holds an mm_count pin
 * itself.
 */
3337void __mmu_notifier_release (struct mm_struct * mm )
3438{
3539 struct mmu_notifier * mn ;
3640 struct hlist_node * n ;
41+ int id ;
3742
3843 /*
3944 * RCU here will block mmu_notifier_unregister until
4045 * ->release returns.
4146 */
42- rcu_read_lock ( );
47+ id = srcu_read_lock ( & srcu );
4348 hlist_for_each_entry_rcu (mn , n , & mm -> mmu_notifier_mm -> list , hlist )
4449 /*
4550 * if ->release runs before mmu_notifier_unregister it
@@ -50,7 +55,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
5055 */
5156 if (mn -> ops -> release )
5257 mn -> ops -> release (mn , mm );
53- rcu_read_unlock ( );
58+ srcu_read_unlock ( & srcu , id );
5459
5560 spin_lock (& mm -> mmu_notifier_mm -> lock );
5661 while (unlikely (!hlist_empty (& mm -> mmu_notifier_mm -> list ))) {
@@ -68,15 +73,15 @@ void __mmu_notifier_release(struct mm_struct *mm)
6873 spin_unlock (& mm -> mmu_notifier_mm -> lock );
6974
7075 /*
71- * synchronize_rcu here prevents mmu_notifier_release to
76+ * synchronize_srcu here prevents mmu_notifier_release to
7277 * return to exit_mmap (which would proceed freeing all pages
7378 * in the mm) until the ->release method returns, if it was
7479 * invoked by mmu_notifier_unregister.
7580 *
7681 * The mmu_notifier_mm can't go away from under us because one
7782 * mm_count is hold by exit_mmap.
7883 */
79- synchronize_rcu ( );
84+ synchronize_srcu ( & srcu );
8085}
8186
8287/*
@@ -89,14 +94,14 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
8994{
9095 struct mmu_notifier * mn ;
9196 struct hlist_node * n ;
92- int young = 0 ;
97+ int young = 0 , id ;
9398
94- rcu_read_lock ( );
99+ id = srcu_read_lock ( & srcu );
95100 hlist_for_each_entry_rcu (mn , n , & mm -> mmu_notifier_mm -> list , hlist ) {
96101 if (mn -> ops -> clear_flush_young )
97102 young |= mn -> ops -> clear_flush_young (mn , mm , address );
98103 }
99- rcu_read_unlock ( );
104+ srcu_read_unlock ( & srcu , id );
100105
101106 return young ;
102107}
@@ -106,17 +111,17 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
106111{
107112 struct mmu_notifier * mn ;
108113 struct hlist_node * n ;
109- int young = 0 ;
114+ int young = 0 , id ;
110115
111- rcu_read_lock ( );
116+ id = srcu_read_lock ( & srcu );
112117 hlist_for_each_entry_rcu (mn , n , & mm -> mmu_notifier_mm -> list , hlist ) {
113118 if (mn -> ops -> test_young ) {
114119 young = mn -> ops -> test_young (mn , mm , address );
115120 if (young )
116121 break ;
117122 }
118123 }
119- rcu_read_unlock ( );
124+ srcu_read_unlock ( & srcu , id );
120125
121126 return young ;
122127}
@@ -126,8 +131,9 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
126131{
127132 struct mmu_notifier * mn ;
128133 struct hlist_node * n ;
134+ int id ;
129135
130- rcu_read_lock ( );
136+ id = srcu_read_lock ( & srcu );
131137 hlist_for_each_entry_rcu (mn , n , & mm -> mmu_notifier_mm -> list , hlist ) {
132138 if (mn -> ops -> change_pte )
133139 mn -> ops -> change_pte (mn , mm , address , pte );
@@ -138,49 +144,52 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
138144 else if (mn -> ops -> invalidate_page )
139145 mn -> ops -> invalidate_page (mn , mm , address );
140146 }
141- rcu_read_unlock ( );
147+ srcu_read_unlock ( & srcu , id );
142148}
143149
144150void __mmu_notifier_invalidate_page (struct mm_struct * mm ,
145151 unsigned long address )
146152{
147153 struct mmu_notifier * mn ;
148154 struct hlist_node * n ;
155+ int id ;
149156
150- rcu_read_lock ( );
157+ id = srcu_read_lock ( & srcu );
151158 hlist_for_each_entry_rcu (mn , n , & mm -> mmu_notifier_mm -> list , hlist ) {
152159 if (mn -> ops -> invalidate_page )
153160 mn -> ops -> invalidate_page (mn , mm , address );
154161 }
155- rcu_read_unlock ( );
162+ srcu_read_unlock ( & srcu , id );
156163}
157164
158165void __mmu_notifier_invalidate_range_start (struct mm_struct * mm ,
159166 unsigned long start , unsigned long end )
160167{
161168 struct mmu_notifier * mn ;
162169 struct hlist_node * n ;
170+ int id ;
163171
164- rcu_read_lock ( );
172+ id = srcu_read_lock ( & srcu );
165173 hlist_for_each_entry_rcu (mn , n , & mm -> mmu_notifier_mm -> list , hlist ) {
166174 if (mn -> ops -> invalidate_range_start )
167175 mn -> ops -> invalidate_range_start (mn , mm , start , end );
168176 }
169- rcu_read_unlock ( );
177+ srcu_read_unlock ( & srcu , id );
170178}
171179
172180void __mmu_notifier_invalidate_range_end (struct mm_struct * mm ,
173181 unsigned long start , unsigned long end )
174182{
175183 struct mmu_notifier * mn ;
176184 struct hlist_node * n ;
185+ int id ;
177186
178- rcu_read_lock ( );
187+ id = srcu_read_lock ( & srcu );
179188 hlist_for_each_entry_rcu (mn , n , & mm -> mmu_notifier_mm -> list , hlist ) {
180189 if (mn -> ops -> invalidate_range_end )
181190 mn -> ops -> invalidate_range_end (mn , mm , start , end );
182191 }
183- rcu_read_unlock ( );
192+ srcu_read_unlock ( & srcu , id );
184193}
185194
186195static int do_mmu_notifier_register (struct mmu_notifier * mn ,
@@ -192,6 +201,12 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
192201
193202 BUG_ON (atomic_read (& mm -> mm_users ) <= 0 );
194203
204+ /*
205+ * Verify that mmu_notifier_init() already run and the global srcu is
206+ * initialized.
207+ */
208+ BUG_ON (!srcu .per_cpu_ref );
209+
195210 ret = - ENOMEM ;
196211 mmu_notifier_mm = kmalloc (sizeof (struct mmu_notifier_mm ), GFP_KERNEL );
197212 if (unlikely (!mmu_notifier_mm ))
@@ -274,8 +289,8 @@ void __mmu_notifier_mm_destroy(struct mm_struct *mm)
/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with SRCU and against mmu_notifier_unregister
 * with the unregister lock + SRCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister returned we're guaranteed
@@ -290,16 +305,17 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
290305 * RCU here will force exit_mmap to wait ->release to finish
291306 * before freeing the pages.
292307 */
293- rcu_read_lock () ;
308+ int id ;
294309
310+ id = srcu_read_lock (& srcu );
295311 /*
296312 * exit_mmap will block in mmu_notifier_release to
297313 * guarantee ->release is called before freeing the
298314 * pages.
299315 */
300316 if (mn -> ops -> release )
301317 mn -> ops -> release (mn , mm );
302- rcu_read_unlock ( );
318+ srcu_read_unlock ( & srcu , id );
303319
304320 spin_lock (& mm -> mmu_notifier_mm -> lock );
305321 hlist_del_rcu (& mn -> hlist );
@@ -310,10 +326,17 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
310326 * Wait any running method to finish, of course including
311327 * ->release if it was run by mmu_notifier_relase instead of us.
312328 */
313- synchronize_rcu ( );
329+ synchronize_srcu ( & srcu );
314330
315331 BUG_ON (atomic_read (& mm -> mm_count ) <= 0 );
316332
317333 mmdrop (mm );
318334}
319335EXPORT_SYMBOL_GPL (mmu_notifier_unregister );
336+
337+ static int __init mmu_notifier_init (void )
338+ {
339+ return init_srcu_struct (& srcu );
340+ }
341+
342+ module_init (mmu_notifier_init );
0 commit comments