
Commit 3f59876

Merge patch series "fs: add i_state helpers"
Christian Brauner <[email protected]> says:

I've recently looked for some free space in struct inode again because of some exec kerfuffle we had, and while my idea didn't turn into anything I noticed that we often waste bytes when using wait bit operations. So I set out to switch that to another mechanism that would allow us to free up bytes. This is an attempt to turn i_state from an unsigned long into a u32, using the individual bytes of i_state as addresses for the wait var event mechanism (thanks to Linus for that idea). This survives LTP, xfstests on various filesystems, and will-it-scale.

* patches from https://lore.kernel.org/r/[email protected]:
  inode: make i_state a u32
  inode: port __I_LRU_ISOLATING to var event
  inode: port __I_NEW to var event
  inode: port __I_SYNC to var event
  fs: reorder i_state bits
  fs: add i_state helpers

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
2 parents d126e37 + dbd5479 commit 3f59876
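In short, the series replaces wait_on_bit()/wake_up_bit() on i_state with the wait_var_event() machinery, keyed on the address of an individual byte inside the now 32-bit i_state, so struct inode needs no extra waitqueue storage. A minimal sketch of the pattern, assuming the helpers this series adds: inode_state_wait_address() mirrors the macro introduced in include/linux/fs.h below, while the demo_* functions are purely illustrative and not part of the series.

/*
 * Illustrative sketch only. inode_state_wait_address() mirrors the helper
 * added in include/linux/fs.h; the demo_* functions are hypothetical and
 * merely show how waiter and waker pair up on a per-byte address.
 */
#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))

static void demo_wait_for_new(struct inode *inode)
{
	/* Sleep until I_NEW is cleared; the byte address keys the waitqueue. */
	wait_var_event(inode_state_wait_address(inode, __I_NEW),
		       !(READ_ONCE(inode->i_state) & I_NEW));
}

static void demo_clear_new(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	inode->i_state &= ~I_NEW;
	/* Pairs with the barrier in prepare_to_wait_event(). */
	smp_mb();
	wake_up_var(inode_state_wait_address(inode, __I_NEW));
	spin_unlock(&inode->i_lock);
}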

File tree

6 files changed: +135 −56 lines


fs/bcachefs/fs.c

Lines changed: 6 additions & 4 deletions
@@ -1644,14 +1644,16 @@ void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
 				break;
 			}
 		} else if (clean_pass && this_pass_clean) {
-			wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
-			DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+			struct wait_bit_queue_entry wqe;
+			struct wait_queue_head *wq_head;
 
-			prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+			wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
+			prepare_to_wait_event(wq_head, &wqe.wq_entry,
+					      TASK_UNINTERRUPTIBLE);
 			mutex_unlock(&c->vfs_inodes_lock);
 
 			schedule();
-			finish_wait(wq, &wait.wq_entry);
+			finish_wait(wq_head, &wqe.wq_entry);
 			goto again;
 		}
 	}

fs/dcache.c

Lines changed: 6 additions & 1 deletion
@@ -1908,8 +1908,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 	__d_instantiate(entry, inode);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW & ~I_CREATING;
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
+	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(d_instantiate_new);

fs/fs-writeback.c

Lines changed: 29 additions & 16 deletions
@@ -1386,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 
 static void inode_sync_complete(struct inode *inode)
 {
+	assert_spin_locked(&inode->i_lock);
+
 	inode->i_state &= ~I_SYNC;
 	/* If inode is clean an unused, put it into LRU now... */
 	inode_add_lru(inode);
-	/* Waiters must see I_SYNC cleared before being woken up */
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_SYNC);
+	/* Called with inode->i_lock which ensures memory ordering. */
+	inode_wake_up_bit(inode, __I_SYNC);
 }
 
 static bool inode_dirtied_after(struct inode *inode, unsigned long t)
@@ -1512,17 +1513,25 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
  */
 void inode_wait_for_writeback(struct inode *inode)
 {
-	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
-	wait_queue_head_t *wqh;
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
+
+	assert_spin_locked(&inode->i_lock);
+
+	if (!(inode->i_state & I_SYNC))
+		return;
 
-	lockdep_assert_held(&inode->i_lock);
-	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-	while (inode->i_state & I_SYNC) {
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+	for (;;) {
+		prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+		/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+		if (!(inode->i_state & I_SYNC))
+			break;
 		spin_unlock(&inode->i_lock);
-		__wait_on_bit(wqh, &wq, bit_wait,
-			      TASK_UNINTERRUPTIBLE);
+		schedule();
 		spin_lock(&inode->i_lock);
 	}
+	finish_wait(wq_head, &wqe.wq_entry);
 }
 
 /*
@@ -1533,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
 static void inode_sleep_on_writeback(struct inode *inode)
 	__releases(inode->i_lock)
 {
-	DEFINE_WAIT(wait);
-	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-	int sleep;
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
+	bool sleep;
+
+	assert_spin_locked(&inode->i_lock);
 
-	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-	sleep = inode->i_state & I_SYNC;
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+	/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+	sleep = !!(inode->i_state & I_SYNC);
 	spin_unlock(&inode->i_lock);
 	if (sleep)
 		schedule();
-	finish_wait(wqh, &wait);
+	finish_wait(wq_head, &wqe.wq_entry);
 }
 
 /*

fs/inode.c

Lines changed: 54 additions & 16 deletions
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
 		inode->i_state |= I_REFERENCED;
 }
 
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+					    struct inode *inode, u32 bit)
+{
+	void *bit_address;
+
+	bit_address = inode_state_wait_address(inode, bit);
+	init_wait_var_entry(wqe, bit_address, 0);
+	return __var_waitqueue(bit_address);
+}
+EXPORT_SYMBOL(inode_bit_waitqueue);
+
 /*
  * Add inode to LRU if needed (inode is unused and clean).
  *
@@ -500,24 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
 	inode->i_state &= ~I_LRU_ISOLATING;
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+	/* Called with inode->i_lock which ensures memory ordering. */
+	inode_wake_up_bit(inode, __I_LRU_ISOLATING);
 	spin_unlock(&inode->i_lock);
 }
 
 static void inode_wait_for_lru_isolating(struct inode *inode)
 {
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
+
 	lockdep_assert_held(&inode->i_lock);
-	if (inode->i_state & I_LRU_ISOLATING) {
-		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
-		wait_queue_head_t *wqh;
+	if (!(inode->i_state & I_LRU_ISOLATING))
+		return;
 
-		wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
+	for (;;) {
+		prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+		/*
+		 * Checking I_LRU_ISOLATING with inode->i_lock guarantees
+		 * memory ordering.
+		 */
+		if (!(inode->i_state & I_LRU_ISOLATING))
+			break;
 		spin_unlock(&inode->i_lock);
-		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+		schedule();
 		spin_lock(&inode->i_lock);
-		WARN_ON(inode->i_state & I_LRU_ISOLATING);
 	}
+	finish_wait(wq_head, &wqe.wq_entry);
+	WARN_ON(inode->i_state & I_LRU_ISOLATING);
 }
 
 /**
@@ -723,7 +745,13 @@ static void evict(struct inode *inode)
 	 * used as an indicator whether blocking on it is safe.
 	 */
 	spin_lock(&inode->i_lock);
-	wake_up_bit(&inode->i_state, __I_NEW);
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
+	smp_mb();
+	inode_wake_up_bit(inode, __I_NEW);
 	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 	spin_unlock(&inode->i_lock);
 
@@ -1135,8 +1163,13 @@ void unlock_new_inode(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW & ~I_CREATING;
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
+	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
@@ -1147,8 +1180,13 @@ void discard_new_inode(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW;
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
+	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
 	iput(inode);
 }
@@ -2337,8 +2375,8 @@ EXPORT_SYMBOL(inode_needs_sync);
  */
 static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
 {
-	wait_queue_head_t *wq;
-	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
 
 	/*
 	 * Handle racing against evict(), see that routine for more details.
@@ -2349,14 +2387,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
 		return;
 	}
 
-	wq = bit_waitqueue(&inode->i_state, __I_NEW);
-	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode->i_lock);
 	rcu_read_unlock();
 	if (is_inode_hash_locked)
 		spin_unlock(&inode_hash_lock);
 	schedule();
-	finish_wait(wq, &wait.wq_entry);
+	finish_wait(wq_head, &wqe.wq_entry);
 	if (is_inode_hash_locked)
 		spin_lock(&inode_hash_lock);
 	rcu_read_lock();

include/linux/fs.h

Lines changed: 38 additions & 18 deletions
@@ -681,7 +681,8 @@ struct inode {
 #endif
 
 	/* Misc */
-	unsigned long		i_state;
+	u32			i_state;
+	/* 32-bit hole */
 	struct rw_semaphore	i_rwsem;
 
 	unsigned long		dirtied_when;	/* jiffies of first dirtying */
@@ -744,6 +745,21 @@ struct inode {
 	void			*i_private; /* fs or device private pointer */
 } __randomize_layout;
 
+/*
+ * Get bit address from inode->i_state to use with wait_var_event()
+ * infrastructre.
+ */
+#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))
+
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+					    struct inode *inode, u32 bit);
+
+static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
+{
+	/* Caller is responsible for correct memory barriers. */
+	wake_up_var(inode_state_wait_address(inode, bit));
+}
+
 struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
 
 static inline unsigned int i_blocksize(const struct inode *node)
@@ -2395,28 +2411,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
  * i_count.
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
+ *
+ * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
+ * upon. There's one free address left.
  */
-#define I_DIRTY_SYNC		(1 << 0)
-#define I_DIRTY_DATASYNC	(1 << 1)
-#define I_DIRTY_PAGES		(1 << 2)
-#define __I_NEW			3
+#define __I_NEW			0
 #define I_NEW			(1 << __I_NEW)
-#define I_WILL_FREE		(1 << 4)
-#define I_FREEING		(1 << 5)
-#define I_CLEAR			(1 << 6)
-#define __I_SYNC		7
+#define __I_SYNC		1
 #define I_SYNC			(1 << __I_SYNC)
-#define I_REFERENCED		(1 << 8)
+#define __I_LRU_ISOLATING	2
+#define I_LRU_ISOLATING		(1 << __I_LRU_ISOLATING)
+
+#define I_DIRTY_SYNC		(1 << 3)
+#define I_DIRTY_DATASYNC	(1 << 4)
+#define I_DIRTY_PAGES		(1 << 5)
+#define I_WILL_FREE		(1 << 6)
+#define I_FREEING		(1 << 7)
+#define I_CLEAR			(1 << 8)
+#define I_REFERENCED		(1 << 9)
 #define I_LINKABLE		(1 << 10)
 #define I_DIRTY_TIME		(1 << 11)
-#define I_WB_SWITCH		(1 << 13)
-#define I_OVL_INUSE		(1 << 14)
-#define I_CREATING		(1 << 15)
-#define I_DONTCACHE		(1 << 16)
-#define I_SYNC_QUEUED		(1 << 17)
-#define I_PINNING_NETFS_WB	(1 << 18)
-#define __I_LRU_ISOLATING	19
-#define I_LRU_ISOLATING	(1 << __I_LRU_ISOLATING)
+#define I_WB_SWITCH		(1 << 12)
+#define I_OVL_INUSE		(1 << 13)
+#define I_CREATING		(1 << 14)
+#define I_DONTCACHE		(1 << 15)
+#define I_SYNC_QUEUED		(1 << 16)
+#define I_PINNING_NETFS_WB	(1 << 17)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
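A note on the bit reordering above: inode_state_wait_address() forms the wait address as a byte offset into the 32-bit i_state, so only bit numbers 0..3 yield distinct addresses that stay inside the field. That is why __I_NEW, __I_SYNC and __I_LRU_ISOLATING were moved down to bits 0-2, leaving exactly one spare address, as the new comment notes. A hypothetical compile-time check (not part of this series) that captures the constraint:

/* Hypothetical sketch, not in the series: a wait bit must index a byte inside the u32 i_state. */
static_assert(__I_NEW < sizeof(u32), "__I_NEW must address a byte within i_state");
static_assert(__I_SYNC < sizeof(u32), "__I_SYNC must address a byte within i_state");
static_assert(__I_LRU_ISOLATING < sizeof(u32), "__I_LRU_ISOLATING must address a byte within i_state");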

include/linux/writeback.h

Lines changed: 2 additions & 1 deletion
@@ -200,7 +200,8 @@ void inode_io_list_del(struct inode *inode);
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
 {
-	wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
+	wait_var_event(inode_state_wait_address(inode, __I_NEW),
+		       !(READ_ONCE(inode->i_state) & I_NEW));
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK