Skip to content

Commit 2864d28

Browse files
shligitsfrothwell
authored and committed
swap: add block io poll in swapin path
For fast flash disks, async IO can introduce overhead because of context switches. block-mq now supports IO poll, which improves performance and latency a lot. swapin is a good place to use this technique, because the task is waiting for the swapin page to continue execution. In my virtual machine, directly reading 4k data from an NVMe with iopoll is about 60% better than without poll. With iopoll support in the swapin path, my microbenchmark (a task doing random memory writes) is about 10% ~ 25% faster. CPU utilization increases a lot though, 2x and even 3x CPU utilization. This will depend on disk speed though. While iopoll in swapin isn't intended for all use cases, it's a win for latency-sensitive workloads with a high speed swap disk. The block layer has a knob to control poll at runtime. If poll isn't enabled in the block layer, there should be no noticeable change in swapin. I got a chance to run the same test on an NVMe with DRAM as the media. In a simple fio IO test, blkpoll boosts performance by 50% in the single thread test and ~20% in the 8 threads test. So this is the baseline. In the above swap test, blkpoll boosts performance by ~27% in the single thread test. blkpoll uses 2x CPU time though. If we enable hybrid polling, the performance gain drops very slightly but CPU time is only 50% worse than without blkpoll. Also we can adjust the parameter of hybrid poll; with it, the CPU time penalty is reduced further. In the 8 threads test, blkpoll doesn't help though. The performance is similar to that without blkpoll, but cpu utilization is similar too. There is lock contention in the swap path. The cpu time spent on blkpoll isn't high. So overall, blkpoll swapin isn't worse than swapin without it. The swapin readahead might read several pages in at the same time and form a big IO request. Since the IO will take a longer time, it doesn't make sense to poll, so the patch only does iopoll for single page swapin. 
Link: http://lkml.kernel.org/r/070c3c3e40b711e7b1390002c991e86a-b5408f0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c Signed-off-by: Shaohua Li <[email protected]> Cc: Tim Chen <[email protected]> Cc: Huang Ying <[email protected]> Cc: Jens Axboe <[email protected]> Cc: Hugh Dickins <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 65e596c commit 2864d28

File tree

5 files changed

+33
-11
lines changed

5 files changed

+33
-11
lines changed

include/linux/swap.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ extern void kswapd_stop(int nid);
331331
#include <linux/blk_types.h> /* for bio_end_io_t */
332332

333333
/* linux/mm/page_io.c */
334-
extern int swap_readpage(struct page *);
334+
extern int swap_readpage(struct page *, bool do_poll);
335335
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
336336
extern void end_swap_bio_write(struct bio *bio);
337337
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -362,7 +362,8 @@ extern void free_page_and_swap_cache(struct page *);
362362
extern void free_pages_and_swap_cache(struct page **, int);
363363
extern struct page *lookup_swap_cache(swp_entry_t);
364364
extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
365-
struct vm_area_struct *vma, unsigned long addr);
365+
struct vm_area_struct *vma, unsigned long addr,
366+
bool do_poll);
366367
extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
367368
struct vm_area_struct *vma, unsigned long addr,
368369
bool *new_page_allocated);

mm/madvise.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
205205
continue;
206206

207207
page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
208-
vma, index);
208+
vma, index, false);
209209
if (page)
210210
put_page(page);
211211
}
@@ -246,7 +246,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
246246
}
247247
swap = radix_to_swp_entry(page);
248248
page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
249-
NULL, 0);
249+
NULL, 0, false);
250250
if (page)
251251
put_page(page);
252252
}

mm/page_io.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ static void swap_slot_free_notify(struct page *page)
117117
static void end_swap_bio_read(struct bio *bio)
118118
{
119119
struct page *page = bio->bi_io_vec[0].bv_page;
120+
struct task_struct *waiter = bio->bi_private;
120121

121122
if (bio->bi_error) {
122123
SetPageError(page);
@@ -132,7 +133,9 @@ static void end_swap_bio_read(struct bio *bio)
132133
swap_slot_free_notify(page);
133134
out:
134135
unlock_page(page);
136+
WRITE_ONCE(bio->bi_private, NULL);
135137
bio_put(bio);
138+
wake_up_process(waiter);
136139
}
137140

138141
int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -329,11 +332,13 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
329332
return ret;
330333
}
331334

332-
int swap_readpage(struct page *page)
335+
int swap_readpage(struct page *page, bool do_poll)
333336
{
334337
struct bio *bio;
335338
int ret = 0;
336339
struct swap_info_struct *sis = page_swap_info(page);
340+
blk_qc_t qc;
341+
struct block_device *bdev;
337342

338343
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
339344
VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -372,9 +377,23 @@ int swap_readpage(struct page *page)
372377
ret = -ENOMEM;
373378
goto out;
374379
}
380+
bdev = bio->bi_bdev;
381+
bio->bi_private = current;
375382
bio_set_op_attrs(bio, REQ_OP_READ, 0);
376383
count_vm_event(PSWPIN);
377-
submit_bio(bio);
384+
bio_get(bio);
385+
qc = submit_bio(bio);
386+
while (do_poll) {
387+
set_current_state(TASK_UNINTERRUPTIBLE);
388+
if (!READ_ONCE(bio->bi_private))
389+
break;
390+
391+
if (!blk_mq_poll(bdev_get_queue(bdev), qc))
392+
break;
393+
}
394+
__set_current_state(TASK_RUNNING);
395+
bio_put(bio);
396+
378397
out:
379398
return ret;
380399
}

mm/swap_state.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -412,14 +412,14 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
412412
* the swap entry is no longer in use.
413413
*/
414414
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
415-
struct vm_area_struct *vma, unsigned long addr)
415+
struct vm_area_struct *vma, unsigned long addr, bool do_poll)
416416
{
417417
bool page_was_allocated;
418418
struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
419419
vma, addr, &page_was_allocated);
420420

421421
if (page_was_allocated)
422-
swap_readpage(retpage);
422+
swap_readpage(retpage, do_poll);
423423

424424
return retpage;
425425
}
@@ -496,11 +496,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
496496
unsigned long start_offset, end_offset;
497497
unsigned long mask;
498498
struct blk_plug plug;
499+
bool do_poll = true;
499500

500501
mask = swapin_nr_pages(offset) - 1;
501502
if (!mask)
502503
goto skip;
503504

505+
do_poll = false;
504506
/* Read a page_cluster sized and aligned cluster around offset. */
505507
start_offset = offset & ~mask;
506508
end_offset = offset | mask;
@@ -511,7 +513,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
511513
for (offset = start_offset; offset <= end_offset ; offset++) {
512514
/* Ok, do the async read-ahead now */
513515
page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
514-
gfp_mask, vma, addr);
516+
gfp_mask, vma, addr, false);
515517
if (!page)
516518
continue;
517519
if (offset != entry_offset && likely(!PageTransCompound(page)))
@@ -522,7 +524,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
522524

523525
lru_add_drain(); /* Push any new pages onto the LRU now */
524526
skip:
525-
return read_swap_cache_async(entry, gfp_mask, vma, addr);
527+
return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
526528
}
527529

528530
int init_swap_address_space(unsigned int type, unsigned long nr_pages)

mm/swapfile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1852,7 +1852,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
18521852
swap_map = &si->swap_map[i];
18531853
entry = swp_entry(type, i);
18541854
page = read_swap_cache_async(entry,
1855-
GFP_HIGHUSER_MOVABLE, NULL, 0);
1855+
GFP_HIGHUSER_MOVABLE, NULL, 0, false);
18561856
if (!page) {
18571857
/*
18581858
* Either swap_duplicate() failed because entry

0 commit comments

Comments
 (0)