From 72ed8fbd1ec0edf9fc09ea8eda6e6077f9e2fc4e Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Fri, 2 Jul 2021 12:23:11 +0530 Subject: [PATCH 01/13] implement bg processing at driver Signed-off-by: Lakshmi Narasimhan Sundararajan --- dev.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fuse_i.h | 14 ++++ pxd.c | 19 +----- 3 files changed, 215 insertions(+), 19 deletions(-) diff --git a/dev.c b/dev.c index bf23d9a5..b1164ec0 100755 --- a/dev.c +++ b/dev.c @@ -27,6 +27,10 @@ #include #include "pxd_compat.h" #include "pxd_core.h" +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) #define PAGE_CACHE_GET(page) get_page(page) @@ -254,7 +258,13 @@ void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) static bool request_pending(struct fuse_conn *fc) { struct fuse_queue_cb *cb = &fc->queue->requests_cb; - return cb->r.read != cb->r.write; + return cb->r.read != smp_load_acquire(&cb->r.write); +} + +static bool user_request_pending(struct fuse_conn *fc) +{ + struct fuse_queue_cb *cb = &fc->queue->user_requests_cb; + return cb->r.read != smp_load_acquire(&cb->r.write); } /* Wait until a request is available on the pending list */ @@ -1077,6 +1087,9 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) if (!fc) return POLLERR; + if (request_pending(fc)) + return (mask | POLLIN | POLLRDNORM); + poll_wait(file, &fc->waitq, wait); if (request_pending(fc)) @@ -1105,6 +1118,15 @@ void fuse_end_queued_requests(struct fuse_conn *fc) static void fuse_conn_free_allocs(struct fuse_conn *fc) { + int i; + + fc->user_mm = NULL; + for (i=0; iio_worker_thread[i]) { + kthread_stop(fc->io_worker_thread[i]); + fc->io_worker_thread[i] = NULL; + } + } if (fc->per_cpu_ids) free_percpu(fc->per_cpu_ids); if (fc->free_ids) @@ -1124,6 +1146,7 @@ void fuse_queue_init_cb(struct fuse_queue_cb *cb) cb->r.write = 0; cb->r.read = 0; + atomic_set(&cb->r.need_wake_up, 0); } static void fuse_conn_queues_init(struct fuse_conn_queues *queue) @@ -1135,6 +1158,167 @@ static void fuse_conn_queues_init(struct fuse_conn_queues *queue) memset(queue->user_requests, 0, sizeof(queue->user_requests)); } +void fuse_run_user_queue(struct fuse_conn *fc, bool mm_fault) +{ + struct fuse_queue_cb *cb = &fc->queue->user_requests_cb; + struct fuse_user_request *req; + uint32_t read, write; + uint32_t span, i; +#define NREQINLINE (16u) + struct fuse_user_request ureq[NREQINLINE]; + + spin_lock(&fc->io_lock); + write = smp_load_acquire(&cb->r.write); + read = cb->r.read; + span = min((write - read), NREQINLINE); + for (i=0; iqueue->user_requests[ + read & (FUSE_REQUEST_QUEUE_SIZE - 1)]; + memcpy(&ureq[i], req, sizeof(*req)); + } + + if (span != 0) smp_store_release(&cb->r.read, read); + + // give up lock + spin_unlock(&fc->io_lock); + + if (user_request_pending(fc)) wake_up(&fc->io_wait); + + for (i=0; i= KERNEL_VERSION(4, 11, 0) +mm_segment_t fuse_setup_user_access(struct mm_struct *mm, bool *mm_fault) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,10,0) + mm_segment_t old_fs = force_uaccess_begin(); +#else + mm_segment_t old_fs = get_fs(); +#endif + + *mm_fault = false; + if (mm) { + if (!mmget_not_zero(mm)) { + *mm_fault = true; + return old_fs; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,8,0) + use_mm(mm); + set_fs(USER_DS); +#else + kthread_use_mm(mm); +#endif + } + + return old_fs; +} + +void fuse_remove_user_access(struct mm_struct *mm, mm_segment_t old_fs) +{ + if (mm) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,8,0) + unuse_mm(mm); + set_fs(old_fs); +#else + kthread_unuse_mm(mm); +#endif + mmput(mm); + } +} +#else +mm_segment_t fuse_setup_user_access(struct fuse_conn *fc, bool *mm_fault) { *mm_fault = false; return get_fs();} +void fuse_remove_user_access(struct mm_struct *mm, mm_segment_t old_fs) {} +#endif + +static int fuse_process_user_queue(void *c) +{ + struct fuse_conn *fc = (struct fuse_conn*) c; + struct fuse_queue_cb *cb = &fc->queue->user_requests_cb; + bool mm_fault; + struct mm_struct *cur_mm = NULL; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,10,0) + mm_segment_t old_fs = force_uaccess_begin(); +#else + mm_segment_t old_fs = get_fs(); +#endif + unsigned long spin_wait = jiffies; + + while (!kthread_should_stop()) { + if (signal_pending(current)) + flush_signals(current); + + do { + if (user_request_pending(fc)) break; + cpu_relax(); + } while (jiffies < spin_wait); + + // prepare to sleep + if (!user_request_pending(fc) && cur_mm != NULL) { + fuse_remove_user_access(cur_mm, old_fs); + cur_mm = NULL; + } + + atomic_inc(&cb->r.need_wake_up); + wait_event_interruptible(fc->io_wait, + (user_request_pending(fc) || kthread_should_stop() || + kthread_should_park())); + atomic_dec(&cb->r.need_wake_up); + + if (kthread_should_stop()) { + break; + } + + if (kthread_should_park()) { + fuse_remove_user_access(cur_mm, old_fs); + cur_mm = NULL; + kthread_parkme(); + continue; + } + + mm_fault = false; + if (!cur_mm) { + fuse_setup_user_access(fc->user_mm, &mm_fault); + cur_mm = fc->user_mm; + } + fuse_run_user_queue(fc, mm_fault); + spin_wait = jiffies + msecs_to_jiffies(3); + } + + if (cur_mm != NULL) { + fuse_remove_user_access(cur_mm, old_fs); + } + return 0; +} + +void fuse_pause_user_queue(struct fuse_conn *fc) +{ + struct fuse_queue_cb *cb = &fc->queue->user_requests_cb; + int i; + + pr_info("parking worker threads"); + for (i=0; iio_worker_thread[i]); + + if (fc->user_mm) mmdrop(fc->user_mm); + BUG_ON(atomic_read(&cb->r.need_wake_up) == 0); +} + +void fuse_restart_user_queue(struct fuse_conn *fc) +{ + int i; + + mmgrab(current->mm); + fc->user_mm = current->mm; + pr_info("unparking worker threads"); + for (i=0; iio_worker_thread[i]); +} + int fuse_conn_init(struct fuse_conn *fc) { int i, rc; @@ -1147,6 +1331,16 @@ int fuse_conn_init(struct fuse_conn *fc) fc->request_map = kmalloc(FUSE_MAX_REQUEST_IDS * sizeof(struct fuse_req*), GFP_KERNEL); + init_waitqueue_head(&fc->io_wait); + spin_lock_init(&fc->io_lock); + for (i=0; iio_worker_thread[i] = kthread_create(fuse_process_user_queue, fc, "userq-worker-%d", i); + if (IS_ERR(fc->io_worker_thread[i])) { + rc = (int) PTR_ERR(fc->io_worker_thread[i]); + goto err_out; + } + } + rc = -ENOMEM; if (!fc->request_map) { printk(KERN_ERR "failed to allocate request map"); @@ -1184,6 +1378,9 @@ int fuse_conn_init(struct fuse_conn *fc) } fuse_conn_queues_init(fc->queue); + fc->user_mm = NULL; + for (i=0; iio_worker_thread[i]); return 0; err_out: @@ -1370,6 +1567,8 @@ int fuse_restart_requests(struct fuse_conn *fc) vfree(resend_reqs); + fuse_restart_user_queue(fc); + return 0; } diff --git a/fuse_i.h b/fuse_i.h index afe69b3f..b378ff8a 100755 --- a/fuse_i.h +++ b/fuse_i.h @@ -266,6 +266,13 @@ struct fuse_conn { /** Called on final put */ void (*release)(struct fuse_conn *); + + /** user request processing */ + wait_queue_head_t io_wait; +#define NWORKERS (8) + struct task_struct* io_worker_thread[NWORKERS]; + struct mm_struct *user_mm; + spinlock_t io_lock; }; /** Device operations */ @@ -305,6 +312,13 @@ void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req); /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); +/** + * start processing pending IOs from userspace. + */ +void fuse_run_user_queue(struct fuse_conn *fc, bool mm_fault); +void fuse_restart_user_queue(struct fuse_conn *fc); +void fuse_pause_user_queue(struct fuse_conn *fc); + /** * Initialize fuse_conn */ diff --git a/pxd.c b/pxd.c index 9526173d..9843e198 100644 --- a/pxd.c +++ b/pxd.c @@ -230,25 +230,8 @@ static long pxd_ioctl_run_user_queue(struct file *file) { struct pxd_context *ctx = container_of(file->f_op, struct pxd_context, fops); struct fuse_conn *fc = &ctx->fc; - struct fuse_queue_cb *cb = &fc->queue->user_requests_cb; - - struct fuse_user_request *req; - - uint32_t read = cb->r.read; - uint32_t write = smp_load_acquire(&cb->r.write); - - while (read != write) { - for (; read != write; ++read) { - req = &fc->queue->user_requests[ - read & (FUSE_REQUEST_QUEUE_SIZE - 1)]; - fuse_process_user_request(fc, req); - } - - smp_store_release(&cb->r.read, read); - - write = smp_load_acquire(&cb->r.write); - } + fuse_run_user_queue(fc, false); return 0; } From ccd951a9a2ed91c0e53f45e4c4f2bb7e53aa875f Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Fri, 2 Jul 2021 14:51:57 +0530 Subject: [PATCH 02/13] optimize Signed-off-by: Lakshmi Narasimhan Sundararajan --- dev.c | 52 ++++++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/dev.c b/dev.c index b1164ec0..0919354e 100755 --- a/dev.c +++ b/dev.c @@ -1251,25 +1251,27 @@ static int fuse_process_user_queue(void *c) unsigned long spin_wait = jiffies; while (!kthread_should_stop()) { - if (signal_pending(current)) - flush_signals(current); - do { if (user_request_pending(fc)) break; cpu_relax(); } while (jiffies < spin_wait); // prepare to sleep - if (!user_request_pending(fc) && cur_mm != NULL) { - fuse_remove_user_access(cur_mm, old_fs); - cur_mm = NULL; - } + if (!user_request_pending(fc)) { + if (cur_mm != NULL) { + fuse_remove_user_access(cur_mm, old_fs); + cur_mm = NULL; + } - atomic_inc(&cb->r.need_wake_up); - wait_event_interruptible(fc->io_wait, + atomic_inc(&cb->r.need_wake_up); + if (signal_pending(current)) + flush_signals(current); + + wait_event_interruptible(fc->io_wait, (user_request_pending(fc) || kthread_should_stop() || kthread_should_park())); - atomic_dec(&cb->r.need_wake_up); + atomic_dec(&cb->r.need_wake_up); + } if (kthread_should_stop()) { break; @@ -1304,9 +1306,12 @@ void fuse_pause_user_queue(struct fuse_conn *fc) pr_info("parking worker threads"); for (i=0; iio_worker_thread[i]); + BUG_ON(atomic_read(&cb->r.need_wake_up) != 0); - if (fc->user_mm) mmdrop(fc->user_mm); - BUG_ON(atomic_read(&cb->r.need_wake_up) == 0); + if (fc->user_mm) { + mmdrop(fc->user_mm); + fc->user_mm = NULL; + } } void fuse_restart_user_queue(struct fuse_conn *fc) @@ -1331,16 +1336,6 @@ int fuse_conn_init(struct fuse_conn *fc) fc->request_map = kmalloc(FUSE_MAX_REQUEST_IDS * sizeof(struct fuse_req*), GFP_KERNEL); - init_waitqueue_head(&fc->io_wait); - spin_lock_init(&fc->io_lock); - for (i=0; iio_worker_thread[i] = kthread_create(fuse_process_user_queue, fc, "userq-worker-%d", i); - if (IS_ERR(fc->io_worker_thread[i])) { - rc = (int) PTR_ERR(fc->io_worker_thread[i]); - goto err_out; - } - } - rc = -ENOMEM; if (!fc->request_map) { printk(KERN_ERR "failed to allocate request map"); @@ -1378,9 +1373,18 @@ int fuse_conn_init(struct fuse_conn *fc) } fuse_conn_queues_init(fc->queue); + fc->user_mm = NULL; - for (i=0; iio_worker_thread[i]); + init_waitqueue_head(&fc->io_wait); + spin_lock_init(&fc->io_lock); + for (i=0; iio_worker_thread[i] = kthread_create(fuse_process_user_queue, fc, "userq-worker-%d", i); + if (IS_ERR(fc->io_worker_thread[i])) { + rc = (int) PTR_ERR(fc->io_worker_thread[i]); + goto err_out; + } + wake_up_process(fc->io_worker_thread[i]); + } return 0; err_out: From eb98f5a2a5624816644c5974dc7a697b76ff7766 Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Sun, 4 Jul 2021 20:46:03 +0530 Subject: [PATCH 03/13] extend features for bgio Signed-off-by: Lakshmi Narasimhan Sundararajan --- pxd.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pxd.h b/pxd.h index d7d03a10..59c25c6e 100755 --- a/pxd.h +++ b/pxd.h @@ -213,6 +213,7 @@ struct pxd_device* find_pxd_device(struct pxd_context *ctx, uint64_t dev_id); */ // No arguments necessary other than opcode #define PXD_FEATURE_FASTPATH (0x1) +#define PXD_FEATURE_BGIO (0x2) static inline int pxd_supported_features(void) @@ -221,6 +222,9 @@ int pxd_supported_features(void) #ifdef __PX_FASTPATH__ features |= PXD_FEATURE_FASTPATH; #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + features |= PXD_FEATURE_BGIO; +#endif return features; } From 34bf7c4eeb2b4f0a41dfd00637f4dd7705d90a07 Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Sun, 4 Jul 2021 21:05:29 +0530 Subject: [PATCH 04/13] merge code Signed-off-by: Lakshmi Narasimhan Sundararajan --- pxd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pxd.c b/pxd.c index 9843e198..62af8912 100644 --- a/pxd.c +++ b/pxd.c @@ -2140,6 +2140,7 @@ static int pxd_control_release(struct inode *inode, struct file *file) pxd_printk("%s: not opened\n", __func__); } else { WRITE_ONCE(ctx->fc.connected, 0); + fuse_pause_user_queue(&ctx->fc); } schedule_delayed_work(&ctx->abort_work, pxd_timeout_secs * HZ); From 9b27da952199a9b029d7f9b15df09c1589922018 Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Thu, 8 Jul 2021 22:27:35 +0530 Subject: [PATCH 05/13] fix compilation Signed-off-by: Lakshmi Narasimhan Sundararajan --- dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev.c b/dev.c index 0919354e..4814966d 100755 --- a/dev.c +++ b/dev.c @@ -1233,7 +1233,7 @@ void fuse_remove_user_access(struct mm_struct *mm, mm_segment_t old_fs) } } #else -mm_segment_t fuse_setup_user_access(struct fuse_conn *fc, bool *mm_fault) { *mm_fault = false; return get_fs();} +mm_segment_t fuse_setup_user_access(struct mm_struct *mm, bool *mm_fault) { *mm_fault = false; return get_fs();} void fuse_remove_user_access(struct mm_struct *mm, mm_segment_t old_fs) {} #endif From 73bd01fe584c4c3c41c9911cda3a59e18252538c Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Tue, 20 Jul 2021 12:53:48 +0530 Subject: [PATCH 06/13] extend support to configure/disable offload Signed-off-by: Lakshmi Narasimhan Sundararajan --- dev.c | 10 ++++++---- fuse_i.h | 4 ++-- io.c | 3 ++- io.h | 2 +- pxd.c | 13 ++++++++++++- pxd.h | 5 ++--- pxd_io_uring.h | 3 ++- 7 files changed, 27 insertions(+), 13 deletions(-) diff --git a/dev.c b/dev.c index 4814966d..e77cad09 100755 --- a/dev.c +++ b/dev.c @@ -1305,7 +1305,8 @@ void fuse_pause_user_queue(struct fuse_conn *fc) int i; pr_info("parking worker threads"); - for (i=0; iio_worker_thread[i]); + for (i=0; iio_worker_thread[i]) kthread_park(fc->io_worker_thread[i]); BUG_ON(atomic_read(&cb->r.need_wake_up) != 0); if (fc->user_mm) { @@ -1321,10 +1322,11 @@ void fuse_restart_user_queue(struct fuse_conn *fc) mmgrab(current->mm); fc->user_mm = current->mm; pr_info("unparking worker threads"); - for (i=0; iio_worker_thread[i]); + for (i=0; iio_worker_thread[i]) kthread_unpark(fc->io_worker_thread[i]); } -int fuse_conn_init(struct fuse_conn *fc) +int fuse_conn_init(struct fuse_conn *fc, uint32_t max_workers) { int i, rc; int cpu; @@ -1377,7 +1379,7 @@ int fuse_conn_init(struct fuse_conn *fc) fc->user_mm = NULL; init_waitqueue_head(&fc->io_wait); spin_lock_init(&fc->io_lock); - for (i=0; iio_worker_thread[i] = kthread_create(fuse_process_user_queue, fc, "userq-worker-%d", i); if (IS_ERR(fc->io_worker_thread[i])) { rc = (int) PTR_ERR(fc->io_worker_thread[i]); diff --git a/fuse_i.h b/fuse_i.h index b378ff8a..494f735a 100755 --- a/fuse_i.h +++ b/fuse_i.h @@ -269,7 +269,7 @@ struct fuse_conn { /** user request processing */ wait_queue_head_t io_wait; -#define NWORKERS (8) +#define NWORKERS (8u) struct task_struct* io_worker_thread[NWORKERS]; struct mm_struct *user_mm; spinlock_t io_lock; @@ -322,7 +322,7 @@ void fuse_pause_user_queue(struct fuse_conn *fc); /** * Initialize fuse_conn */ -int fuse_conn_init(struct fuse_conn *fc); +int fuse_conn_init(struct fuse_conn *fc, uint32_t max_threads); /** * Abort pending requests diff --git a/io.c b/io.c index e9c67726..56b6d29f 100644 --- a/io.c +++ b/io.c @@ -2241,6 +2241,7 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, struct io_uring_params * { int ret; int i; + uint32_t max_threads = min(p->sqo_threads, NSLAVES); init_waitqueue_head(&ctx->sqo_wait); mmgrab(current->mm); @@ -2255,7 +2256,7 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, struct io_uring_params * if (!ctx->sq_thread_idle) ctx->sq_thread_idle = HZ; - for (i=0; isqo_thread[i] = kthread_create(io_sq_thread, ctx, "pxd-io-%d", i); if (IS_ERR(ctx->sqo_thread[i])) { ret = PTR_ERR(ctx->sqo_thread[i]); diff --git a/io.h b/io.h index 521d2e2e..dd99124a 100644 --- a/io.h +++ b/io.h @@ -87,7 +87,7 @@ struct io_ring_ctx { /* IO offload */ struct workqueue_struct *sqo_wq; -#define NSLAVES (8) +#define NSLAVES (8u) struct task_struct *sqo_thread[NSLAVES]; /* if using sq thread polling */ struct mm_struct *sqo_mm; wait_queue_head_t sqo_wait; diff --git a/pxd.c b/pxd.c index 62af8912..22c414f8 100644 --- a/pxd.c +++ b/pxd.c @@ -64,15 +64,26 @@ uint32_t pxd_num_contexts = PXD_NUM_CONTEXTS; uint32_t pxd_num_contexts_exported = PXD_NUM_CONTEXT_EXPORTED; uint32_t pxd_timeout_secs = PXD_TIMER_SECS_DEFAULT; uint32_t pxd_detect_zero_writes = 0; +uint32_t pxd_offload = 0; module_param(pxd_num_contexts_exported, uint, 0644); module_param(pxd_num_contexts, uint, 0644); module_param(pxd_detect_zero_writes, uint, 0644); +/// specify number of threads for bgio processing +module_param(pxd_offload, uint, 0644); static void pxd_abort_context(struct work_struct *work); static int pxd_nodewipe_cleanup(struct pxd_context *ctx); static int pxd_bus_add_dev(struct pxd_device *pxd_dev); +uint32_t pxd_offload_threads(void) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + return min(NWORKERS, pxd_offload); +#endif + return 0; +} + struct pxd_context* find_context(unsigned ctx) { if (ctx >= pxd_num_contexts) { @@ -2256,7 +2267,7 @@ int pxd_context_init(struct pxd_context *ctx, int i) ctx->fops.mmap = pxd_mmap; if (ctx->id < pxd_num_contexts_exported) { - err = fuse_conn_init(&ctx->fc); + err = fuse_conn_init(&ctx->fc, pxd_offload_threads()); if (err) return err; } diff --git a/pxd.h b/pxd.h index 59c25c6e..fd357ef7 100755 --- a/pxd.h +++ b/pxd.h @@ -215,6 +215,7 @@ struct pxd_device* find_pxd_device(struct pxd_context *ctx, uint64_t dev_id); #define PXD_FEATURE_FASTPATH (0x1) #define PXD_FEATURE_BGIO (0x2) +uint32_t pxd_offload_threads(void); static inline int pxd_supported_features(void) { @@ -222,9 +223,7 @@ int pxd_supported_features(void) #ifdef __PX_FASTPATH__ features |= PXD_FEATURE_FASTPATH; #endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) - features |= PXD_FEATURE_BGIO; -#endif + if (pxd_offload_threads()) features |= PXD_FEATURE_BGIO; return features; } diff --git a/pxd_io_uring.h b/pxd_io_uring.h index 9bd56cc2..4b80f7cb 100644 --- a/pxd_io_uring.h +++ b/pxd_io_uring.h @@ -132,7 +132,8 @@ struct io_uring_params { __u32 flags; __u32 sq_thread_cpu; __u32 sq_thread_idle; - __u32 resv[5]; + __u32 sqo_threads; // ofload threads + __u32 resv[4]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; }; From 15d981b1f0ccb5103b81528647a21ec93116204b Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Tue, 20 Jul 2021 13:11:19 +0530 Subject: [PATCH 07/13] reset default Signed-off-by: Lakshmi Narasimhan Sundararajan --- pxd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pxd.c b/pxd.c index 22c414f8..57be1e18 100644 --- a/pxd.c +++ b/pxd.c @@ -64,7 +64,7 @@ uint32_t pxd_num_contexts = PXD_NUM_CONTEXTS; uint32_t pxd_num_contexts_exported = PXD_NUM_CONTEXT_EXPORTED; uint32_t pxd_timeout_secs = PXD_TIMER_SECS_DEFAULT; uint32_t pxd_detect_zero_writes = 0; -uint32_t pxd_offload = 0; +uint32_t pxd_offload = NWORKERS; module_param(pxd_num_contexts_exported, uint, 0644); module_param(pxd_num_contexts, uint, 0644); From a50a276f64cae5f3e63e10f2790d3c87a7640fbe Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Tue, 27 Jul 2021 11:02:00 +0530 Subject: [PATCH 08/13] uring sync req Signed-off-by: Lakshmi Narasimhan Sundararajan --- io.c | 24 ++++++++++++++++++++++++ pxd.h | 1 + 2 files changed, 25 insertions(+) diff --git a/io.c b/io.c index 56b6d29f..1b34251c 100644 --- a/io.c +++ b/io.c @@ -2738,6 +2738,28 @@ static long io_ring_ioctl_init(struct io_ring_ctx *ctx, unsigned long arg) return 0; } +static long io_run_cmd(struct io_ring_ctx *ctx, unsigned long arg) +{ + struct io_uring_sqe entry; + struct sqe_submit s; + int ret; + + if (copy_from_user(&entry, (void *)arg, sizeof(entry))) + return -EFAULT; + + if (entry.flags & IOSQE_IO_DRAIN) + return -EINVAL; + + s.sqe = &entry; + s.index = 0; // should be invalid, needed only for drain reqs + + s.has_user = true; + s.needs_lock = false; + s.needs_fixed_file = false; + + return io_submit_sqe(ctx, &s, NULL); +} + static long io_uring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct io_ring_ctx *ctx = filp->private_data; @@ -2748,6 +2770,8 @@ static long io_uring_ioctl(struct file *filp, unsigned int cmd, unsigned long ar return 0; case PXD_IOC_RUN_IO_QUEUE: return io_run_queue(ctx); + case PXD_IOC_RUN_CMD: + return io_run_cmd(ctx, arg); case PXD_IOC_REGISTER_FILE: return io_sqe_register_file(ctx, arg); case PXD_IOC_UNREGISTER_FILE: diff --git a/pxd.h b/pxd.h index fd357ef7..113f51b7 100755 --- a/pxd.h +++ b/pxd.h @@ -38,6 +38,7 @@ #define PXD_IOC_IO_FLUSHER _IO(PXD_IOCTL_MAGIC, 10) /* 0x50580a */ #define PXD_IOC_INIT_IO _IO(PXD_IOCTL_MAGIC, 11) /* 0x50580b */ #define PXD_IOC_WAKE_UP_SQO _IO(PXD_IOCTL_MAGIC, 12) /* 0x50580c */ +#define PXD_IOC_RUN_CMD _IO(PXD_IOCTL_MAGIC, 13) /* 0x50580d */ #define PXD_MAX_DEVICES 512 /**< maximum number of devices supported */ #define PXD_MAX_IO (1024*1024) /**< maximum io size in bytes */ From 021c68edd809d5908f833aaaaa689ee2d0d4100d Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Tue, 27 Jul 2021 11:17:26 +0530 Subject: [PATCH 09/13] fix compile Signed-off-by: Lakshmi Narasimhan Sundararajan --- io.c | 1 - 1 file changed, 1 deletion(-) diff --git a/io.c b/io.c index 1b34251c..3dc905b5 100644 --- a/io.c +++ b/io.c @@ -2742,7 +2742,6 @@ static long io_run_cmd(struct io_ring_ctx *ctx, unsigned long arg) { struct io_uring_sqe entry; struct sqe_submit s; - int ret; if (copy_from_user(&entry, (void *)arg, sizeof(entry))) return -EFAULT; From dc853d486d48540f57dce6ea7bad3a084dd52d31 Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Tue, 27 Jul 2021 11:33:26 +0530 Subject: [PATCH 10/13] extend init log Signed-off-by: Lakshmi Narasimhan Sundararajan --- pxd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pxd.c b/pxd.c index 57be1e18..d2b6e92e 100644 --- a/pxd.c +++ b/pxd.c @@ -2368,11 +2368,11 @@ int pxd_init(void) goto out_blkdev; } #ifdef __PX_BLKMQ__ - printk(KERN_INFO "pxd: blk-mq driver loaded version %s, features %#x\n", - gitversion, pxd_supported_features()); + printk(KERN_INFO "pxd: blk-mq driver loaded version %s, features %#x max threads %d\n", + gitversion, pxd_supported_features(), pxd_offload_threads()); #else - printk(KERN_INFO "pxd: driver loaded version %s, features %#x\n", - gitversion, pxd_supported_features()); + printk(KERN_INFO "pxd: driver loaded version %s, features %#x, max threads %d\n", + gitversion, pxd_supported_features(), pxd_offload_threads()); #endif return 0; From 5eaaef3467130184da2ba40df25f3a5993f4b704 Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Tue, 27 Jul 2021 18:30:23 +0530 Subject: [PATCH 11/13] cache sqe and kill kmalloc Signed-off-by: Lakshmi Narasimhan Sundararajan --- io.c | 55 ++++++++++++++++++++++--------------------------------- io.h | 2 ++ 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/io.c b/io.c index 3dc905b5..5e065a13 100644 --- a/io.c +++ b/io.c @@ -1460,18 +1460,13 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req, if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list)) return 0; - sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL); - if (!sqe_copy) - return -EAGAIN; - + sqe_copy = &req->cached_sqe; spin_lock_irq(&ctx->completion_lock); if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list)) { spin_unlock_irq(&ctx->completion_lock); - kfree(sqe_copy); return 0; } - memcpy(sqe_copy, sqe, sizeof(*sqe_copy)); req->submit.sqe = sqe_copy; INIT_WORK(&req->work, io_sq_wq_submit_work); @@ -1630,9 +1625,6 @@ static void io_sq_wq_submit_work(struct work_struct *work) io_put_req(req); } - /* async context always use a copy of the sqe */ - kfree(sqe); - if (!async_list) break; if (!list_empty(&req_list)) { @@ -1741,6 +1733,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, unsigned flags; int fd; + memcpy(&req->cached_sqe, s->sqe, sizeof(*s->sqe)); + flags = READ_ONCE(s->sqe->flags); fd = READ_ONCE(s->sqe->fd); @@ -1794,31 +1788,26 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { - struct io_uring_sqe *sqe_copy; - - sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL); - if (sqe_copy) { - struct async_list *list; - - memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy)); - s->sqe = sqe_copy; - - memcpy(&req->submit, s, sizeof(*s)); - list = io_async_list_from_sqe(ctx, s->sqe); - if (!io_add_to_prev_work(list, req)) { - if (list) - atomic_inc(&list->cnt); - INIT_WORK(&req->work, io_sq_wq_submit_work); - queue_work(ctx->sqo_wq, &req->work); - } - - /* - * Queued up for async execution, worker will release - * submit reference when the iocb is actually - * submitted. - */ - return 0; + struct io_uring_sqe *sqe_copy = &req->cached_sqe; + struct async_list *list; + + s->sqe = sqe_copy; + req->submit.sqe = sqe_copy; + + list = io_async_list_from_sqe(ctx, s->sqe); + if (!io_add_to_prev_work(list, req)) { + if (list) + atomic_inc(&list->cnt); + INIT_WORK(&req->work, io_sq_wq_submit_work); + queue_work(ctx->sqo_wq, &req->work); } + + /* + * Queued up for async execution, worker will release + * submit reference when the iocb is actually + * submitted. + */ + return 0; } out: diff --git a/io.h b/io.h index dd99124a..0023bc5e 100644 --- a/io.h +++ b/io.h @@ -49,6 +49,7 @@ #include #include #include "fuse_i.h" +#include "pxd_io_uring.h" struct async_list { spinlock_t lock; @@ -174,6 +175,7 @@ struct io_kiocb { struct io_poll_iocb poll; }; + struct io_uring_sqe __attribute__((aligned(8))) cached_sqe; // cached orig req struct sqe_submit submit; struct io_ring_ctx *ctx; From 1a257074a4533eaa83f5b43c50f520db24780126 Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Tue, 27 Jul 2021 19:04:04 +0530 Subject: [PATCH 12/13] minor edits Signed-off-by: Lakshmi Narasimhan Sundararajan --- io.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/io.c b/io.c index 5e065a13..bf4c4ac6 100644 --- a/io.c +++ b/io.c @@ -2731,6 +2731,7 @@ static long io_run_cmd(struct io_ring_ctx *ctx, unsigned long arg) { struct io_uring_sqe entry; struct sqe_submit s; + long ret; if (copy_from_user(&entry, (void *)arg, sizeof(entry))) return -EFAULT; @@ -2738,14 +2739,18 @@ static long io_run_cmd(struct io_ring_ctx *ctx, unsigned long arg) if (entry.flags & IOSQE_IO_DRAIN) return -EINVAL; - s.sqe = &entry; + s.sqe = &entry; // local var instead of a ring entry. s.index = 0; // should be invalid, needed only for drain reqs s.has_user = true; s.needs_lock = false; s.needs_fixed_file = false; - return io_submit_sqe(ctx, &s, NULL); + if (!percpu_ref_tryget(&ctx->refs)) + return 0; + ret = io_submit_sqe(ctx, &s, NULL); + io_ring_drop_ctx_refs(ctx, 1); + return ret; } static long io_uring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) From ed2c0584aef93824643f69ce9b619385382ca03e Mon Sep 17 00:00:00 2001 From: Lakshmi Narasimhan Sundararajan Date: Wed, 28 Jul 2021 17:10:07 +0530 Subject: [PATCH 13/13] default disable offload Signed-off-by: Lakshmi Narasimhan Sundararajan --- pxd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pxd.c b/pxd.c index d2b6e92e..44951f52 100644 --- a/pxd.c +++ b/pxd.c @@ -64,7 +64,7 @@ uint32_t pxd_num_contexts = PXD_NUM_CONTEXTS; uint32_t pxd_num_contexts_exported = PXD_NUM_CONTEXT_EXPORTED; uint32_t pxd_timeout_secs = PXD_TIMER_SECS_DEFAULT; uint32_t pxd_detect_zero_writes = 0; -uint32_t pxd_offload = NWORKERS; +uint32_t pxd_offload = 0; module_param(pxd_num_contexts_exported, uint, 0644); module_param(pxd_num_contexts, uint, 0644);