From 287c8840b60d5cdcf806b16e8cc5722f2dbf0738 Mon Sep 17 00:00:00 2001
From: Bernd Schubert
Date: Fri, 20 Jun 2025 17:34:53 +0200
Subject: [PATCH] fuse: add DLM_LOCK opcode

With the writeback cache enabled, data consistency benefits from telling
the FUSE server when the kernel prepares a page for caching, so that the
server can react and lock the corresponding range.  The same call lets
the FUSE server decide how much data it actually locks, and the kernel
records that answer in its dlm lock management.  If the server does not
support the feature, it is disabled after the first unsuccessful use.

- Add the FUSE_DLM_WB_LOCK opcode
- Add page lock caching for the writeback cache: a FUSE request is sent
  whenever the kernel prepares a page for writeback caching.  The kernel
  keeps track of locks it has already acquired (except for the case
  documented in the code).
- Use rb-trees (an interval tree) to manage the already 'locked' page ranges
- Use an rw_semaphore for synchronization in fuse_dlm_cache
---
 fs/fuse/Makefile          |   2 +-
 fs/fuse/dir.c             |   6 +
 fs/fuse/file.c            |  13 +
 fs/fuse/fuse_dlm_cache.c  | 551 ++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_dlm_cache.h  |  50 ++++
 fs/fuse/fuse_i.h          |  18 ++
 fs/fuse/fuse_trace.h      |   1 +
 fs/fuse/inode.c           |  11 +
 include/uapi/linux/fuse.h |  36 +++
 9 files changed, 687 insertions(+), 1 deletion(-)
 create mode 100644 fs/fuse/fuse_dlm_cache.c
 create mode 100644 fs/fuse/fuse_dlm_cache.h

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 47e649f17f25ae..f6952d826bcb33 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
 obj-$(CONFIG_CUSE) += cuse.o
 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
 
-fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
+fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o fuse_dlm_cache.o
 fuse-y += iomode.o
 fuse-$(CONFIG_FUSE_DAX) += dax.o
 fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 12fb59555a3a2b..597fb0d33ef7b2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -6,6 +6,7 @@
   See the file COPYING.
 */
 
+#include "fuse_dlm_cache.h"
 #include "fuse_i.h"
 
 #include
@@ -1924,6 +1925,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 		 * truncation has already been done by OPEN.  But still
 		 * need to truncate page cache.
 		 */
+		if (fc->dlm && fc->writeback_cache)
+			fuse_dlm_cache_release_locks(fi);
 		i_size_write(inode, 0);
 		truncate_pagecache(inode, 0);
 		goto out;
@@ -2029,6 +2032,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 	 */
 	if ((is_truncate || !is_wb) && S_ISREG(inode->i_mode) &&
 	    oldsize != outarg.attr.size) {
+		if (fc->dlm && fc->writeback_cache)
+			fuse_dlm_unlock_range(fi, outarg.attr.size & PAGE_MASK, -1);
+
 		truncate_pagecache(inode, outarg.attr.size);
 		invalidate_inode_pages2(mapping);
 	}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c7e7260a422755..907a1785c9e636 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -7,6 +7,7 @@
  */
 
 #include "fuse_i.h"
+#include "fuse_dlm_cache.h"
 
 #include
 #include
@@ -1423,6 +1424,17 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		goto writethrough;
 	}
 
+	/* if we have dlm support, acquire the lock for the area
+	 * we are writing into */
+	if (fc->dlm) {
+		/* note that a file opened with O_APPEND will have relative values
+		 * in ki_pos.  This code is here for convenience and for the libfuse overlay test.
+		 * Filesystems should handle O_APPEND with 'direct io' to additionally
+		 * get the performance benefits of 'parallel direct writes'. */
+		loff_t pos = file->f_flags & O_APPEND ? i_size_read(inode) + iocb->ki_pos : iocb->ki_pos;
+		size_t length = iov_iter_count(from);
+
+		fuse_get_dlm_write_lock(file, pos, length);
+	}
 	return generic_file_write_iter(iocb, from);
 }
 
@@ -3336,6 +3348,7 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
 
 	INIT_LIST_HEAD(&fi->write_files);
 	INIT_LIST_HEAD(&fi->queued_writes);
+	fuse_dlm_cache_init(fi);
 	fi->writectr = 0;
 	fi->iocachectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
diff --git a/fs/fuse/fuse_dlm_cache.c b/fs/fuse/fuse_dlm_cache.c
new file mode 100644
index 00000000000000..ea947f34a9f70a
--- /dev/null
+++ b/fs/fuse/fuse_dlm_cache.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * FUSE page lock cache implementation
+ */
+#include "fuse_i.h"
+#include "fuse_dlm_cache.h"
+
+#include
+#include
+#include
+#include
+
+
+/* A range of pages with a lock */
+struct fuse_dlm_range {
+	/* Interval tree node */
+	struct rb_node rb;
+	/* Start page offset (inclusive) */
+	pgoff_t start;
+	/* End page offset (inclusive) */
+	pgoff_t end;
+	/* Subtree end value for interval tree */
+	pgoff_t __subtree_end;
+	/* Lock mode */
+	enum fuse_page_lock_mode mode;
+	/* Temporary list entry for operations */
+	struct list_head list;
+};
+
+/* Lock modes for FUSE page cache */
+#define FUSE_PCACHE_LK_READ	1	/* Shared read lock */
+#define FUSE_PCACHE_LK_WRITE	2	/* Exclusive write lock */
+
+/* Interval tree definitions for page ranges */
+static inline pgoff_t fuse_dlm_range_start(struct fuse_dlm_range *range)
+{
+	return range->start;
+}
+
+static inline pgoff_t fuse_dlm_range_last(struct fuse_dlm_range *range)
+{
+	return range->end;
+}
+
+INTERVAL_TREE_DEFINE(struct fuse_dlm_range, rb, pgoff_t, __subtree_end,
+		     fuse_dlm_range_start, fuse_dlm_range_last, static,
+		     fuse_page_it);
+
+/**
+ * fuse_dlm_cache_init - Initialize a page cache lock manager
+ * @inode: the FUSE inode whose lock cache to initialize
+ *
+ * Initialize a page cache lock manager for a FUSE inode.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int fuse_dlm_cache_init(struct fuse_inode *inode)
+{
+	struct fuse_dlm_cache *cache = &inode->dlm_locked_areas;
+
+	if (!cache)
+		return -EINVAL;
+
+	init_rwsem(&cache->lock);
+	cache->ranges = RB_ROOT_CACHED;
+
+	return 0;
+}
+
+/**
+ * fuse_dlm_cache_release_locks - Clean up a page cache lock manager
+ * @inode: the FUSE inode whose lock cache to clean up
+ *
+ * Release all locks and free all resources associated with the cache.
+ */
+void fuse_dlm_cache_release_locks(struct fuse_inode *inode)
+{
+	struct fuse_dlm_cache *cache = &inode->dlm_locked_areas;
+	struct fuse_dlm_range *range;
+	struct rb_node *node;
+
+	if (!cache)
+		return;
+
+	/* Release all locks */
+	down_write(&cache->lock);
+	while ((node = rb_first_cached(&cache->ranges)) != NULL) {
+		range = rb_entry(node, struct fuse_dlm_range, rb);
+		fuse_page_it_remove(range, &cache->ranges);
+		kfree(range);
+	}
+	up_write(&cache->lock);
+}
+
+/**
+ * fuse_dlm_find_overlapping - Find a range that overlaps with [start, end]
+ * @cache: The page cache
+ * @start: Start page offset
+ * @end: End page offset
+ *
+ * Return: Pointer to the first overlapping range, or NULL if none found
+ */
+static struct fuse_dlm_range *
+fuse_dlm_find_overlapping(struct fuse_dlm_cache *cache, pgoff_t start,
+			  pgoff_t end)
+{
+	return fuse_page_it_iter_first(&cache->ranges, start, end);
+}
+
+/**
+ * fuse_dlm_try_merge - Try to merge ranges within a specific region
+ * @cache: The page cache
+ * @start: Start page offset
+ * @end: End page offset
+ *
+ * Attempt to merge ranges within and adjacent to the specified region
+ * that have the same lock mode.
+ */
+static void fuse_dlm_try_merge(struct fuse_dlm_cache *cache, pgoff_t start,
+			       pgoff_t end)
+{
+	struct fuse_dlm_range *range, *next;
+	struct rb_node *node;
+
+	if (!cache)
+		return;
+
+	/* Find the first range that might need merging */
+	range = NULL;
+	node = rb_first_cached(&cache->ranges);
+	while (node) {
+		range = rb_entry(node, struct fuse_dlm_range, rb);
+		if (range->end >= start - 1)
+			break;
+		node = rb_next(node);
+	}
+
+	if (!range || range->start > end + 1)
+		return;
+
+	/* Try to merge ranges in and around the specified region */
+	while (range && range->start <= end + 1) {
+		/* Get next range before we potentially modify the tree */
+		next = NULL;
+		if (rb_next(&range->rb)) {
+			next = rb_entry(rb_next(&range->rb),
+					struct fuse_dlm_range, rb);
+		}
+
+		/* Try to merge with next range if adjacent and same mode */
+		if (next && range->mode == next->mode &&
+		    range->end + 1 == next->start) {
+			/* Merge ranges */
+			range->end = next->end;
+
+			/* Remove next from tree */
+			fuse_page_it_remove(next, &cache->ranges);
+			kfree(next);
+
+			/* Continue with the same range */
+			continue;
+		}
+
+		/* Move to next range */
+		range = next;
+	}
+}
+
+/**
+ * fuse_dlm_lock_range - Lock a range of pages
+ * @inode: the FUSE inode
+ * @start: Start page offset
+ * @end: End page offset
+ * @mode: Lock mode (read or write)
+ *
+ * Add a locked range on the specified range of pages.
+ * If parts of the range are already locked, only add the remaining parts.
+ * For overlapping ranges, handle lock compatibility:
+ * - READ locks are compatible with existing READ locks
+ * - READ locks are compatible with existing WRITE locks (downgrade not needed)
+ * - WRITE locks need to upgrade existing READ locks
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int fuse_dlm_lock_range(struct fuse_inode *inode, pgoff_t start,
+			pgoff_t end, enum fuse_page_lock_mode mode)
+{
+	struct fuse_dlm_cache *cache = &inode->dlm_locked_areas;
+	struct fuse_dlm_range *range, *new_range, *next;
+	int lock_mode;
+	int ret = 0;
+	LIST_HEAD(to_lock);
+	LIST_HEAD(to_upgrade);
+	pgoff_t current_start = start;
+
+	if (!cache || start > end)
+		return -EINVAL;
+
+	/* Convert to lock mode */
+	lock_mode = (mode == FUSE_PAGE_LOCK_READ) ?
+			FUSE_PCACHE_LK_READ :
+			FUSE_PCACHE_LK_WRITE;
+
+	down_write(&cache->lock);
+
+	/* Find all ranges that overlap with [start, end] */
+	range = fuse_page_it_iter_first(&cache->ranges, start, end);
+	while (range) {
+		/* Get next overlapping range before we potentially modify the tree */
+		next = fuse_page_it_iter_next(range, start, end);
+
+		/* Check lock compatibility */
+		if (lock_mode == FUSE_PCACHE_LK_WRITE &&
+		    lock_mode != range->mode) {
+			/* we own the lock but have to update it. */
+			list_add_tail(&range->list, &to_upgrade);
+		}
+		/* If WRITE lock already exists - nothing to do */
+
+		/* If there's a gap before this range, we need to add the missing range */
+		if (current_start < range->start) {
+			new_range = kmalloc(sizeof(*new_range), GFP_KERNEL);
+			if (!new_range) {
+				ret = -ENOMEM;
+				goto out_free;
+			}
+
+			new_range->start = current_start;
+			new_range->end = range->start - 1;
+			new_range->mode = lock_mode;
+			INIT_LIST_HEAD(&new_range->list);
+
+			list_add_tail(&new_range->list, &to_lock);
+		}
+
+		/* Move current_start past this range */
+		current_start = max(current_start, range->end + 1);
+
+		/* Move to next range */
+		range = next;
+	}
+
+	/* If there's a gap after the last range to the end, extend the range */
+	if (current_start <= end) {
+		new_range = kmalloc(sizeof(*new_range), GFP_KERNEL);
+		if (!new_range) {
+			ret = -ENOMEM;
+			goto out_free;
+		}
+
+		new_range->start = current_start;
+		new_range->end = end;
+		new_range->mode = lock_mode;
+		INIT_LIST_HEAD(&new_range->list);
+
+		list_add_tail(&new_range->list, &to_lock);
+	}
+
+	/* update locks, if any lock is in this list it has the wrong mode */
+	list_for_each_entry(range, &to_upgrade, list) {
+		/* Update the lock mode */
+		range->mode = lock_mode;
+	}
+
+	/* Add all new ranges to the tree */
+	list_for_each_entry(new_range, &to_lock, list) {
+		/* Add to interval tree */
+		fuse_page_it_insert(new_range, &cache->ranges);
+	}
+
+	/* Try to merge adjacent ranges with the same mode */
+	fuse_dlm_try_merge(cache, start, end);
+
+	up_write(&cache->lock);
+	return 0;
+
+out_free:
+	/* Free any ranges we allocated but didn't insert */
+	while (!list_empty(&to_lock)) {
+		new_range =
+			list_first_entry(&to_lock, struct fuse_dlm_range, list);
+		list_del(&new_range->list);
+		kfree(new_range);
+	}
+
+	/* Restore original lock modes for any partially upgraded locks */
+	list_for_each_entry(range, &to_upgrade, list) {
+		if (lock_mode == FUSE_PCACHE_LK_WRITE) {
+			/* We upgraded this lock but failed later, downgrade it back */
+			range->mode = FUSE_PCACHE_LK_READ;
+		}
+	}
+
+	up_write(&cache->lock);
+	return ret;
+}
+
+/**
+ * fuse_dlm_punch_hole - Punch a hole in a locked range
+ * @cache: The page cache
+ * @start: Start page offset of the hole
+ * @end: End page offset of the hole
+ *
+ * Create a hole in a locked range by splitting it into two ranges.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int fuse_dlm_punch_hole(struct fuse_dlm_cache *cache, pgoff_t start,
+			       pgoff_t end)
+{
+	struct fuse_dlm_range *range, *new_range;
+	int ret = 0;
+
+	if (!cache || start > end)
+		return -EINVAL;
+
+	/* Find a range that contains [start, end] */
+	range = fuse_dlm_find_overlapping(cache, start, end);
+	if (!range) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* If the hole is at the beginning of the range */
+	if (start == range->start) {
+		range->start = end + 1;
+		goto out;
+	}
+
+	/* If the hole is at the end of the range */
+	if (end == range->end) {
+		range->end = start - 1;
+		goto out;
+	}
+
+	/* The hole is in the middle, need to split */
+	new_range = kmalloc(sizeof(*new_range), GFP_KERNEL);
+	if (!new_range) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Copy properties from original range */
+	*new_range = *range;
+	INIT_LIST_HEAD(&new_range->list);
+
+	/* Adjust ranges */
+	new_range->start = end + 1;
+	range->end = start - 1;
+
+	/* Update interval tree */
+	fuse_page_it_remove(range, &cache->ranges);
+	fuse_page_it_insert(range, &cache->ranges);
+	fuse_page_it_insert(new_range, &cache->ranges);
+
+out:
+	return ret;
+}
+
+/**
+ * fuse_dlm_unlock_range - Unlock a range of pages
+ * @inode: the FUSE inode
+ * @start: Start page offset
+ * @end: End page offset
+ *
+ * Release locks on the specified range of pages.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int fuse_dlm_unlock_range(struct fuse_inode *inode,
+			  pgoff_t start, pgoff_t end)
+{
+	struct fuse_dlm_cache *cache = &inode->dlm_locked_areas;
+	struct fuse_dlm_range *range, *next;
+	int ret = 0;
+
+	if (!cache)
+		return -EINVAL;
+
+	down_write(&cache->lock);
+
+	/* Find all ranges that overlap with [start, end] */
+	range = fuse_page_it_iter_first(&cache->ranges, start, end);
+	while (range) {
+		/* Get next overlapping range before we potentially modify the tree */
+		next = fuse_page_it_iter_next(range, start, end);
+
+		/* Check if we need to punch a hole */
+		if (start > range->start && end < range->end) {
+			/* Punch a hole in the middle */
+			ret = fuse_dlm_punch_hole(cache, start, end);
+			if (ret)
+				goto out;
+			/* After punching a hole, we're done */
+			break;
+		} else if (start > range->start) {
+			/* Adjust the end of the range */
+			range->end = start - 1;
+		} else if (end < range->end) {
+			/* Adjust the start of the range */
+			range->start = end + 1;
+		} else {
+			/* Complete overlap, remove the range */
+			fuse_page_it_remove(range, &cache->ranges);
+			kfree(range);
+		}
+
+		range = next;
+	}
+
+out:
+	up_write(&cache->lock);
+	return ret;
+}
+
+/**
+ * fuse_dlm_range_is_locked - Check if a page range is already locked
+ * @inode: the FUSE inode
+ * @start: Start page offset
+ * @end: End page offset
+ * @mode: Lock mode that must be held
+ *
+ * Check if the specified range of pages is already locked.
+ * The entire range must be locked for this to return true.
+ *
+ * Return: true if the entire range is locked, false otherwise
+ */
+bool fuse_dlm_range_is_locked(struct fuse_inode *inode, pgoff_t start,
+			      pgoff_t end, enum fuse_page_lock_mode mode)
+{
+	struct fuse_dlm_cache *cache = &inode->dlm_locked_areas;
+	struct fuse_dlm_range *range;
+	int lock_mode = 0;
+	pgoff_t current_start = start;
+
+	if (!cache || start > end)
+		return false;
+
+	/* Convert to lock mode if specified */
+	if (mode == FUSE_PAGE_LOCK_READ)
+		lock_mode = FUSE_PCACHE_LK_READ;
+	else if (mode == FUSE_PAGE_LOCK_WRITE)
+		lock_mode = FUSE_PCACHE_LK_WRITE;
+
+	down_read(&cache->lock);
+
+	/* Find the first range that overlaps with [start, end] */
+	range = fuse_dlm_find_overlapping(cache, start, end);
+
+	/* Check if the entire range is covered */
+	while (range && current_start <= end) {
+		/* If we're checking for a specific mode, verify it matches */
+		if (lock_mode && range->mode != lock_mode) {
+			/* Wrong lock mode */
+			up_read(&cache->lock);
+			return false;
+		}
+
+		/* Check if there's a gap before this range */
+		if (current_start < range->start) {
+			/* Found a gap */
+			up_read(&cache->lock);
+			return false;
+		}
+
+		/* Move current_start past this range */
+		current_start = range->end + 1;
+
+		/* Get next overlapping range */
+		range = fuse_page_it_iter_next(range, start, end);
+	}
+
+	/* Check if we covered the entire range */
+	if (current_start <= end) {
+		/* There's a gap at the end */
+		up_read(&cache->lock);
+		return false;
+	}
+
+	up_read(&cache->lock);
+	return true;
+}
+
+/**
+ * request a dlm lock from the fuse server
+ */
+void fuse_get_dlm_write_lock(struct file *file, loff_t offset,
+			     size_t length)
+{
+	struct fuse_file *ff = file->private_data;
+	struct inode *inode = file_inode(file);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_mount *fm = ff->fm;
+	loff_t end = (offset + length - 1) | (PAGE_SIZE - 1);
+
+	/* note that the offset and length don't have to be page aligned here,
+	 * but since we only get here on writeback caching we will send out
+	 * page aligned requests */
+	offset &= PAGE_MASK;
+
+	FUSE_ARGS(args);
+	struct fuse_dlm_lock_in inarg;
+	struct fuse_dlm_lock_out outarg;
+	int err;
+
+	/* note that this can be run from different processes
+	 * at the same time.  It is intentionally not protected,
+	 * since a DLM implementation in the FUSE server should take care
+	 * of any races in lock requests */
+	if (fuse_dlm_range_is_locked(fi, offset, end, FUSE_PAGE_LOCK_WRITE))
+		return; /* we already have this area locked */
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.offset = offset;
+	inarg.size = end - offset + 1;
+	inarg.type = FUSE_DLM_LOCK_WRITE;
+
+	args.opcode = FUSE_DLM_WB_LOCK;
+	args.nodeid = get_node_id(inode);
+	args.in_numargs = 1;
+	args.in_args[0].size = sizeof(inarg);
+	args.in_args[0].value = &inarg;
+	args.out_numargs = 1;
+	args.out_args[0].size = sizeof(outarg);
+	args.out_args[0].value = &outarg;
+	err = fuse_simple_request(fm, &args);
+	if (err == -ENOSYS) {
+		/* fuse server does not support dlm, save the info */
+		fc->dlm = 0;
+		return;
+	}
+	if (err)
+		return;
+
+	if (outarg.locksize < end - offset + 1) {
+		/* fuse server is seriously broken */
+		pr_warn("fuse: dlm lock request for %llu bytes returned %u bytes\n",
+			end - offset + 1, outarg.locksize);
+		fuse_abort_conn(fc);
+		return;
+	}
+
+	/* ignore any errors here, there is no way we can react appropriately */
+	fuse_dlm_lock_range(fi, offset, offset + outarg.locksize - 1,
+			    FUSE_PAGE_LOCK_WRITE);
+}
diff --git a/fs/fuse/fuse_dlm_cache.h b/fs/fuse/fuse_dlm_cache.h
new file mode 100644
index 00000000000000..98b27a2c15d8ba
--- /dev/null
+++ b/fs/fuse/fuse_dlm_cache.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * FUSE page cache lock implementation
+ */
+
+#ifndef _FS_FUSE_DLM_CACHE_H
+#define _FS_FUSE_DLM_CACHE_H
+
+#include
+#include
+#include
+#include
+
+
+struct fuse_inode;
+
+/* Lock modes for page ranges */
+enum fuse_page_lock_mode { FUSE_PAGE_LOCK_READ, FUSE_PAGE_LOCK_WRITE };
+
+/* Page cache lock manager */
+struct fuse_dlm_cache {
+	/* Lock protecting the tree */
+	struct rw_semaphore lock;
+	/* Interval tree of locked ranges */
+	struct rb_root_cached ranges;
+};
+
+/* Initialize a page cache lock manager */
+int fuse_dlm_cache_init(struct fuse_inode *inode);
+
+/* Clean up a page cache lock manager */
+void fuse_dlm_cache_release_locks(struct fuse_inode *inode);
+
+/* Lock a range of pages */
+int fuse_dlm_lock_range(struct fuse_inode *inode, pgoff_t start,
+			pgoff_t end, enum fuse_page_lock_mode mode);
+
+/* Unlock a range of pages */
+int fuse_dlm_unlock_range(struct fuse_inode *inode, pgoff_t start,
+			  pgoff_t end);
+
+/* Check if a page range is already locked */
+bool fuse_dlm_range_is_locked(struct fuse_inode *inode, pgoff_t start,
+			      pgoff_t end, enum fuse_page_lock_mode mode);
+
+/* this is the interface to the filesystem */
+void fuse_get_dlm_write_lock(struct file *file, loff_t offset,
+			     size_t length);
+
+#endif /* _FS_FUSE_DLM_CACHE_H */
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a68d7ec580c8e7..b85dcb2d7a0068 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include "fuse_dlm_cache.h"
 
 /** Default max number of pages that can be used in a single read request */
 #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
@@ -84,6 +85,17 @@ struct fuse_submount_lookup {
 	struct fuse_forget_link *forget;
 };
 
+/**
+ * data structure to record that we have
+ * requested dlm locks for the given area from the fuse server
+ */
+struct dlm_locked_area
+{
+	struct list_head list;
+	loff_t offset;
+	size_t size;
+};
+
 /** FUSE inode */
 struct fuse_inode {
 	/** Inode data */
@@ -142,6 +154,9 @@ struct fuse_inode {
 	/* List of writepage requestst (pending or sent) */
 	struct rb_root writepages;
+
+	/* dlm locked areas we have sent lock requests for */
+	struct fuse_dlm_cache dlm_locked_areas;
 };
 
 /* readdir cache (directory only) */
@@ -842,6 +857,9 @@ struct fuse_conn {
 	/* Is statx not implemented by fs? */
 	unsigned int no_statx:1;
 
+	/* do we have support for dlm in the fs? */
+	unsigned int dlm:1;
+
 	/* Use io_uring for communication */
 	unsigned int io_uring;
diff --git a/fs/fuse/fuse_trace.h b/fs/fuse/fuse_trace.h
index 393c630e772635..9976e31a51a9c9 100644
--- a/fs/fuse/fuse_trace.h
+++ b/fs/fuse/fuse_trace.h
@@ -58,6 +58,7 @@
 	EM( FUSE_SYNCFS,		"FUSE_SYNCFS")		\
 	EM( FUSE_TMPFILE,		"FUSE_TMPFILE")		\
 	EM( FUSE_STATX,			"FUSE_STATX")		\
+	EM( FUSE_DLM_WB_LOCK,		"FUSE_DLM_WB_LOCK")	\
 	EMe(CUSE_INIT,			"CUSE_INIT")
 
 /*
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 417598bb357514..029809ab8ccd0d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -7,6 +7,7 @@
  */
 
 #include "fuse_i.h"
+#include "fuse_dlm_cache.h"
 #include "dev_uring_i.h"
 
 #include
@@ -182,6 +183,7 @@ static void fuse_evict_inode(struct inode *inode)
 	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
 		WARN_ON(!list_empty(&fi->write_files));
 		WARN_ON(!list_empty(&fi->queued_writes));
+		fuse_dlm_cache_release_locks(fi);
 	}
 }
 
@@ -565,6 +567,14 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
 		pg_end = -1;
 	else
 		pg_end = (offset + len - 1) >> PAGE_SHIFT;
+
+	if (fc->dlm && fc->writeback_cache)
+		/* invalidate the range from the beginning of the first page
+		 * in the given range to the last byte of the last page */
+		fuse_dlm_unlock_range(fi,
+				      pg_start << PAGE_SHIFT,
+				      (pg_end << PAGE_SHIFT) | (PAGE_SIZE - 1));
+
 	invalidate_inode_pages2_range(inode->i_mapping,
 				      pg_start, pg_end);
 	}
@@ -969,6 +979,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 	fc->blocked = 0;
 	fc->initialized = 0;
 	fc->connected = 1;
+	fc->dlm = 1;
 	atomic64_set(&fc->attr_version, 1);
 	atomic64_set(&fc->evict_ctr, 1);
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 76ba774993db0d..bab03e93f79dd1 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -638,6 +638,7 @@ enum fuse_opcode {
 	FUSE_SYNCFS		= 50,
 	FUSE_TMPFILE		= 51,
 	FUSE_STATX		= 52,
+	FUSE_DLM_WB_LOCK	= 53,
 
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
@@ -1172,6 +1173,41 @@ struct fuse_supp_groups {
 	uint32_t	groups[];
 };
 
+/**
+ * Type of the dlm lock requested
+ */
+enum fuse_dlm_lock_type {
+	FUSE_DLM_LOCK_NONE = 0,
+	FUSE_DLM_LOCK_READ = 1,
+	FUSE_DLM_LOCK_WRITE = 2
+};
+
+/**
+ * struct fuse_dlm_lock_in - Lock request
+ * @fh: file handle
+ * @offset: offset into the file
+ * @size: size of the locked region
+ * @type: type of lock
+ */
+struct fuse_dlm_lock_in {
+	uint64_t	fh;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	type;
+	uint64_t	reserved;
+};
+
+/**
+ * struct fuse_dlm_lock_out - Lock response
+ * @locksize: how many bytes were locked by the call
+ *	      (most of the time we want to lock more than is requested
+ *	      to reduce the number of calls)
+ */
+struct fuse_dlm_lock_out {
+	uint32_t	locksize;
+	uint32_t	padding;
+};
+
 /**
  * Size of the ring buffer header
  */
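
The fuse_dlm_lock_out reply lets the server lock more than the kernel asked
for, and the kernel then caches the larger range so that fewer
FUSE_DLM_WB_LOCK requests are needed.  A minimal userspace sketch of that
server-side behaviour is shown below; it assumes the patched <linux/fuse.h>
is installed, and the handle_dlm_wb_lock() helper and the 1 MiB granule are
illustrative only, not part of this patch or of any existing libfuse API.

	/*
	 * Hypothetical server-side handler for FUSE_DLM_WB_LOCK: acquire the
	 * DLM lock for at least the requested range, round the range up to a
	 * coarser granule, and report the locked size back in
	 * fuse_dlm_lock_out.locksize so the kernel caches the larger range.
	 */
	#include <stdint.h>
	#include <linux/fuse.h>		/* patched header with the dlm structs */

	#define DLM_GRANULE	(1024 * 1024)	/* assumed 1 MiB lock granularity */

	static void handle_dlm_wb_lock(const struct fuse_dlm_lock_in *in,
				       struct fuse_dlm_lock_out *out)
	{
		/* Exclusive end of the requested byte range */
		uint64_t req_end = in->offset + in->size;
		/* Round the end up to the next granule boundary */
		uint64_t lock_end = (req_end + DLM_GRANULE - 1) / DLM_GRANULE * DLM_GRANULE;

		/*
		 * The actual DLM lock acquisition is filesystem specific and
		 * omitted here; it must cover [in->offset, lock_end) before
		 * the reply is sent.
		 */
		out->locksize = (uint32_t)(lock_end - in->offset);	/* never < in->size */
		out->padding = 0;
	}

Returning a locksize smaller than the request would trip the
fuse_abort_conn() check in fuse_get_dlm_write_lock(), so any rounding on the
server side must only ever grow the range.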