vflush: Speed up reclaim by doing less in the loop #328

Merged
30 changes: 12 additions & 18 deletions module/os/windows/spl/spl-time.c
@@ -37,20 +37,12 @@
  * origin. Hence its primary use is to specify intervals.
  */

-static hrtime_t
-zfs_abs_to_nano(uint64_t elapsed)
-{
-	return (elapsed * KeQueryTimeIncrement() * 100);
-}
-
 /* Open Solaris lbolt is in hz */
 uint64_t
 zfs_lbolt(void)
 {
 	uint64_t lbolt_hz;
-	LARGE_INTEGER ticks;
-	KeQueryTickCount(&ticks);
-	lbolt_hz = ticks.QuadPart * KeQueryTimeIncrement();
+	lbolt_hz = gethrtime() / 100;
+	lbolt_hz /= (10000000 / 119); // Solaris hz ?
 	return (lbolt_hz);
 }
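A note on the new zfs_lbolt() arithmetic: gethrtime() returns nanoseconds, so dividing by 100 gives NT 100 ns units, and 10000000 / 119 truncates to 84033, so the result advances at roughly 10^7 / 84033 ≈ 119 ticks per second. That would match an hz of 119, if that is what this port defines; traditional Solaris runs hz at 100, which is presumably what the trailing "// Solaris hz ?" comment is questioning.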
@@ -59,14 +51,16 @@ hrtime_t
 gethrtime(void)
 {
 	static LARGE_INTEGER start = { 0 };
+	static LARGE_INTEGER freq = { 0 };
 	LARGE_INTEGER now;
 	if (start.QuadPart == 0) {
-		KeQueryTickCount(&start);
-		start.QuadPart--;
+		start = KeQueryPerformanceCounter(&freq);
+		ASSERT(freq.QuadPart < NANOSEC);
+		ASSERT(freq.QuadPart > 0);
+		freq.QuadPart = NANOSEC / freq.QuadPart;
 	}
-	KeQueryTickCount(&now);
-	ASSERT((now.QuadPart != start.QuadPart));
-	return (zfs_abs_to_nano(now.QuadPart - start.QuadPart));
+	now = KeQueryPerformanceCounter(NULL);
+	return ((now.QuadPart - start.QuadPart) * freq.QuadPart);
 }

 /*
@@ -76,21 +70,21 @@ gethrtime(void)
 int
 random_get_bytes(uint8_t *ptr, uint32_t len)
 {
-	LARGE_INTEGER TickCount;
+	LARGE_INTEGER PerfCounter;
 	ULONG r;
 	PULONG b;
 	int i;

-	KeQueryTickCount(&TickCount);
+	PerfCounter = KeQueryPerformanceCounter(NULL);

 	b = (PULONG) ptr;

 	for (i = 0; i < len / sizeof (ULONG); i++)
-		b[i] = RtlRandomEx(&TickCount.LowPart);
+		b[i] = RtlRandomEx(&PerfCounter.LowPart);

 	len &= (sizeof (ULONG) - 1);
 	if (len > 0) {
-		r = RtlRandomEx(&TickCount.LowPart);
+		r = RtlRandomEx(&PerfCounter.LowPart);
 		RtlCopyMemory(&b[i], &r, len);
 	}
 	return (0);
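The gethrtime() change above swaps the tick counter (KeQueryTickCount(), which only advances with the timer interrupt, typically every 10-15.6 ms) for the high-resolution performance counter. Below is a minimal user-mode sketch of the same pattern, a hypothetical illustration built on the documented QueryPerformanceCounter()/QueryPerformanceFrequency() pair rather than code from this PR:

#include <windows.h>
#include <stdint.h>
#include <stdio.h>

#define NANOSEC 1000000000LL

/* User-mode analogue of the kernel gethrtime() above. */
static int64_t
hrtime_ns(void)
{
	static LARGE_INTEGER start;
	static int64_t ns_per_tick;
	LARGE_INTEGER now;

	if (start.QuadPart == 0) {
		LARGE_INTEGER freq;
		QueryPerformanceFrequency(&freq);	/* typically 10 MHz */
		QueryPerformanceCounter(&start);
		ns_per_tick = NANOSEC / freq.QuadPart;	/* 100 ns at 10 MHz */
	}
	QueryPerformanceCounter(&now);
	return ((now.QuadPart - start.QuadPart) * ns_per_tick);
}

int
main(void)
{
	int64_t t0 = hrtime_ns();
	Sleep(50);
	printf("elapsed: %lld ns\n", (long long)(hrtime_ns() - t0));
	return (0);
}

Precomputing nanoseconds-per-tick trades a division on every call for a one-time one, and keeps the per-call multiply from overflowing the way ticks * NANOSEC eventually would. The division is exact at the common 10 MHz counter frequency (100 ns per tick); a frequency that does not divide NANOSEC evenly loses a little precision to truncation, which the ASSERTs in the kernel version do not catch.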
227 changes: 123 additions & 104 deletions module/os/windows/spl/spl-vnode.c
@@ -1537,6 +1537,95 @@ mount_count_nodes(struct mount *mp, int flags)
 	return (count);
 }

+static void
+flush_file_objects(struct vnode *rvp)
+{
+	// Walk the vnode's AVL tree of fileobjects and attempt to
+	// flush out any caches.
+
+	FILE_OBJECT *fileobject;
+	vnode_fileobjects_t *node;
+	int Status;
+
+	// Make sure we don't call vnode_flushcache() again from IRP_MJ_CLOSE.
+	rvp->v_flags |= VNODE_FLUSHING;
+
+	if (avl_is_empty(&rvp->v_fileobjects))
+		return;
+
+	for (node = avl_first(&rvp->v_fileobjects); node != NULL;
+	    node = AVL_NEXT(&rvp->v_fileobjects, node)) {
+		fileobject = node->fileobject;
+
+		// Because the CC* calls can re-enter ZFS, we have to drop
+		// the mutex around vnode_flushcache(); nodes that were
+		// flushed successfully are removed in a second pass below.
+
+		try {
+			Status = ObReferenceObjectByPointer(fileobject, 0,
+			    *IoFileObjectType, KernelMode);
+		} except(EXCEPTION_EXECUTE_HANDLER) {
+			Status = GetExceptionCode();
+		}
+
+		// Try to lock fileobject before we use it.
+		if (NT_SUCCESS(Status)) {
+			// Let go of mutex, as flushcache will re-enter
+			// (IRP_MJ_CLEANUP)
+			mutex_exit(&rvp->v_mutex);
+			node->remove = vnode_flushcache(rvp, fileobject, TRUE);
+			ObDereferenceObject(fileobject);
+			mutex_enter(&rvp->v_mutex);
+		} // if ObReferenceObjectByPointer
+	} // for
+
+	// Remove any nodes we successfully closed.
+restart_remove_closed:
+	for (node = avl_first(&rvp->v_fileobjects); node != NULL;
+	    node = AVL_NEXT(&rvp->v_fileobjects, node)) {
+		if (node->remove) {
+			avl_remove(&rvp->v_fileobjects, node);
+			kmem_free(node, sizeof (*node));
+			goto restart_remove_closed;
+		}
+	}
+
+	dprintf("vp %p has %d fileobject(s) remaining\n", rvp,
+	    avl_numnodes(&rvp->v_fileobjects));
+}
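One observation on the removal pass above: it restarts from avl_first() after every removal, which turns quadratic when many fileobjects close at once. The usual Solaris AVL idiom of fetching the successor before avl_remove() does the same work in a single pass. A sketch using the same types, not what this PR implements:

	vnode_fileobjects_t *node, *next;

	/* Single pass: fetch the successor before the node is freed. */
	for (node = avl_first(&rvp->v_fileobjects); node != NULL; node = next) {
		next = AVL_NEXT(&rvp->v_fileobjects, node);
		if (node->remove) {
			avl_remove(&rvp->v_fileobjects, node);
			kmem_free(node, sizeof (*node));
		}
	}

AVL_NEXT() is evaluated while node is still linked into the tree, so the walk never touches freed memory.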

+static void
+print_reclaim_stats(boolean_t init, int reclaims)
+{
+	static int last_reclaims = 0;
+	int reclaims_delta;
+	int reclaims_per_second;
+	static hrtime_t last_stats_time = 0;
+	hrtime_t last_stats_time_delta;
+
+	if (init) {
+		last_stats_time = gethrtime();
+		return;
+	}
+
+	if ((reclaims % 1000) != 0) {
+		return;
+	}
+
+	reclaims_delta = reclaims - last_reclaims;
+	last_stats_time_delta = gethrtime() - last_stats_time;
+
+	reclaims_per_second = (((int64_t)reclaims_delta) * NANOSEC) /
+	    MAX(last_stats_time_delta, 1);
+
+	dprintf("%s: %d reclaims processed (%d/s).\n", __func__, reclaims,
+	    reclaims_per_second);
+
+	last_reclaims = reclaims;
+	last_stats_time = gethrtime();
+}
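The int64_t cast in the rate calculation is load-bearing: reclaims_delta * NANOSEC overflows 32-bit arithmetic for any delta above two, and MAX(last_stats_time_delta, 1) guards the zero-length interval. A standalone check of the arithmetic with hypothetical numbers:

#include <stdio.h>
#include <stdint.h>

#define NANOSEC	1000000000LL
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	int reclaims_delta = 1000;		/* hypothetical: one report interval */
	int64_t delta_ns = 2500000000LL;	/* hypothetical: 2.5 s elapsed */
	int64_t per_sec = (((int64_t)reclaims_delta) * NANOSEC) /
	    MAX(delta_ns, 1);

	printf("%lld reclaims/s\n", (long long)per_sec);	/* prints 400 */
	return (0);
}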


/*
* Let's try something new. If we are to vflush, lets do everything we can
@@ -1555,126 +1644,56 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
 	// FORCECLOSE : release everything, force unmount

 	// if mp is NULL, we are reclaiming nodes, until threshold
-	int isbusy = 0;
 	int reclaims = 0;
-	vnode_fileobjects_t *node;
 	struct vnode *rvp;
-	int Status;
 	boolean_t filesonly = B_TRUE;

 	dprintf("vflush start\n");

 	mutex_enter(&vnode_all_list_lock);

-filesanddirs:
-	while (1) {
-		for (rvp = list_head(&vnode_all_list);
-		    rvp;
-		    rvp = list_next(&vnode_all_list, rvp)) {
-
-			// skip vnodes not belonging to this mount
-			if (mp && rvp->v_mount != mp)
-				continue;
-
-			if (filesonly && vnode_isdir(rvp))
-				continue;
-
-			// If we aren't FORCE and asked to SKIPROOT, and node
-			// is MARKROOT, then go to next.
-			if (!(flags & FORCECLOSE)) {
-				if ((flags & SKIPROOT))
-					if (rvp->v_flags & VNODE_MARKROOT)
-						continue;
-#if 0 // when we use SYSTEM vnodes
-				if ((flags & SKIPSYSTEM))
-					if (rvp->v_flags & VNODE_MARKSYSTEM)
-						continue;
-#endif
-			}
-			// We are to remove this node, even if ROOT - unmark it.
-			mutex_exit(&vnode_all_list_lock);
-
-			// Release the AVL tree
-			// KIRQL OldIrql;
-
-			// Attempt to flush out any caches;
-			mutex_enter(&rvp->v_mutex);
-			// Make sure we don't call vnode_cacheflush() again
-			// from IRP_MJ_CLOSE.
-			rvp->v_flags |= VNODE_FLUSHING;
-
-			for (node = avl_first(&rvp->v_fileobjects);
-			    node != NULL;
-			    node = AVL_NEXT(&rvp->v_fileobjects, node)) {
-				FILE_OBJECT *fileobject = node->fileobject;
-
-				// Because the CC* calls can re-enter ZFS, we need to
-				// release the lock, and because we release the lock the
-				// while has to start from the top each time. We release
-				// the node at end of this while.
-
-				try {
-					Status = ObReferenceObjectByPointer(
-					    fileobject,
-					    0,
-					    *IoFileObjectType,
-					    KernelMode);
-				} except(EXCEPTION_EXECUTE_HANDLER) {
-					Status = GetExceptionCode();
-				}
-
-				// Try to lock fileobject before we use it.
-				if (NT_SUCCESS(Status)) {
-					int ok;
+	print_reclaim_stats(B_TRUE, 0);
+
-					// Let go of mutex, as flushcache will re-enter
-					// (IRP_MJ_CLEANUP)
-					mutex_exit(&rvp->v_mutex);
-					node->remove = vnode_flushcache(rvp,
-					    fileobject, TRUE);
+filesanddirs:
+	for (rvp = list_head(&vnode_all_list); rvp;
+	    rvp = list_next(&vnode_all_list, rvp)) {
+		// skip vnodes not belonging to this mount
+		if (mp && rvp->v_mount != mp)
+			continue;
+
-					ObDereferenceObject(fileobject);
+		if (filesonly && vnode_isdir(rvp))
+			continue;
+
-					mutex_enter(&rvp->v_mutex);
+		// If we aren't FORCE and asked to SKIPROOT, and node
+		// is MARKROOT, then go to next.
+		if (!(flags & FORCECLOSE)) {
+			if ((flags & SKIPROOT))
+				if (rvp->v_flags & VNODE_MARKROOT)
+					continue;
+#if 0 // when we use SYSTEM vnodes
+			if ((flags & SKIPSYSTEM))
+				if (rvp->v_flags & VNODE_MARKSYSTEM)
+					continue;
+#endif
+		}
+		// We are to remove this node, even if ROOT - unmark it.
+
-				} // if ObReferenceObjectByPointer
-			} // for
+		if (rvp->v_flags & VNODE_DEAD) {
+			continue;
+		}
+
-			// Remove any nodes we successfully closed.
-restart:
-			for (node = avl_first(&rvp->v_fileobjects);
-			    node != NULL;
-			    node = AVL_NEXT(&rvp->v_fileobjects, node)) {
-				if (node->remove) {
-					avl_remove(&rvp->v_fileobjects, node);
-					kmem_free(node, sizeof (*node));
-					goto restart;
-				}
-			}
+		mutex_enter(&rvp->v_mutex);
+
-			dprintf("vp %p has %d fileobject(s) remaining\n", rvp,
-			    avl_numnodes(&rvp->v_fileobjects));
+		flush_file_objects(rvp);
+
-			// vnode_recycle_int() will call mutex_exit(&rvp->v_mutex);
+		// vnode_recycle_int() will exit v_mutex
 		// re-check flags, due to releasing locks
-			isbusy = 1;
-			if (!(rvp->v_flags & VNODE_DEAD))
-				isbusy = vnode_recycle_int(rvp,
-				    (flags & FORCECLOSE) | VNODELOCKED);
-			else
-				mutex_exit(&rvp->v_mutex);
-
-			mutex_enter(&vnode_all_list_lock);
-
-			if (!isbusy) {
-				reclaims++;
-				break; // must restart loop if unlinked node
-			}
+		if (!vnode_recycle_int(rvp, (flags & FORCECLOSE) |
+		    VNODELOCKED)) {
+			reclaims++;
+			print_reclaim_stats(B_FALSE, reclaims);
+		}
+
-		// If the end of the list was reached, stop entirely
-		if (!rvp)
-			break;
 	}

 	if (filesonly) {
@@ -1684,8 +1703,8 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)

 	mutex_exit(&vnode_all_list_lock);

-	if (mp == NULL && reclaims > 0) {
-		dprintf("%s: %llu reclaims processed.\n", __func__, reclaims);
+	if (reclaims > 0) {
+		dprintf("%s: %d reclaims processed.\n", __func__, reclaims);
 	}

