diff --git a/ports/atmel-samd/Makefile b/ports/atmel-samd/Makefile
index 2546c416d656..8be3ce327871 100644
--- a/ports/atmel-samd/Makefile
+++ b/ports/atmel-samd/Makefile
@@ -85,7 +85,7 @@ CFLAGS = -Os -DNDEBUG
 endif
 
 ifeq ($(CHIP_FAMILY), samd51)
-CFLAGS = -O2 -DNDEBUG
+CFLAGS = -Os -DNDEBUG
 endif
 
 #Debugging/Optimization
diff --git a/ports/atmel-samd/common-hal/neopixel_write/__init__.c b/ports/atmel-samd/common-hal/neopixel_write/__init__.c
index d9f7da6a83ae..6133415f48b3 100644
--- a/ports/atmel-samd/common-hal/neopixel_write/__init__.c
+++ b/ports/atmel-samd/common-hal/neopixel_write/__init__.c
@@ -32,18 +32,17 @@
 #include "tick.h"
 
 #ifdef SAMD51
-static inline void delay_cycles(uint8_t cycles) {
-    uint32_t start = SysTick->VAL;
-    uint32_t stop = start - cycles;
-    if (start < cycles) {
-        stop = 0xffffff + start - cycles;
-        while (SysTick->VAL < start || SysTick->VAL > stop) {}
-    } else {
-        // Make sure the systick value is between start and stop in case it
-        // wraps around before we read its value less than stop.
-        while (SysTick->VAL > stop && SysTick->VAL <= start) {}
+// Busy-wait by counting a scratch register down from `cycles` to zero. `cycles` is a
+// loop-iteration count and must be a non-zero compile-time constant ("I" constraint);
+// 0 would wrap on the first subs. asm volatile keeps the delay from being optimized out.
+#define delay_cycles(cycles) \
+{ \
+    uint32_t t; \
+    asm volatile ("movs %[t], %[c]\n\t" \
+        "loop%=:\n\t" \
+        "subs %[t], #1\n\t" \
+        "bne.n loop%=" : [t] "=r"(t) : [c] "I" (cycles) : "cc"); \
     }
-}
 #endif
 
 uint64_t next_start_tick_ms = 0;
@@ -88,7 +87,7 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
         asm("nop; nop;");
         #endif
         #ifdef SAMD51
-        delay_cycles(18);
+        delay_cycles(3);
         #endif
         if(p & bitMask) {
             // This is the high delay unique to a one bit.
@@ -97,7 +96,7 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
             asm("nop; nop; nop; nop; nop; nop; nop;");
             #endif
             #ifdef SAMD51
-            delay_cycles(25);
+            delay_cycles(11);
             #endif
             *clr = pinMask;
         } else {
@@ -108,7 +107,7 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
             asm("nop; nop;");
             #endif
             #ifdef SAMD51
-            delay_cycles(25);
+            delay_cycles(3);
             #endif
         }
         if((bitMask >>= 1) != 0) {
@@ -119,7 +118,7 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
             asm("nop; nop; nop; nop; nop;");
             #endif
             #ifdef SAMD51
-            delay_cycles(44);
+            delay_cycles(20);
             #endif
         } else {
             if(ptr >= end) break;
@@ -130,7 +129,7 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
             // above operations take.
             // For the SK6812 its 0.6us +- 0.15us
             #ifdef SAMD51
-            delay_cycles(50);
+            delay_cycles(15);
             #endif
         }
     }
diff --git a/ports/atmel-samd/usb_mass_storage.c b/ports/atmel-samd/usb_mass_storage.c
index 8001cc097a4b..d9d9d9f39384 100644
--- a/ports/atmel-samd/usb_mass_storage.c
+++ b/ports/atmel-samd/usb_mass_storage.c
@@ -259,6 +259,9 @@ int32_t usb_msc_xfer_done(uint8_t lun) {
     if (active_read) {
         active_addr += 1;
         active_nblocks--;
+        if (active_nblocks == 0) {
+            active_read = false;
+        }
     }
 
     if (active_write) {
@@ -272,10 +275,6 @@ int32_t usb_msc_xfer_done(uint8_t lun) {
 // The start_read callback begins a read transaction which we accept but delay our response until the "main thread" calls usb_msc_background. Once it does, we read immediately from the drive into our cache and trigger the USB DMA to output the sector. Once the sector is transmitted, xfer_done will be called.
 void usb_msc_background(void) {
     if (active_read && !usb_busy) {
-        if (active_nblocks == 0) {
-            active_read = false;
-            return;
-        }
         fs_user_mount_t * vfs = get_vfs(active_lun);
         disk_read(vfs, sector_buffer, active_addr, 1);
         // TODO(tannewt): Check the read result.