diff --git a/.all-contributorsrc b/.all-contributorsrc
index a9eba26b..93aa5d2a 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -34,6 +34,13 @@
       "profile": "https://github.com/V0XNIHILI",
       "contributions": []
     },
+    {
+      "login": "jasonjabbour",
+      "name": "jasonjabbour",
+      "avatar_url": "https://avatars.githubusercontent.com/jasonjabbour",
+      "profile": "https://github.com/jasonjabbour",
+      "contributions": []
+    },
     {
       "login": "shanzehbatool",
       "name": "shanzehbatool",
@@ -97,20 +104,6 @@
       "profile": "https://github.com/jaysonzlin",
       "contributions": []
     },
-    {
-      "login": "jasonjabbour",
-      "name": "jasonjabbour",
-      "avatar_url": "https://avatars.githubusercontent.com/jasonjabbour",
-      "profile": "https://github.com/jasonjabbour",
-      "contributions": []
-    },
-    {
-      "login": "sophiacho1",
-      "name": "Sophia Cho",
-      "avatar_url": "https://avatars.githubusercontent.com/sophiacho1",
-      "profile": "https://github.com/sophiacho1",
-      "contributions": []
-    },
     {
       "login": "18jeffreyma",
       "name": "Jeffrey Ma",
@@ -125,6 +118,13 @@
       "profile": "https://github.com/andreamurillomtz",
       "contributions": []
     },
+    {
+      "login": "sophiacho1",
+      "name": "Sophia Cho",
+      "avatar_url": "https://avatars.githubusercontent.com/sophiacho1",
+      "profile": "https://github.com/sophiacho1",
+      "contributions": []
+    },
     {
       "login": "alxrod",
       "name": "Alex Rodriguez",
@@ -139,6 +139,13 @@
       "profile": "https://github.com/korneelf1",
       "contributions": []
     },
+    {
+      "login": "colbybanbury",
+      "name": "Colby Banbury",
+      "avatar_url": "https://avatars.githubusercontent.com/colbybanbury",
+      "profile": "https://github.com/colbybanbury",
+      "contributions": []
+    },
     {
       "login": "zishenwan",
       "name": "Zishen Wan",
@@ -147,10 +154,10 @@
       "contributions": []
     },
     {
-      "login": "colbybanbury",
-      "name": "Colby Banbury",
-      "avatar_url": "https://avatars.githubusercontent.com/colbybanbury",
-      "profile": "https://github.com/colbybanbury",
+      "login": "Sara-Khosravi",
+      "name": "Sara Khosravi",
+      "avatar_url": "https://avatars.githubusercontent.com/Sara-Khosravi",
+      "profile": "https://github.com/Sara-Khosravi",
       "contributions": []
     },
     {
@@ -181,13 +188,6 @@
       "profile": "https://github.com/arnaumarin",
       "contributions": []
     },
-    {
-      "login": "eezike",
-      "name": "Emeka Ezike",
-      "avatar_url": "https://avatars.githubusercontent.com/eezike",
-      "profile": "https://github.com/eezike",
-      "contributions": []
-    },
     {
       "login": "aptl26",
       "name": "Aghyad Deeb",
@@ -195,20 +195,6 @@
       "profile": "https://github.com/aptl26",
       "contributions": []
     },
-    {
-      "login": "Sara-Khosravi",
-      "name": "Sara Khosravi",
-      "avatar_url": "https://avatars.githubusercontent.com/Sara-Khosravi",
-      "profile": "https://github.com/Sara-Khosravi",
-      "contributions": []
-    },
-    {
-      "login": "Ekhao",
-      "name": "Emil Njor",
-      "avatar_url": "https://avatars.githubusercontent.com/Ekhao",
-      "profile": "https://github.com/Ekhao",
-      "contributions": []
-    },
     {
       "login": "AditiR-42",
       "name": "Aditi Raju",
@@ -224,10 +210,10 @@
       "contributions": []
     },
     {
-      "login": "MichaelSchnebly",
-      "name": "Michael Schnebly",
-      "avatar_url": "https://avatars.githubusercontent.com/MichaelSchnebly",
-      "profile": "https://github.com/MichaelSchnebly",
+      "login": "Ekhao",
+      "name": "Emil Njor",
+      "avatar_url": "https://avatars.githubusercontent.com/Ekhao",
+      "profile": "https://github.com/Ekhao",
       "contributions": []
     },
     {
@@ -245,10 +231,10 @@
       "contributions": []
     },
     {
-      "login": "leo47007",
-      "name": "Yu-Shun Hsiao",
-      "avatar_url": "https://avatars.githubusercontent.com/leo47007",
-      "profile": "https://github.com/leo47007",
+      "login": "MichaelSchnebly",
+      "name": "Michael Schnebly",
+      "avatar_url": "https://avatars.githubusercontent.com/MichaelSchnebly",
+      "profile": "https://github.com/MichaelSchnebly",
       "contributions": []
     },
     {
@@ -265,6 +251,13 @@
       "profile": "https://github.com/BaeHenryS",
       "contributions": []
     },
+    {
+      "login": "leo47007",
+      "name": "Yu-Shun Hsiao",
+      "avatar_url": "https://avatars.githubusercontent.com/leo47007",
+      "profile": "https://github.com/leo47007",
+      "contributions": []
+    },
     {
       "login": "mmaz",
       "name": "Mark Mazumder",
@@ -272,6 +265,20 @@
       "profile": "https://github.com/mmaz",
       "contributions": []
     },
+    {
+      "login": "eurashin",
+      "name": "eurashin",
+      "avatar_url": "https://avatars.githubusercontent.com/eurashin",
+      "profile": "https://github.com/eurashin",
+      "contributions": []
+    },
+    {
+      "login": "marcozennaro",
+      "name": "Marco Zennaro",
+      "avatar_url": "https://avatars.githubusercontent.com/marcozennaro",
+      "profile": "https://github.com/marcozennaro",
+      "contributions": []
+    },
     {
       "login": "arbass22",
       "name": "Andrew Bass",
@@ -280,10 +287,10 @@
       "contributions": []
     },
     {
-      "login": "eurashin",
-      "name": "eurashin",
-      "avatar_url": "https://avatars.githubusercontent.com/eurashin",
-      "profile": "https://github.com/eurashin",
+      "login": "Emeka Ezike",
+      "name": "Emeka Ezike",
+      "avatar_url": "https://www.gravatar.com/avatar/af39c27c6090c50a1921a9b6366e81cc?d=identicon&s=100",
+      "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors",
       "contributions": []
     },
     {
@@ -293,6 +300,13 @@
       "profile": "https://github.com/jzhou1318",
       "contributions": []
     },
+    {
+      "login": "ShvetankPrakash",
+      "name": "Shvetank Prakash",
+      "avatar_url": "https://avatars.githubusercontent.com/ShvetankPrakash",
+      "profile": "https://github.com/ShvetankPrakash",
+      "contributions": []
+    },
     {
       "login": "pongtr",
       "name": "Pong Trairatvorakul",
@@ -301,17 +315,17 @@
       "contributions": []
     },
     {
-      "login": "marcozennaro",
-      "name": "Marco Zennaro",
-      "avatar_url": "https://avatars.githubusercontent.com/marcozennaro",
-      "profile": "https://github.com/marcozennaro",
+      "login": "FinAminToastCrunch",
+      "name": "Fin Amin",
+      "avatar_url": "https://avatars.githubusercontent.com/FinAminToastCrunch",
+      "profile": "https://github.com/FinAminToastCrunch",
       "contributions": []
     },
     {
-      "login": "ShvetankPrakash",
-      "name": "Shvetank Prakash",
-      "avatar_url": "https://avatars.githubusercontent.com/ShvetankPrakash",
-      "profile": "https://github.com/ShvetankPrakash",
+      "login": "Allen-Kuang",
+      "name": "Allen-Kuang",
+      "avatar_url": "https://avatars.githubusercontent.com/Allen-Kuang",
+      "profile": "https://github.com/Allen-Kuang",
       "contributions": []
     },
     {
@@ -321,13 +335,6 @@
       "profile": "https://github.com/BrunoScaglione",
       "contributions": []
     },
-    {
-      "login": "Allen-Kuang",
-      "name": "Allen-Kuang",
-      "avatar_url": "https://avatars.githubusercontent.com/Allen-Kuang",
-      "profile": "https://github.com/Allen-Kuang",
-      "contributions": []
-    },
     {
       "login": "alex-oesterling",
       "name": "Alex Oesterling",
@@ -342,13 +349,6 @@
       "profile": "https://github.com/Gjain234",
       "contributions": []
     },
-    {
-      "login": "FinAminToastCrunch",
-      "name": "Fin Amin",
-      "avatar_url": "https://avatars.githubusercontent.com/FinAminToastCrunch",
-      "profile": "https://github.com/FinAminToastCrunch",
-      "contributions": []
-    },
     {
       "login": "serco425",
       "name": "Sercan Ayg\u00fcn",
@@ -363,6 +363,20 @@
       "profile": "https://github.com/gnodipac886",
       "contributions": []
     },
+    {
+      "login": "BravoBaldo",
+      "name": "Baldassarre Cesarano",
+      "avatar_url": "https://avatars.githubusercontent.com/BravoBaldo",
+      "profile": "https://github.com/BravoBaldo",
+      "contributions": []
+    },
+    {
+      "login": "YLab-UChicago",
+      "name": "yanjingl",
+      "avatar_url": "https://avatars.githubusercontent.com/YLab-UChicago",
+      "profile": "https://github.com/YLab-UChicago",
+      "contributions": []
+    },
     {
       "login": "abigailswallow",
       "name": "abigailswallow",
@@ -377,13 +391,6 @@
       "profile": "https://github.com/YangZhou1997",
       "contributions": []
     },
-    {
-      "login": "YLab-UChicago",
-      "name": "yanjingl",
-      "avatar_url": "https://avatars.githubusercontent.com/YLab-UChicago",
-      "profile": "https://github.com/YLab-UChicago",
-      "contributions": []
-    },
     {
       "login": "jasonlyik",
       "name": "Jason Yik",
@@ -405,13 +412,6 @@
       "profile": "https://github.com/emmanuel2406",
       "contributions": []
     },
-    {
-      "login": "ciyer64",
-      "name": "Curren Iyer",
-      "avatar_url": "https://avatars.githubusercontent.com/ciyer64",
-      "profile": "https://github.com/ciyer64",
-      "contributions": []
-    },
     {
       "login": "jessicaquaye",
       "name": "Jessica Quaye",
@@ -419,20 +419,6 @@
       "profile": "https://github.com/jessicaquaye",
       "contributions": []
     },
-    {
-      "login": "sjohri20",
-      "name": "Shreya Johri",
-      "avatar_url": "https://avatars.githubusercontent.com/sjohri20",
-      "profile": "https://github.com/sjohri20",
-      "contributions": []
-    },
-    {
-      "login": "vijay-edu",
-      "name": "Vijay Edupuganti",
-      "avatar_url": "https://avatars.githubusercontent.com/vijay-edu",
-      "profile": "https://github.com/vijay-edu",
-      "contributions": []
-    },
     {
       "login": "skmur",
       "name": "Sonia Murthy",
@@ -440,6 +426,13 @@
       "profile": "https://github.com/skmur",
       "contributions": []
     },
+    {
+      "login": "sjohri20",
+      "name": "Shreya Johri",
+      "avatar_url": "https://avatars.githubusercontent.com/sjohri20",
+      "profile": "https://github.com/sjohri20",
+      "contributions": []
+    },
     {
       "login": "vitasam",
       "name": "The Random DIY",
@@ -461,6 +454,13 @@
       "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors",
       "contributions": []
     },
+    {
+      "login": "Vijay Edupuganti",
+      "name": "Vijay Edupuganti",
+      "avatar_url": "https://www.gravatar.com/avatar/b15b6e0e9adf58099905c1a0fd474cb9?d=identicon&s=100",
+      "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors",
+      "contributions": []
+    },
     {
       "login": "Jothi Ramaswamy",
       "name": "Jothi Ramaswamy",
@@ -475,6 +475,13 @@
       "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors",
       "contributions": []
     },
+    {
+      "login": "Curren Iyer",
+      "name": "Curren Iyer",
+      "avatar_url": "https://www.gravatar.com/avatar/bd53d146aa888548c8db4da02bf81e7a?d=identicon&s=100",
+      "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors",
+      "contributions": []
+    },
     {
       "login": "a-saraf",
       "name": "a-saraf",
diff --git a/QTDublinIrish.otf b/QTDublinIrish.otf
deleted file mode 100644
index 6067986e..00000000
Binary files a/QTDublinIrish.otf and /dev/null differ
diff --git a/README.md b/README.md
index 5c2e3035..dabc6946 100644
--- a/README.md
+++ b/README.md
@@ -89,97 +89,100 @@ This project follows the [all-contributors](https://allcontributors.org) specifi
 [contributors table: the HTML cell markup was lost in extraction; the entry names with their diff markers follow]
  Ikechukwu Uchendu
  Naeem Khoshnevis
  Douwe den Blanken
- shanzehbatool
+ jasonjabbour
+ shanzehbatool
  kai4avaya
  Elias Nuwara
  Jared Ping
  Matthew Stewart
- Itai Shapira
+ Itai Shapira
  Marcelo Rovai
  Maximilian Lam
  Jayson Lin
- jasonjabbour
- Sophia Cho
+ Jeffrey Ma
- Jeffrey Ma
  Andrea
+ Sophia Cho
  Alex Rodriguez
  Korneel Van den Berghe
- Zishen Wan
+ Colby Banbury
- Colby Banbury
+ Zishen Wan
+ Sara Khosravi
  Divya Amirtharaj
  Srivatsan Krishnan
  Abdulrahman Mahmoud
- arnaumarin
- Emeka Ezike
+ arnaumarin
  Aghyad Deeb
- Sara Khosravi
- Emil Njor
  Aditi Raju
+ Jared Ni
+ Emil Njor
- Jared Ni
- Michael Schnebly
  ELSuitorHarvard
  oishib
- Yu-Shun Hsiao
+ Michael Schnebly
  Jae-Won Chung
  Henry Bae
+ Yu-Shun Hsiao
  Mark Mazumder
- Andrew Bass
  eurashin
+ Marco Zennaro
+ Andrew Bass
+ Emeka Ezike
  Jennifer Zhou
- Pong Trairatvorakul
- Marco Zennaro
  Shvetank Prakash
- Bruno Scaglione
+ Pong Trairatvorakul
+ Fin Amin
  Allen-Kuang
+ Bruno Scaglione
  Alex Oesterling
  Gauri Jain
- Fin Amin
  Sercan Aygün
  gnodipac886
+ Baldassarre Cesarano
+ yanjingl
  abigailswallow
  Yang Zhou
- yanjingl
- Jason Yik
+ Jason Yik
  happyappledog
  Emmanuel Rassou
- Curren Iyer
  Jessica Quaye
- Shreya Johri
+ Sonia Murthy
- Vijay Edupuganti
- Sonia Murthy
+ Shreya Johri
  The Random DIY
  Costin-Andrei Oncescu
  Annie Laurie Cook
+ Vijay Edupuganti
  Jothi Ramaswamy
  Batur Arslan
+ Curren Iyer
  a-saraf
  songhan
+ Zishen
diff --git a/_quarto.yml b/_quarto.yml
index 13ff5d2d..0ca664cc 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -87,7 +87,7 @@ book:
     Written, edited and curated by Prof. Vijay Janapa Reddi (Harvard University)
   right: |
     This book was built with Quarto.
-
+
   chapters:
     - text: "---"
    - part: FRONT MATTER
@@ -176,13 +176,6 @@ book:
     - contents/community.qmd
     - contents/case_studies.qmd

-citation: true
-
-license: CC-BY-NC-SA
-
-filters:
-  - custom_callout.lua
-
 bibliography:
   # main
   - contents/introduction/introduction.bib
@@ -230,6 +223,13 @@ crossref:
     key: vid
     latex-env: vid

+citation: true
+
+license: CC-BY-NC-SA
+
+filters:
+  - custom_callout.lua
+
 editor:
   render-on-save: true
diff --git a/contents/benchmarking/benchmarking.qmd b/contents/benchmarking/benchmarking.qmd
index 3651ac5d..a7ddf90a 100644
--- a/contents/benchmarking/benchmarking.qmd
+++ b/contents/benchmarking/benchmarking.qmd
@@ -199,17 +199,16 @@ Example: Tasks for natural language processing benchmarks might include sentimen
 Once a task is defined, benchmarks require metrics to quantify performance. These metrics offer objective measures to compare different models or systems. In classification tasks, metrics like accuracy, precision, recall, and [F1 score](https://en.wikipedia.org/wiki/F-score) are commonly used. Mean squared or absolute errors might be employed for regression tasks.

-#### Baselines
+#### Baselines and Baseline Models

-Benchmarks often include baseline models or reference implementations. These serve as starting points or minimum performance standards against which new models or techniques can be compared. In many benchmark suites, simple models like linear regression or basic neural networks serve as baselines to provide context for more complex model evaluations.
+Benchmarks often include baseline models or reference implementations. These usually serve as starting points or minimum performance standards for comparing new models or techniques. Baseline models help researchers measure the effectiveness of new algorithms.

-In classification tasks, metrics like accuracy, precision, recall, and [F1 score](https://en.wikipedia.org/wiki/F-score) are commonly used. Mean squared or absolute errors might be employed for regression tasks.
+In benchmark suites, simple models like linear regression or basic neural networks are often the common baselines. These provide context when evaluating more complex models. By comparing against these simpler models, researchers can quantify improvements from advanced approaches.

-#### Baseline Models
+Performance metrics vary by task, but here are some examples:

-Benchmarks often include baseline models or reference implementations. These serve as starting points or minimum performance standards against which new models or techniques can be compared.
-
-Example: In many benchmark suites, simple models like linear regression or basic neural networks serve as baselines to provide context for more complex model evaluations.
+* Classification tasks use metrics such as accuracy, precision, recall, and F1 score.
+* Regression tasks often use mean squared error or mean absolute error.
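For illustration, these metrics can be computed directly. A minimal sketch (not part of the patch itself), assuming scikit-learn is available; the toy labels are invented for the example:

```python
# Minimal sketch (assumed setup): common classification and regression
# benchmark metrics computed with scikit-learn on toy labels.
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, mean_squared_error, mean_absolute_error)

# Classification: ground-truth vs. predicted class labels (illustrative values).
y_true = [0, 1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1]
print("accuracy :", accuracy_score(y_true, y_pred))
print("precision:", precision_score(y_true, y_pred))
print("recall   :", recall_score(y_true, y_pred))
print("F1 score :", f1_score(y_true, y_pred))

# Regression: ground-truth vs. predicted continuous values (illustrative values).
r_true = [2.5, 0.0, 2.0, 8.0]
r_pred = [3.0, -0.5, 2.0, 7.0]
print("MSE:", mean_squared_error(r_true, r_pred))
print("MAE:", mean_absolute_error(r_true, r_pred))
```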

 #### Hardware and Software Specifications
@@ -237,11 +236,12 @@ Example: A benchmark might highlight that while Model A scored higher than Model
 ### Training vs. Inference

-The development life cycle of a machine learning model involves two critical phases - training and inference. Training is the process of learning patterns from data to create the model. Inference refers to the model making predictions on new unlabeled data. Both phases play indispensable yet distinct roles. Consequently, each phase warrants rigorous benchmarking to evaluate performance metrics like speed, accuracy, and computational efficiency.
+The development life cycle of a machine learning model involves two critical phases: training and inference. [Training](../training/training.qmd), as you may recall, is the process of learning patterns from data to create the model. Inference refers to the model making predictions on new unlabeled data. Both phases play indispensable yet distinct roles. Consequently, each phase warrants rigorous benchmarking to evaluate performance metrics like speed, accuracy, and computational efficiency.

 Benchmarking the training phase provides insights into how different model architectures, hyperparameter values, and optimization algorithms impact the time and resources needed to train the model. For instance, benchmarking shows how neural network depth affects training time on a given dataset. Benchmarking also reveals how hardware accelerators like GPUs and TPUs can speed up training.

-On the other hand, benchmarking inference evaluates model performance in real-world conditions after deployment. Key metrics include latency, throughput, memory footprint, and power consumption. Inference benchmarking determines if a model meets the requirements of its target application regarding response time and device constraints, which is typically the focus of TinyML. However, we will discuss these broadly to ensure a general understanding.
+On the other hand, benchmarking inference evaluates model performance in real-world conditions after deployment. Key metrics include latency, throughput, memory footprint, and power consumption. This type of benchmarking determines if a model meets the requirements of its target application regarding response time and device constraints. However, we will discuss these broadly to ensure a general understanding.
+

 ### Training Benchmarks
@@ -271,7 +271,7 @@ The following metrics are often considered important:

 4. **Memory Consumption:** The amount of memory the training process uses. Memory consumption can be a limiting factor for training large models or datasets. For example, Google researchers faced significant memory consumption challenges when training BERT. The model has hundreds of millions of parameters, requiring large amounts of memory. The researchers had to develop techniques to reduce memory consumption, such as gradient checkpointing and model parallelism.

-5. ** Energy Consumption: ** The energy consumed during training. As machine learning models become more complex, energy consumption has become an important consideration. Training large machine learning models can consume significant energy, leading to a large carbon footprint. For instance, the training of OpenAI's GPT-3 was estimated to have a carbon footprint equivalent to traveling by car for 700,000 kilometers.
+5. **Energy Consumption:** The energy consumed during training. As machine learning models become more complex, energy consumption has become an important consideration. Training large machine learning models can consume significant energy, leading to a large carbon footprint. For instance, the training of OpenAI's GPT-3 was estimated to have a carbon footprint equivalent to traveling by car for 700,000 kilometers.

 6. **Throughput:** The number of training samples processed per unit time. Higher throughput generally indicates a more efficient training process. The throughput is an important metric to consider when training a recommendation system for an e-commerce platform. A high throughput ensures that the model can process large volumes of user interaction data promptly, which is crucial for maintaining the relevance and accuracy of the recommendations. But it's also important to understand how to balance throughput with latency bounds. Therefore, a latency-bounded throughput constraint is often imposed on service-level agreements for data center application deployments.
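To make these training-side metrics concrete, here is a hedged measurement-harness sketch (not part of the patch). PyTorch is assumed to be available, and the tiny model plus synthetic CIFAR-10-shaped batches are illustrative stand-ins, not the book's benchmark setup:

```python
# Hedged sketch: measuring training time, throughput, and peak memory
# for one pass over a synthetic batch stream.
import time
import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Sequential(nn.Flatten(), nn.Linear(32 * 32 * 3, 256),
                      nn.ReLU(), nn.Linear(256, 10)).to(device)
opt = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

# Synthetic CIFAR-10-shaped data: 100 batches of 128 images (stand-in only).
batches = [(torch.randn(128, 3, 32, 32), torch.randint(0, 10, (128,)))
           for _ in range(100)]

if device == "cuda":
    torch.cuda.reset_peak_memory_stats()
start = time.perf_counter()
for x, y in batches:
    x, y = x.to(device), y.to(device)
    opt.zero_grad()
    loss_fn(model(x), y).backward()
    opt.step()
elapsed = time.perf_counter() - start

n_samples = 128 * len(batches)
print(f"epoch time : {elapsed:.2f} s")
print(f"throughput : {n_samples / elapsed:.0f} samples/s")
if device == "cuda":
    print(f"peak memory: {torch.cuda.max_memory_allocated() / 2**20:.0f} MiB")
```

The same loop structure extends to energy measurement when an external power meter or vendor telemetry is available; only the instrumentation around the loop changes.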

@@ -333,8 +333,8 @@ Metrics:
 * Time to completion for each workload
 * Memory bandwidth

-**[MLPerf Training Benchmark](https://github.com/mlcommons/training):** MLPerf is a suite of benchmarks that grew out of DAWNBench and Fathom and other collective works such as [DeepBench](https://github.com/baidu-research/DeepBench) that was designed to measure the performance of machine learning hardware, software, and services. The MLPerf Training benchmark [@mattson2020mlperf] focuses on the time it takes to train models to a target quality metric. It includes diverse workloads, such as image classification, object detection, translation, and reinforcement learning. It's metrics include:
-
+#### Example Use Case
+
 Consider a scenario where we want to benchmark the training of an image classification model on a specific hardware platform.

 1. **Task:** The task is to train a convolutional neural network (CNN) for image classification on the CIFAR-10 dataset.
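One way to pin such a use case down reproducibly is to record the benchmark definition explicitly. A sketch under stated assumptions: apart from the CIFAR-10 CNN task named above, every field value (the 0.90 top-1 accuracy target, the list of measured quantities) is a hypothetical choice for illustration:

```python
# Hedged sketch: an explicit, reproducible training-benchmark specification.
from dataclasses import dataclass, field

@dataclass
class TrainingBenchmarkSpec:
    task: str
    dataset: str
    model: str
    target_metric: str        # the quality bar that stops the clock
    target_value: float
    measured: list = field(default_factory=lambda: [
        "wall-clock time to target", "throughput (samples/s)",
        "peak memory (MiB)", "energy (J)"])

spec = TrainingBenchmarkSpec(
    task="image classification",
    dataset="CIFAR-10",
    model="small CNN",
    target_metric="top-1 accuracy",
    target_value=0.90,        # hypothetical quality target
)
print(spec)
```

Fixing the quality target up front lets wall-clock time be compared fairly across runs and hardware platforms.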

@@ -355,7 +355,7 @@ Inference in machine learning refers to using a trained model to make prediction

 When we build machine learning models, our ultimate goal is to deploy them in real-world applications where they can provide accurate and reliable predictions on new, unseen data. This process of using a trained model to make predictions is known as inference. A machine learning model's real-world performance can differ significantly from its performance on training or validation datasets, which makes benchmarking inference a crucial step in the development and deployment of machine learning models.

-Benchmarking inference allows us to evaluate how well a machine-learning model performs in real-world scenarios. This evaluation ensures that the model is practical and reliable when deployed in applications, providing a more comprehensive understanding of the model's behavior with real data. Additionally, benchmarking can help identify potential bottlenecks or limitations in the model's performance. For example, if a model takes less time to predict, it may be impractical for real-time applications such as autonomous driving or voice assistants.
+Benchmarking inference allows us to evaluate how well a machine-learning model performs in real-world scenarios. This evaluation ensures that the model is practical and reliable when deployed in applications, providing a more comprehensive understanding of the model's behavior with real data. Additionally, benchmarking can help identify potential bottlenecks or limitations in the model's performance. For example, if a model takes too long to predict, it may be impractical for real-time applications such as autonomous driving or voice assistants.

 Resource efficiency is another critical aspect of inference, as it can be computationally intensive and require significant memory and processing power. Benchmarking helps ensure that the model is efficient regarding resource usage, which is particularly important for edge devices with limited computational capabilities, such as smartphones or IoT devices. Moreover, benchmarking allows us to compare the performance of our model with competing models or previous versions of the same model. This comparison is essential for making informed decisions about which model to deploy in a specific application.
@@ -371,11 +371,9 @@ Finally, it is vital to ensure that the model's predictions are not only accurat

 4. **Throughput:** Throughput assesses the system's capacity by measuring the number of inferences or predictions a machine learning model can handle within a specific unit of time. Consider a speech recognition system that employs a Recurrent Neural Network (RNN) as its underlying model; if this system can process and understand 50 different audio clips in a minute, then its throughput rate stands at 50 clips per minute.

-3. **Energy Efficiency:** Energy efficiency is a metric that determines the amount of energy consumed by the machine learning model to perform a single inference. A prime example of this would be a natural language processing model built on a Transformer network architecture; if it utilizes 0.1 Joules of energy to translate a sentence from English to French, its energy efficiency is measured at 0.1 Joules per inference.
+5. **Energy Efficiency:** Energy efficiency is a metric that determines the amount of energy consumed by the machine learning model to perform a single inference. A prime example of this would be a natural language processing model built on a Transformer network architecture; if it utilizes 0.1 Joules of energy to translate a sentence from English to French, its energy efficiency is measured at 0.1 Joules per inference.

-6. **Energy Efficiency:** Energy efficiency is a metric that determines the amount of energy consumed by the machine learning model to perform a single inference. A prime example of this would be a natural language processing model built on a Transformer network architecture; if it utilizes 0.1 Joules of energy to translate a sentence from English to French, its energy efficiency is measured at 0.1 Joules per inference.
-
-7. **Memory Usage:** Memory usage quantifies the volume of RAM needed by a machine learning model to carry out inference tasks. A relevant example to illustrate this would be a face recognition system based on a CNN; if such a system requires 150 MB of RAM to process and recognize faces within an image, its memory usage is 150 MB.
+6. **Memory Usage:** Memory usage quantifies the volume of RAM needed by a machine learning model to carry out inference tasks. A relevant example to illustrate this would be a face recognition system based on a CNN; if such a system requires 150 MB of RAM to process and recognize faces within an image, its memory usage is 150 MB.

 #### Tasks
@@ -427,8 +425,6 @@ Metrics:

 **[OpenVINO toolkit](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html):** OpenVINO toolkit provides a benchmark tool to measure the performance of deep learning models for various tasks, such as image classification, object detection, and facial recognition, on Intel hardware. It offers detailed insights into the models' inference performance on different hardware configurations. Its metrics include:

-OpenVINO toolkit provides a benchmark tool to measure the performance of deep learning models for various tasks, such as image classification, object detection, and facial recognition, on Intel hardware. It offers detailed insights into the models' inference performance on different hardware configurations.
-
 Metrics:

 * Inference time
@@ -436,7 +432,7 @@
 * Latency
 * CPU and GPU utilization

-*Example Use Case*
+#### Example Use Case

 Consider a scenario where we want to evaluate the inference performance of an object detection model on a specific edge device.
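As a concrete companion to the inference metrics and the edge object-detection scenario above, here is a minimal latency/throughput sketch (not part of the patch). PyTorch is assumed, and the toy convolutional model and 224x224 input are illustrative stand-ins for a real detector on a real device:

```python
# Hedged sketch: single-image inference latency percentiles, derived
# throughput, and weights-only memory for a toy model.
import time
import statistics
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
                      nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(8, 4))
model.eval()
x = torch.randn(1, 3, 224, 224)  # one image at a time, as on an edge device

with torch.no_grad():
    for _ in range(10):          # warm-up runs, excluded from timing
        model(x)
    times_ms = []
    for _ in range(100):
        t0 = time.perf_counter()
        model(x)
        times_ms.append((time.perf_counter() - t0) * 1000)

times_ms.sort()
print(f"median latency : {statistics.median(times_ms):.2f} ms")
print(f"p99 latency    : {times_ms[98]:.2f} ms")
print(f"throughput     : {1000 / statistics.mean(times_ms):.1f} inferences/s")
weights_mib = sum(p.numel() * p.element_size() for p in model.parameters()) / 2**20
print(f"weight memory  : {weights_mib:.2f} MiB (parameters only, not activations)")
```

Reporting tail latency (p99) alongside the median matters for real-time targets, since occasional slow inferences are what violate response-time constraints.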
diff --git a/contents/contributors.qmd b/contents/contributors.qmd
index 7fafb5cc..e479bc20 100644
--- a/contents/contributors.qmd
+++ b/contents/contributors.qmd
@@ -2,10 +2,9 @@
 comments: false
 ---

-# Contributors {.unnumbered}
-
-We extend our sincere thanks to the diverse group of individuals who have generously contributed their expertise, insights, and time to improve both the content and codebase of this project. Below you will find a list of all contributors. If you would like to contribute to this project, please see our [GitHub](https://github.com/harvard-edge/cs249r_book) page.
+# Contributors & Thanks {.unnumbered}

+We extend our sincere thanks to the diverse group of individuals who have generously contributed their expertise, insights, time, and support to improve both the content and codebase of this project. This includes not only those who have directly contributed through code and writing but also those who have helped by identifying issues, providing feedback, and offering suggestions. Below, you will find a list of all contributors. If you would like to contribute to this project, please visit our [GitHub](https://github.com/harvard-edge/cs249r_book) page for more information.