diff --git a/.all-contributorsrc b/.all-contributorsrc index 581dc920..c70549c5 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -2,7 +2,7 @@ "projectName": "cs249r_book", "projectOwner": "harvard-edge", "files": [ - "contents/contributors.qmd", + "contents/core/acknowledgements/acknowledgements.qmd", "README.md" ], "contributors": [ @@ -69,13 +69,6 @@ "profile": "https://github.com/kai4avaya", "contributions": [] }, - { - "login": "mpstewart1", - "name": "Matthew Stewart", - "avatar_url": "https://avatars.githubusercontent.com/mpstewart1", - "profile": "https://github.com/mpstewart1", - "contributions": [] - }, { "login": "eliasab16", "name": "Elias Nuwara", @@ -83,6 +76,13 @@ "profile": "https://github.com/eliasab16", "contributions": [] }, + { + "login": "mpstewart1", + "name": "Matthew Stewart", + "avatar_url": "https://avatars.githubusercontent.com/mpstewart1", + "profile": "https://github.com/mpstewart1", + "contributions": [] + }, { "login": "JaredP94", "name": "Jared Ping", @@ -118,13 +118,6 @@ "profile": "https://github.com/sophiacho1", "contributions": [] }, - { - "login": "andreamurillomtz", - "name": "Andrea", - "avatar_url": "https://avatars.githubusercontent.com/andreamurillomtz", - "profile": "https://github.com/andreamurillomtz", - "contributions": [] - }, { "login": "18jeffreyma", "name": "Jeffrey Ma", @@ -132,6 +125,13 @@ "profile": "https://github.com/18jeffreyma", "contributions": [] }, + { + "login": "andreamurillomtz", + "name": "Andrea", + "avatar_url": "https://avatars.githubusercontent.com/andreamurillomtz", + "profile": "https://github.com/andreamurillomtz", + "contributions": [] + }, { "login": "alxrod", "name": "Alex Rodriguez", @@ -160,13 +160,6 @@ "profile": "https://github.com/zishenwan", "contributions": [] }, - { - "login": "ma3mool", - "name": "Abdulrahman Mahmoud", - "avatar_url": "https://avatars.githubusercontent.com/ma3mool", - "profile": "https://github.com/ma3mool", - "contributions": [] - }, { "login": 
"srivatsankrishnan", "name": "Srivatsan Krishnan", @@ -182,10 +175,10 @@ "contributions": [] }, { - "login": "eezike", - "name": "Emeka Ezike", - "avatar_url": "https://avatars.githubusercontent.com/eezike", - "profile": "https://github.com/eezike", + "login": "ma3mool", + "name": "Abdulrahman Mahmoud", + "avatar_url": "https://avatars.githubusercontent.com/ma3mool", + "profile": "https://github.com/ma3mool", "contributions": [] }, { @@ -209,13 +202,6 @@ "profile": "https://github.com/arnaumarin", "contributions": [] }, - { - "login": "Ekhao", - "name": "Emil Njor", - "avatar_url": "https://avatars.githubusercontent.com/Ekhao", - "profile": "https://github.com/Ekhao", - "contributions": [] - }, { "login": "AditiR-42", "name": "Aditi Raju", @@ -244,6 +230,13 @@ "profile": "https://github.com/oishib", "contributions": [] }, + { + "login": "Ekhao", + "name": "Emil Njor", + "avatar_url": "https://avatars.githubusercontent.com/Ekhao", + "profile": "https://github.com/Ekhao", + "contributions": [] + }, { "login": "ELSuitorHarvard", "name": "ELSuitorHarvard", @@ -258,13 +251,6 @@ "profile": "https://github.com/BaeHenryS", "contributions": [] }, - { - "login": "jaywonchung", - "name": "Jae-Won Chung", - "avatar_url": "https://avatars.githubusercontent.com/jaywonchung", - "profile": "https://github.com/jaywonchung", - "contributions": [] - }, { "login": "leo47007", "name": "Yu-Shun Hsiao", @@ -272,6 +258,13 @@ "profile": "https://github.com/leo47007", "contributions": [] }, + { + "login": "jaywonchung", + "name": "Jae-Won Chung", + "avatar_url": "https://avatars.githubusercontent.com/jaywonchung", + "profile": "https://github.com/jaywonchung", + "contributions": [] + }, { "login": "mmaz", "name": "Mark Mazumder", @@ -280,17 +273,17 @@ "contributions": [] }, { - "login": "marcozennaro", - "name": "Marco Zennaro", - "avatar_url": "https://avatars.githubusercontent.com/marcozennaro", - "profile": "https://github.com/marcozennaro", + "login": "pongtr", + "name": "Pong 
Trairatvorakul", + "avatar_url": "https://avatars.githubusercontent.com/pongtr", + "profile": "https://github.com/pongtr", "contributions": [] }, { - "login": "euranofshin", - "name": "Eura Nofshin", - "avatar_url": "https://avatars.githubusercontent.com/euranofshin", - "profile": "https://github.com/euranofshin", + "login": "ShvetankPrakash", + "name": "Shvetank Prakash", + "avatar_url": "https://avatars.githubusercontent.com/ShvetankPrakash", + "profile": "https://github.com/ShvetankPrakash", "contributions": [] }, { @@ -301,31 +294,31 @@ "contributions": [] }, { - "login": "pongtr", - "name": "Pong Trairatvorakul", - "avatar_url": "https://avatars.githubusercontent.com/pongtr", - "profile": "https://github.com/pongtr", + "login": "marcozennaro", + "name": "Marco Zennaro", + "avatar_url": "https://avatars.githubusercontent.com/marcozennaro", + "profile": "https://github.com/marcozennaro", "contributions": [] }, { - "login": "jzhou1318", - "name": "Jennifer Zhou", - "avatar_url": "https://avatars.githubusercontent.com/jzhou1318", - "profile": "https://github.com/jzhou1318", + "login": "Emeka Ezike", + "name": "Emeka Ezike", + "avatar_url": "https://www.gravatar.com/avatar/af39c27c6090c50a1921a9b6366e81cc?d=identicon&s=100", + "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, { - "login": "ShvetankPrakash", - "name": "Shvetank Prakash", - "avatar_url": "https://avatars.githubusercontent.com/ShvetankPrakash", - "profile": "https://github.com/ShvetankPrakash", + "login": "euranofshin", + "name": "Eura Nofshin", + "avatar_url": "https://avatars.githubusercontent.com/euranofshin", + "profile": "https://github.com/euranofshin", "contributions": [] }, { - "login": "alex-oesterling", - "name": "Alex Oesterling", - "avatar_url": "https://avatars.githubusercontent.com/alex-oesterling", - "profile": "https://github.com/alex-oesterling", + "login": "jzhou1318", + "name": "Jennifer Zhou", + "avatar_url": 
"https://avatars.githubusercontent.com/jzhou1318", + "profile": "https://github.com/jzhou1318", "contributions": [] }, { @@ -335,6 +328,13 @@ "profile": "https://github.com/aryatschand", "contributions": [] }, + { + "login": "FinAminToastCrunch", + "name": "Fin Amin", + "avatar_url": "https://avatars.githubusercontent.com/FinAminToastCrunch", + "profile": "https://github.com/FinAminToastCrunch", + "contributions": [] + }, { "login": "BrunoScaglione", "name": "Bruno Scaglione", @@ -356,13 +356,6 @@ "profile": "https://github.com/Allen-Kuang", "contributions": [] }, - { - "login": "FinAminToastCrunch", - "name": "Fin Amin", - "avatar_url": "https://avatars.githubusercontent.com/FinAminToastCrunch", - "profile": "https://github.com/FinAminToastCrunch", - "contributions": [] - }, { "login": "Fatima Shah", "name": "Fatima Shah", @@ -392,10 +385,10 @@ "contributions": [] }, { - "login": "BravoBaldo", - "name": "Baldassarre Cesarano", - "avatar_url": "https://avatars.githubusercontent.com/BravoBaldo", - "profile": "https://github.com/BravoBaldo", + "login": "alex-oesterling", + "name": "Alex Oesterling", + "avatar_url": "https://avatars.githubusercontent.com/alex-oesterling", + "profile": "https://github.com/alex-oesterling", "contributions": [] }, { @@ -406,17 +399,17 @@ "contributions": [] }, { - "login": "bilgeacun", - "name": "Bilge Acun", - "avatar_url": "https://avatars.githubusercontent.com/bilgeacun", - "profile": "https://github.com/bilgeacun", + "login": "emmanuel2406", + "name": "Emmanuel Rassou", + "avatar_url": "https://avatars.githubusercontent.com/emmanuel2406", + "profile": "https://github.com/emmanuel2406", "contributions": [] }, { - "login": "YLab-UChicago", - "name": "yanjingl", - "avatar_url": "https://avatars.githubusercontent.com/YLab-UChicago", - "profile": "https://github.com/YLab-UChicago", + "login": "abigailswallow", + "name": "abigailswallow", + "avatar_url": "https://avatars.githubusercontent.com/abigailswallow", + "profile": 
"https://github.com/abigailswallow", "contributions": [] }, { @@ -427,17 +420,10 @@ "contributions": [] }, { - "login": "abigailswallow", - "name": "abigailswallow", - "avatar_url": "https://avatars.githubusercontent.com/abigailswallow", - "profile": "https://github.com/abigailswallow", - "contributions": [] - }, - { - "login": "jasonlyik", - "name": "Jason Yik", - "avatar_url": "https://avatars.githubusercontent.com/jasonlyik", - "profile": "https://github.com/jasonlyik", + "login": "bilgeacun", + "name": "Bilge Acun", + "avatar_url": "https://avatars.githubusercontent.com/bilgeacun", + "profile": "https://github.com/bilgeacun", "contributions": [] }, { @@ -448,24 +434,17 @@ "contributions": [] }, { - "login": "ciyer64", - "name": "Curren Iyer", - "avatar_url": "https://avatars.githubusercontent.com/ciyer64", - "profile": "https://github.com/ciyer64", - "contributions": [] - }, - { - "login": "emmanuel2406", - "name": "Emmanuel Rassou", - "avatar_url": "https://avatars.githubusercontent.com/emmanuel2406", - "profile": "https://github.com/emmanuel2406", + "login": "jessicaquaye", + "name": "Jessica Quaye", + "avatar_url": "https://avatars.githubusercontent.com/jessicaquaye", + "profile": "https://github.com/jessicaquaye", "contributions": [] }, { - "login": "skmur", - "name": "Sonia Murthy", - "avatar_url": "https://avatars.githubusercontent.com/skmur", - "profile": "https://github.com/skmur", + "login": "jasonlyik", + "name": "Jason Yik", + "avatar_url": "https://avatars.githubusercontent.com/jasonlyik", + "profile": "https://github.com/jasonlyik", "contributions": [] }, { @@ -476,17 +455,10 @@ "contributions": [] }, { - "login": "jessicaquaye", - "name": "Jessica Quaye", - "avatar_url": "https://avatars.githubusercontent.com/jessicaquaye", - "profile": "https://github.com/jessicaquaye", - "contributions": [] - }, - { - "login": "vijay-edu", - "name": "Vijay Edupuganti", - "avatar_url": "https://avatars.githubusercontent.com/vijay-edu", - "profile": 
"https://github.com/vijay-edu", + "login": "skmur", + "name": "Sonia Murthy", + "avatar_url": "https://avatars.githubusercontent.com/skmur", + "profile": "https://github.com/skmur", "contributions": [] }, { @@ -496,6 +468,13 @@ "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, + { + "login": "Baldassarre Cesarano", + "name": "Baldassarre Cesarano", + "avatar_url": "https://www.gravatar.com/avatar/13b816dd84837bb4700a55f47a70763e?d=identicon&s=100", + "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", + "contributions": [] + }, { "login": "Annie Laurie Cook", "name": "Annie Laurie Cook", @@ -503,6 +482,13 @@ "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, + { + "login": "Vijay Edupuganti", + "name": "Vijay Edupuganti", + "avatar_url": "https://www.gravatar.com/avatar/b15b6e0e9adf58099905c1a0fd474cb9?d=identicon&s=100", + "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", + "contributions": [] + }, { "login": "Jothi Ramaswamy", "name": "Jothi Ramaswamy", @@ -517,6 +503,13 @@ "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, + { + "login": "Curren Iyer", + "name": "Curren Iyer", + "avatar_url": "https://www.gravatar.com/avatar/bd53d146aa888548c8db4da02bf81e7a?d=identicon&s=100", + "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", + "contributions": [] + }, { "login": "Fatima Shah", "name": "Fatima Shah", @@ -524,6 +517,13 @@ "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, + { + "login": "yanjingl", + "name": "yanjingl", + "avatar_url": "https://www.gravatar.com/avatar/f5d58ba6aa9b00189d4c018d370e8f43?d=identicon&s=100", + "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", + "contributions": [] + }, { "login": "a-saraf", "name": "a-saraf", diff --git 
a/.github/workflows/auto-add-contributors.yml b/.github/workflows/auto-add-contributors.yml index 6d347c36..f944a94f 100644 --- a/.github/workflows/auto-add-contributors.yml +++ b/.github/workflows/auto-add-contributors.yml @@ -53,7 +53,7 @@ jobs: git add -u # Commit and push if there are changes - git diff-index --quiet HEAD || git commit -m "Update readme and contributors.qmd with contributors" + git diff-index --quiet HEAD || git commit -m "Update readme and acknowledgements.qmd with contributors" git push env: diff --git a/.github/workflows/contributors/update_contributors.py b/.github/workflows/contributors/update_contributors.py index c3652bb9..c21a992f 100644 --- a/.github/workflows/contributors/update_contributors.py +++ b/.github/workflows/contributors/update_contributors.py @@ -413,7 +413,7 @@ def generate_gravatar_url(name): final_result = dict( projectName=REPO, projectOwner=OWNER, - files=["contents/contributors.qmd", "README.md"], + files=["contents/core/acknowledgements/acknowledgements.qmd", "README.md"], contributors=[ dict( login=( diff --git a/CNAME b/CNAME new file mode 100644 index 00000000..b7a5b578 --- /dev/null +++ b/CNAME @@ -0,0 +1 @@ +mlsysbook.ai diff --git a/README.md b/README.md index f94d993e..9dbd4df5 100644 --- a/README.md +++ b/README.md @@ -107,10 +107,10 @@ This project follows the [all-contributors](https://allcontributors.org) specifi Douwe den Blanken
Douwe den Blanken

shanzehbatool
shanzehbatool

Kai Kleinbard
Kai Kleinbard

- Matthew Stewart
Matthew Stewart

+ Elias Nuwara
Elias Nuwara

- Elias Nuwara
Elias Nuwara

+ Matthew Stewart
Matthew Stewart

Jared Ping
Jared Ping

Itai Shapira
Itai Shapira

Maximilian Lam
Maximilian Lam

@@ -118,86 +118,86 @@ This project follows the [all-contributors](https://allcontributors.org) specifi Sophia Cho
Sophia Cho

- Andrea
Andrea

Jeffrey Ma
Jeffrey Ma

+ Andrea
Andrea

Alex Rodriguez
Alex Rodriguez

Korneel Van den Berghe
Korneel Van den Berghe

Colby Banbury
Colby Banbury

Zishen Wan
Zishen Wan

- Abdulrahman Mahmoud
Abdulrahman Mahmoud

Srivatsan Krishnan
Srivatsan Krishnan

Divya Amirtharaj
Divya Amirtharaj

+ Abdulrahman Mahmoud
Abdulrahman Mahmoud

- Emeka Ezike
Emeka Ezike

Aghyad Deeb
Aghyad Deeb

Haoran Qiu
Haoran Qiu

marin-llobet
marin-llobet

- Emil Njor
Emil Njor

- - Aditi Raju
Aditi Raju

Jared Ni
Jared Ni

+ + Michael Schnebly
Michael Schnebly

oishib
oishib

+ Emil Njor
Emil Njor

ELSuitorHarvard
ELSuitorHarvard

+ Henry Bae
Henry Bae

- Henry Bae
Henry Bae

- Jae-Won Chung
Jae-Won Chung

Yu-Shun Hsiao
Yu-Shun Hsiao

+ Jae-Won Chung
Jae-Won Chung

Mark Mazumder
Mark Mazumder

- Marco Zennaro
Marco Zennaro

+ Pong Trairatvorakul
Pong Trairatvorakul

+ Shvetank Prakash
Shvetank Prakash

- Eura Nofshin
Eura Nofshin

Andrew Bass
Andrew Bass

- Pong Trairatvorakul
Pong Trairatvorakul

+ Marco Zennaro
Marco Zennaro

+ Emeka Ezike
Emeka Ezike

+ Eura Nofshin
Eura Nofshin

Jennifer Zhou
Jennifer Zhou

- Shvetank Prakash
Shvetank Prakash

- Alex Oesterling
Alex Oesterling

Arya Tschand
Arya Tschand

+ Fin Amin
Fin Amin

Bruno Scaglione
Bruno Scaglione

Gauri Jain
Gauri Jain

Allen-Kuang
Allen-Kuang

- Fin Amin
Fin Amin

Fatima Shah
Fatima Shah

The Random DIY
The Random DIY

gnodipac886
gnodipac886

Sercan Aygün
Sercan Aygün

+ Alex Oesterling
Alex Oesterling

- Baldassarre Cesarano
Baldassarre Cesarano

Abenezer
Abenezer

- Bilge Acun
Bilge Acun

- yanjingl
yanjingl

+ Emmanuel Rassou
Emmanuel Rassou

+ abigailswallow
abigailswallow

Yang Zhou
Yang Zhou

+ Bilge Acun
Bilge Acun

- abigailswallow
abigailswallow

- Jason Yik
Jason Yik

happyappledog
happyappledog

- Curren Iyer
Curren Iyer

- Emmanuel Rassou
Emmanuel Rassou

- - - Sonia Murthy
Sonia Murthy

- Shreya Johri
Shreya Johri

Jessica Quaye
Jessica Quaye

- Vijay Edupuganti
Vijay Edupuganti

- Costin-Andrei Oncescu
Costin-Andrei Oncescu

+ Jason Yik
Jason Yik

+ Shreya Johri
Shreya Johri

+ Sonia Murthy
Sonia Murthy

+ Costin-Andrei Oncescu
Costin-Andrei Oncescu

+ Baldassarre Cesarano
Baldassarre Cesarano

Annie Laurie Cook
Annie Laurie Cook

+ Vijay Edupuganti
Vijay Edupuganti

Jothi Ramaswamy
Jothi Ramaswamy

+ + Batur Arslan
Batur Arslan

+ Curren Iyer
Curren Iyer

Fatima Shah
Fatima Shah

+ yanjingl
yanjingl

a-saraf
a-saraf

diff --git a/_quarto.yml b/_quarto.yml index 25c4f789..e9d68f51 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -8,9 +8,12 @@ project: website: announcement: icon: star-half - dismissable: false - content: 🌟 **We Hit 1,000 GitHub Stars - Thank You!** 🌟 Thanks to you, Arduino and SEEED are donating NiclaVision and XIAO ESP32S3 boards for AI education. Let's keep going! Every additional 25 stars helps us bring more resources to the community. Click here to ⭐ - type: info + dismissable: true + content: | + ⭐ [Oct 18] We Hit 1,000 GitHub Stars 🎉 Thanks to you, Arduino and SEEED donated AI hardware kits for education!
+ 🎓 [Nov 15] The [EDGE AI Foundation](https://www.edgeaifoundation.org/) is **matching scholarship funds** for every new GitHub ⭐ (up to 10,000 stars). Click here to support! 🙏
+ 🚀 Our mission. 1 ⭐ = 1 👩‍🎓 Learner. Every star tells a story: learners gaining knowledge and supporters fueling the mission. Together, we're making a difference. + position: below-navbar book: @@ -82,15 +85,13 @@ book: created using Google's Notebook LM and inspired by insights drawn from our [IEEE education viewpoint paper](https://web.eng.fiu.edu/gaquan/Papers/ESWEEK24Papers/CPS-Proceedings/pdfs/CODES-ISSS/563900a043/563900a043.pdf). This podcast provides an accessible overview of what this book is all about. - - Don't miss out—after all, it's AI talking about... AI. 🤯   - _Acknowledgment:_ Special thanks to [Marco Zennaro](https://www.ictp.it/member/marco-zennaro), one of our early community contributors to the [AI for Good](./contents/core/ai_for_good/ai_for_good.qmd) chapter, for inspiring the creation of this podcast. Thank you, Marco! + _Acknowledgment:_ Special thanks to [Marco Zennaro](https://www.ictp.it/member/marco-zennaro), one of our early community contributors who helped us with the [AI for Good](./contents/core/ai_for_good/ai_for_good.qmd) chapter, for inspiring the creation of this podcast. Thank you, Marco! 
---- @@ -109,12 +110,9 @@ book: chapters: - text: "---" - index.qmd - - contents/copyright.qmd - - contents/dedication.qmd - contents/core/acknowledgements/acknowledgements.qmd - - contents/contributors.qmd + - contents/core/about/about.qmd - contents/ai/socratiq.qmd - - contents/about.qmd - text: "---" - contents/core/introduction/introduction.qmd - contents/core/ml_systems/ml_systems.qmd @@ -243,6 +241,8 @@ format: code-block-bg: true code-block-border-left: "#A51C30" + lightbox: true + table: classes: [table-striped, table-hover] @@ -370,3 +370,5 @@ format: - pdfpagelayout=TwoPageRight include-in-header: - file: "tex/header-includes.tex" +resources: + - CNAME diff --git a/contents/about.qmd b/contents/about.qmd deleted file mode 100644 index 5021926d..00000000 --- a/contents/about.qmd +++ /dev/null @@ -1,67 +0,0 @@ ---- -comments: false ---- - -# About the Book {.unnumbered} - -## Overview - -Welcome to this collaborative project initiated by the CS249r Machine Learning Systems class at Harvard University. Our goal is to make this book a community resource that assists educators and learners in understanding ML systems. The book will be regularly updated to reflect new insights into ML systems and effective teaching methods. - -## Topics Explored - -This book offers a comprehensive look at various aspects of machine learning systems. We cover the entire end-to-end ML systems workflow, starting with fundamental concepts and progressing through data engineering, AI frameworks, and model training. - -You'll learn about optimizing models for efficiency, deploying AI on various hardware platforms, and benchmarking performance. The book also explores more advanced topics like security, privacy, responsible and sustainable AI, robust and generative AI, and the social impact of AI. By the end, you'll have a solid foundation and practical insights into both the technical and ethical dimensions of machine learning. 
- -By the time you finish this book, we hope that you'll have a foundational understanding of machine learning and its applications. You'll also learn about real-world implementations of machine learning systems and gain practical experience through project-based labs and assignments. - -## Who Should Read This - -This book is tailored for individuals at various stages in their interaction with machine learning systems. It starts with the fundamentals and progresses to more advanced topics pertinent to the ML community and broader research areas. The most relevant audiences include: - -* **Students in Computer Science and Electrical Engineering:** Senior and graduate students in these fields will find this book invaluable. It introduces the techniques used in designing and building ML systems, focusing on fundamentals rather than depth—typically the focus of classroom instruction. This book aims to provide the necessary background and context, enabling instructors to delve deeper into advanced topics. An important aspect is the end-to-end focus, often overlooked in traditional curricula. - -* **Systems Engineers:** For engineers, this book serves as a guide to understanding the challenges of intelligent applications, especially on resource-constrained ML platforms. It covers the conceptual framework and practical components that constitute an ML system, extending beyond specific areas you might specialize in at your job. - -* **Researchers and Academics:** Researchers will find that this book addresses the unique challenges of running machine learning algorithms on diverse platforms. Efficiency is becoming increasingly important; understanding algorithms alone is not sufficient, as a deeper understanding of systems is necessary to build more efficient models. For researchers, the book cites seminal papers, guiding you towards foundational works that have shaped the field and drawing connections between various areas with significant implications for your work. 
- -## Key Learning Outcomes - -Readers will acquire skills in training and deploying deep neural network models on various platforms, along with understanding the broader challenges involved in their design, development, and deployment. Specifically, after completing this book, learners will be able to: - -1. Explain core concepts and their relevance to AI systems. - -2. Describe the fundamental components and architecture of AI systems. - -3. Compare and contrast various hardware platforms for AI deployment, selecting appropriate options for specific use cases. - -4. Design and implement training processes for AI models across different systems. - -5. Apply optimization techniques to improve AI model performance and efficiency. - -6. Analyze real-world AI applications and their implementation strategies. - -7. Evaluate current challenges in AI systems and predict future trends in the field. - -8. Develop a complete machine learning-enabled project, from conception to deployment. - -9. Troubleshoot common issues in AI model training and deployment. - -10. Critically assess the ethical implications and societal impacts of AI systems. - -## Prerequisites for Readers - -* **Basic Programming Skills:** We recommend that you have some prior programming experience, ideally in Python. A grasp of variables, data types, and control structures will make it easier to engage with the book. - -* **Some Machine Learning Knowledge:** While not mandatory, a basic understanding of machine learning concepts will help you absorb the material more readily. If you're new to the field, the book provides enough background information to get you up to speed. - -* **Basic Systems Knowledge:** A basic level of systems knowledge at an undergraduate junior or senior level is recommended. Understanding system architecture, operating systems, and basic networking will be beneficial. 
- -* **Python Programming (Optional):** If you're familiar with Python, you'll find it easier to engage with the coding sections of the book. Knowing libraries like NumPy, scikit-learn, and TensorFlow will be particularly helpful. - -* **Willingness to Learn:** The book is designed to be accessible to a broad audience, with varying levels of technical expertise. A willingness to challenge yourself and engage in practical exercises will help you get the most out of it. - -* **Resource Availability:** For the hands-on aspects, you'll need a computer with Python and the relevant libraries installed. Optional access to development boards or specific hardware will also be beneficial for experimenting with machine learning model deployment. - -By meeting these prerequisites, you'll be well-positioned to deepen your understanding of machine learning systems, engage in coding exercises, and even implement practical applications on various devices. diff --git a/contents/contributors.qmd b/contents/contributors.qmd deleted file mode 100644 index ad32e3e7..00000000 --- a/contents/contributors.qmd +++ /dev/null @@ -1,203 +0,0 @@ ---- -comments: false ---- - -# Contributors & Thanks {.unnumbered} - -We extend our sincere thanks to the diverse group of individuals who have generously contributed their expertise, insights, time, and support to improve both the content and codebase of this project. This includes not only those who have directly contributed through code and writing but also those who have helped by identifying issues, providing feedback, and offering suggestions. Below, you will find a list of all contributors. If you would like to contribute to this project, please visit our [GitHub](https://github.com/harvard-edge/cs249r_book) page for more information. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Vijay Janapa Reddi
Vijay Janapa Reddi

jasonjabbour
jasonjabbour

Ikechukwu Uchendu
Ikechukwu Uchendu

Naeem Khoshnevis
Naeem Khoshnevis

Marcelo Rovai
Marcelo Rovai

Sara Khosravi
Sara Khosravi

Douwe den Blanken
Douwe den Blanken

shanzehbatool
shanzehbatool

Kai Kleinbard
Kai Kleinbard

Matthew Stewart
Matthew Stewart

Elias Nuwara
Elias Nuwara

Jared Ping
Jared Ping

Itai Shapira
Itai Shapira

Maximilian Lam
Maximilian Lam

Jayson Lin
Jayson Lin

Sophia Cho
Sophia Cho

Andrea
Andrea

Jeffrey Ma
Jeffrey Ma

Alex Rodriguez
Alex Rodriguez

Korneel Van den Berghe
Korneel Van den Berghe

Colby Banbury
Colby Banbury

Zishen Wan
Zishen Wan

Abdulrahman Mahmoud
Abdulrahman Mahmoud

Srivatsan Krishnan
Srivatsan Krishnan

Divya Amirtharaj
Divya Amirtharaj

Emeka Ezike
Emeka Ezike

Aghyad Deeb
Aghyad Deeb

Haoran Qiu
Haoran Qiu

marin-llobet
marin-llobet

Emil Njor
Emil Njor

Aditi Raju
Aditi Raju

Jared Ni
Jared Ni

Michael Schnebly
Michael Schnebly

oishib
oishib

ELSuitorHarvard
ELSuitorHarvard

Henry Bae
Henry Bae

Jae-Won Chung
Jae-Won Chung

Yu-Shun Hsiao
Yu-Shun Hsiao

Mark Mazumder
Mark Mazumder

Marco Zennaro
Marco Zennaro

Eura Nofshin
Eura Nofshin

Andrew Bass
Andrew Bass

Pong Trairatvorakul
Pong Trairatvorakul

Jennifer Zhou
Jennifer Zhou

Shvetank Prakash
Shvetank Prakash

Alex Oesterling
Alex Oesterling

Arya Tschand
Arya Tschand

Bruno Scaglione
Bruno Scaglione

Gauri Jain
Gauri Jain

Allen-Kuang
Allen-Kuang

Fin Amin
Fin Amin

Fatima Shah
Fatima Shah

The Random DIY
The Random DIY

gnodipac886
gnodipac886

Sercan Aygün
Sercan Aygün

Baldassarre Cesarano
Baldassarre Cesarano

Abenezer
Abenezer

Bilge Acun
Bilge Acun

yanjingl
yanjingl

Yang Zhou
Yang Zhou

abigailswallow
abigailswallow

Jason Yik
Jason Yik

happyappledog
happyappledog

Curren Iyer
Curren Iyer

Emmanuel Rassou
Emmanuel Rassou

Sonia Murthy
Sonia Murthy

Shreya Johri
Shreya Johri

Jessica Quaye
Jessica Quaye

Vijay Edupuganti
Vijay Edupuganti

Costin-Andrei Oncescu
Costin-Andrei Oncescu

Annie Laurie Cook
Annie Laurie Cook

Jothi Ramaswamy
Jothi Ramaswamy

Batur Arslan
Batur Arslan

Fatima Shah
Fatima Shah

a-saraf
a-saraf

songhan
songhan

Zishen
Zishen

- - - - - - - - \ No newline at end of file diff --git a/contents/copyright.qmd b/contents/copyright.qmd deleted file mode 100644 index 91154706..00000000 --- a/contents/copyright.qmd +++ /dev/null @@ -1,15 +0,0 @@ ---- -comments: false ---- - -# Copyright {.unnumbered} - -This book is open-source and developed collaboratively through GitHub. Unless otherwise stated, this work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0 CC BY-SA 4.0). You can find the full text of the license [here](https://creativecommons.org/licenses/by-nc-sa/4.0). - -Contributors to this project have dedicated their contributions to the public domain or under the same open license as the original project. While the contributions are collaborative, each contributor retains copyright in their respective contributions. - -For details on authorship, contributions, and how to contribute, please see the project repository on [GitHub](https://github.com/harvard-edge/cs249r_book). - -All trademarks and registered trademarks mentioned in this book are the property of their respective owners. - -The information provided in this book is believed to be accurate and reliable. However, the authors, editors, and publishers cannot be held liable for any damages caused or alleged to be caused either directly or indirectly by the information contained in this book. diff --git a/contents/core/about/about.qmd b/contents/core/about/about.qmd new file mode 100644 index 00000000..d76ec0c5 --- /dev/null +++ b/contents/core/about/about.qmd @@ -0,0 +1,192 @@ +--- +comments: false +--- + +# About the Book {.unnumbered} + +## Overview + +Welcome to this collaborative textbook, developed as part of the CS249r Machine Learning Systems class at Harvard University. Our goal is to provide a comprehensive resource for educators and students seeking to understand machine learning systems. 
This book is continually updated to incorporate the latest insights and effective teaching strategies. + +## What's Inside the Book + +We explore the technical foundations of machine learning systems, the challenges of building and deploying these systems across the computing continuum, and the vast array of applications they enable. A unique aspect of this book is its function as a conduit to seminal scholarly works and academic research papers, aimed at enriching the reader's understanding and encouraging deeper exploration of the subject. This approach seeks to bridge the gap between pedagogical materials and cutting-edge research trends, offering a comprehensive guide that is in step with the evolving field of applied machine learning. + +To improve the learning experience, we have included a variety of supplementary materials. Throughout the book, you will find slides that summarize key concepts, videos that provide in-depth explanations and demonstrations, exercises that reinforce your understanding, and labs that offer hands-on experience with the tools and techniques discussed. These additional resources are designed to cater to different learning styles and help you gain a deeper, more practical understanding of the subject matter. + +## Topics Explored + +This textbook offers a comprehensive exploration of various aspects of machine learning systems, covering the entire end-to-end workflow. Starting with foundational concepts, it progresses through essential areas such as data engineering, AI frameworks, and model training. + +To enhance the learning experience, we included a diverse array of supplementary materials. These resources consist of slides that summarize key concepts, videos providing detailed explanations and demonstrations, exercises designed to reinforce understanding, and labs that offer hands-on experience with the discussed tools and techniques. 
+ +Readers will gain insights into optimizing models for efficiency, deploying AI across different hardware platforms, and benchmarking performance. The book also delves into advanced topics, including security, privacy, responsible and sustainable AI, robust AI, and generative AI. Additionally, it examines the social impact of AI, concluding with an emphasis on the positive contributions AI can make to society. + +## Key Learning Outcomes + +Readers will acquire skills in training and deploying deep neural network models on various platforms, along with understanding the broader challenges involved in their design, development, and deployment. Specifically, after completing this book, learners will be able to: + +::: {.callout-tip} + +1. Explain core concepts and their relevance to AI systems. + +2. Describe the fundamental components and architecture of AI systems. + +3. Compare and contrast various hardware platforms for AI deployment, selecting appropriate options for specific use cases. + +4. Design and implement training processes for AI models across different systems. + +5. Apply optimization techniques to improve AI model performance and efficiency. + +6. Analyze real-world AI applications and their implementation strategies. + +7. Evaluate current challenges in AI systems and predict future trends in the field. + +8. Develop a complete machine learning-enabled project, from conception to deployment. + +9. Troubleshoot common issues in AI model training and deployment. + +10. Critically assess the ethical implications and societal impacts of AI systems. + +::: + +## Prerequisites for Readers + +* **Basic Programming Skills:** We recommend that you have some prior programming experience, ideally in Python. A grasp of variables, data types, and control structures will make it easier to engage with the book. 
+ +* **Some Machine Learning Knowledge:** While not mandatory, a basic understanding of machine learning concepts will help you absorb the material more readily. If you're new to the field, the book provides enough background information to get you up to speed. + +* **Basic Systems Knowledge:** A basic level of systems knowledge at an undergraduate junior or senior level is recommended. Understanding system architecture, operating systems, and basic networking will be beneficial. + +* **Python Programming (Optional):** If you're familiar with Python, you'll find it easier to engage with the coding sections of the book. Knowing libraries like NumPy, scikit-learn, and TensorFlow will be particularly helpful. + +* **Willingness to Learn:** The book is designed to be accessible to a broad audience, with varying levels of technical expertise. A willingness to challenge yourself and engage in practical exercises will help you get the most out of it. + +* **Resource Availability:** For the hands-on aspects, you'll need a computer with Python and the relevant libraries installed. Optional access to development boards or specific hardware will also be beneficial for experimenting with machine learning model deployment. + +By meeting these prerequisites, you'll be well-positioned to deepen your understanding of machine learning systems, engage in coding exercises, and even implement practical applications on various devices. + +## Who Should Read This + +This book is designed for individuals at different stages of their journey with machine learning systems, from beginners to those more advanced in the field. It introduces fundamental concepts and progresses to complex topics relevant to the machine learning community and expansive research areas. The key audiences for this book include: + +* **Students in Computer Science and Electrical Engineering:** Senior and graduate students will find this book particularly valuable. 
It introduces the techniques essential for designing and building ML systems, focusing on foundational knowledge rather than exhaustive detail---often the focus of classroom instruction. This book will provide the necessary background and context, enabling instructors to explore advanced topics more deeply. An essential feature is its end-to-end perspective, which is often overlooked in traditional curricula. + +* **Systems Engineers:** This book serves as a guide for engineers seeking to understand the complexities of intelligent systems and applications, particularly involving ML. It encompasses the conceptual frameworks and practical components that comprise an ML system, extending beyond the specific areas you might encounter in your professional role. + +* **Researchers and Academics:** For researchers, this book addresses the distinct challenges of executing machine learning algorithms across diverse platforms. As efficiency gains importance, a robust understanding of systems, beyond algorithms alone, is crucial for developing more efficient models. The book references seminal papers, directing researchers to works that have influenced the field and establishing connections between various areas with significant implications for their research. + +## How to Navigate This Book + +To get the most out of this book, we recommend a structured learning approach that leverages the various resources provided. Each chapter includes slides, videos, exercises, and labs to cater to different learning styles and reinforce your understanding. + +1. **Fundamentals (Chapters 1-3):** Start by building a strong foundation with the initial chapters, which provide an introduction to AI and cover core topics like AI systems and deep learning. + +2. **Workflow (Chapters 4-6):** With that foundation, move on to the chapters focused on practical aspects of the AI model building process like workflows, data engineering, and frameworks. + +3. 
**Training (Chapters 7-10):** These chapters offer insights into effectively training AI models, including techniques for efficiency, optimizations, and acceleration. + +4. **Deployment (Chapters 11-13):** Learn about deploying AI on devices and monitoring the operationalization through methods like benchmarking, on-device learning, and MLOps. + +5. **Advanced Topics (Chapters 14-18):** Critically examine topics like security, privacy, ethics, sustainability, robustness, and generative AI. + +6. **Social Impact (Chapter 19):** Explore the positive applications and potential of AI for societal good. + +7. **Conclusion (Chapter 20):** Reflect on the key takeaways and future directions in AI systems. + +While the book is designed for progressive learning, we encourage an interconnected learning approach that allows you to navigate chapters based on your interests and needs. Throughout the book, you'll find case studies and hands-on exercises that help you relate theory to real-world applications. We also recommend participating in forums and groups to engage in [discussions](https://github.com/harvard-edge/cs249r_book/discussions), debate concepts, and share insights with fellow learners. Regularly revisiting chapters can help reinforce your learning and offer new perspectives on the concepts covered. By adopting this structured yet flexible approach and actively engaging with the content and the community, you'll embark on a fulfilling and enriching learning experience that maximizes your understanding. + +## Chapter-by-Chapter Insights + +Here's a closer look at what each chapter covers. We have structured the book into six main sections: Fundamentals, Workflow, Training, Deployment, Advanced Topics, and Impact. These sections closely reflect the major components of a typical machine learning pipeline, from understanding the basic concepts to deploying and maintaining AI systems in real-world applications. 
By organizing the content in this manner, we aim to provide a logical progression that mirrors the actual process of developing and implementing AI systems. + +### Fundamentals + +In the Fundamentals section, we lay the groundwork for understanding AI. This is far from being a thorough deep dive into the algorithms, but we aim to introduce key concepts, provide an overview of machine learning systems, and dive into the principles and algorithms of deep learning that power AI applications in their associated systems. This section equips you with the essential knowledge needed to grasp the subsequent chapters. + +1. **[Introduction:](../introduction/introduction.qmd)** This chapter sets the stage, providing an overview of AI and laying the groundwork for the chapters that follow. +2. **[ML Systems:](../ml_systems/ml_systems.qmd)** We introduce the basics of machine learning systems, the platforms where AI algorithms are widely applied. +3. **[Deep Learning Primer:](../dl_primer/dl_primer.qmd)** This chapter offers a brief introduction to the algorithms and principles that underpin AI applications in ML systems. + +### Workflow + +The Workflow section guides you through the practical aspects of building AI models. We break down the AI workflow, discuss data engineering best practices, and review popular AI frameworks. By the end of this section, you'll have a clear understanding of the steps involved in developing proficient AI applications and the tools available to streamline the process. + +4. **[AI Workflow:](../workflow/workflow.qmd)** This chapter breaks down the machine learning workflow, offering insights into the steps leading to proficient AI applications. +5. **[Data Engineering:](../data_engineering/data_engineering.qmd)** We focus on the importance of data in AI systems, discussing how to effectively manage and organize data. +6. 
**[AI Frameworks:](../frameworks/frameworks.qmd)** This chapter reviews different frameworks for developing machine learning models, guiding you in choosing the most suitable one for your projects. + +### Training + +In the Training section, we explore techniques for training efficient and reliable AI models. We cover strategies for achieving efficiency, model optimizations, and the role of specialized hardware in AI acceleration. This section empowers you with the knowledge to develop high-performing models that can be seamlessly integrated into AI systems. + +7. **[AI Training:](../training/training.qmd)** This chapter explores model training, examining techniques for developing efficient and reliable models. +8. **[Efficient AI:](../efficient_ai/efficient_ai.qmd)** Here, we discuss strategies for achieving efficiency in AI applications, from computational resource optimization to performance enhancement. +9. **[Model Optimizations:](../optimizations/optimizations.qmd)** We explore various avenues for optimizing AI models for seamless integration into AI systems. +10. **[AI Acceleration:](../hw_acceleration/hw_acceleration.qmd)** We discuss the role of specialized hardware in enhancing the performance of AI systems. + +### Deployment + +The Deployment section focuses on the challenges and solutions for deploying AI models. We discuss benchmarking methods to evaluate AI system performance, techniques for on-device learning to improve efficiency and privacy, and the processes involved in ML operations. This section equips you with the skills to effectively deploy and maintain AI functionalities in AI systems. + +11. **[Benchmarking AI:](../benchmarking/benchmarking.qmd)** This chapter focuses on how to evaluate AI systems through systematic benchmarking methods. +12. **[On-Device Learning:](../ondevice_learning/ondevice_learning.qmd)** We explore techniques for localized learning, which enhances both efficiency and privacy. +13. 
**[ML Operations:](../ops/ops.qmd)** This chapter looks at the processes involved in the seamless integration, monitoring, and maintenance of AI functionalities. + +### Advanced Topics + +In the Advanced Topics section, we will study the critical issues surrounding AI. We address privacy and security concerns, explore the ethical principles of responsible AI, discuss strategies for sustainable AI development, examine techniques for building robust AI models, and introduce the exciting field of generative AI. This section broadens your understanding of the complex landscape of AI and prepares you to navigate its challenges. + +14. **[Security & Privacy:](../privacy_security/privacy_security.qmd)** As AI becomes more ubiquitous, this chapter addresses the crucial aspects of privacy and security in AI systems. +15. **[Responsible AI:](../responsible_ai/responsible_ai.qmd)** We discuss the ethical principles guiding the responsible use of AI, focusing on fairness, accountability, and transparency. +16. **[Sustainable AI:](../sustainable_ai/sustainable_ai.qmd)** This chapter explores practices and strategies for sustainable AI, ensuring long-term viability and reduced environmental impact. +17. **[Robust AI:](../robust_ai/robust_ai.qmd)** We discuss techniques for developing reliable and robust AI models that can perform consistently across various conditions. +18. **[Generative AI:](../generative_ai/generative_ai.qmd)** This chapter explores the algorithms and techniques behind generative AI, opening avenues for innovation and creativity. + +### Social Impact + +The Impact section highlights the transformative potential of AI in various domains. We showcase real-world applications of TinyML in healthcare, agriculture, conservation, and other areas where AI is making a positive difference. This section inspires you to leverage the power of AI for societal good and to contribute to the development of impactful solutions. + +19. 
**[AI for Good:](../ai_for_good/ai_for_good.qmd)** We highlight positive applications of TinyML in areas like healthcare, agriculture, and conservation. + +### Closing + +In the Closing section, we reflect on the key learnings from the book and look ahead to the future of AI. We synthesize the concepts covered, discuss emerging trends, and provide guidance on continuing your learning journey in this rapidly evolving field. This section leaves you with a comprehensive understanding of AI and the excitement to apply your knowledge in innovative ways. + +20. **[Conclusion:](../conclusion/conclusion.qmd)** The book concludes with a reflection on the key learnings and future directions in the field of AI. + +## Tailored Learning + +We understand that readers have diverse interests; some may wish to grasp the fundamentals, while others are eager to delve into advanced topics like hardware acceleration or AI ethics. To help you navigate the book more effectively, we've created a persona-based reading guide tailored to your specific interests and goals. This guide assists you in identifying the reader persona that best matches your interests. Each persona represents a distinct reader profile with specific objectives. By selecting the persona that resonates with you, you can focus on the chapters and sections most relevant to your needs. 
+ ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| Persona | Description | Chapters | Focus | ++:=======================+:=========================================================================+:==============================================+:==========================================================================================================+ +| The TinyML Newbie | You are new to the field of TinyML and eager to learn the basics. | 1-3, 8, 9, 10, 12 | Understand the fundamentals, gain insights into efficient and optimized ML, | +| | | | and learn about on-device learning. | ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| The EdgeML Enthusiast | You have some TinyML knowledge and are interested in exploring | 1-3, 8, 9, 10, 12, 13 | Build a strong foundation, delve into the intricacies of efficient ML, | +| | the broader world of EdgeML. | | and explore the operational aspects of embedded systems. | ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| The Computer Visionary | You are fascinated by computer vision and its applications in TinyML | 1-3, 5, 8-10, 12, 13, 17, 20 | Start with the basics, explore data engineering, and study methods for optimizing ML | +| | and EdgeML. | | models. Learn about robustness and the future of ML systems. 
| ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| The Data Maestro | You are passionate about data and its crucial role in ML systems. | 1-5, 8-13 | Gain a comprehensive understanding of data's role in ML systems, explore the ML | +| | | | workflow, and dive into model optimization and deployment considerations. | ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| The Hardware Hero | You are excited about the hardware aspects of ML systems and how | 1-3, 6, 8-10, 12, 14, 17, 20 | Build a solid foundation in ML systems and frameworks, explore challenges of | +| | they impact model performance. | | optimizing models for efficiency, hardware-software co-design, and security aspects. | ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| The Sustainability | You are an advocate for sustainability and want to learn how to | 1-3, 8-10, 12, 15, 16, 20 | Begin with the fundamentals of ML systems and TinyML, explore model optimization | +| Champion | develop eco-friendly AI systems. | | techniques, and learn about responsible and sustainable AI practices. 
| ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| The AI Ethicist | You are concerned about the ethical implications of AI and want to | 1-3, 5, 7, 12, 14-16, 19, 20 | Gain insights into the ethical considerations surrounding AI, including fairness, | +| | ensure responsible development and deployment. | | privacy, sustainability, and responsible development practices. | ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +| The Full-Stack ML | You are a seasoned ML expert and want to deepen your understanding | The entire book | Understand the end-to-end process of building and deploying ML systems, from data | +| Engineer | of the entire ML system stack. | | engineering and model optimization to hardware acceleration and ethical considerations. | ++------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ + +## Join the Community + +Learning in the fast-paced world of AI is a collaborative journey. We set out to nurture a vibrant community of learners, innovators, and contributors. As you explore the concepts and engage with the exercises, we encourage you to share your insights and experiences. Whether it's a novel approach, an interesting application, or a thought-provoking question, your contributions can enrich the learning ecosystem. 
Engage in discussions, offer and seek guidance, and collaborate on projects to foster a culture of mutual growth and learning. By sharing knowledge, you play an important role in fostering a globally connected, informed, and empowered community. diff --git a/contents/core/acknowledgements/acknowledgements.qmd b/contents/core/acknowledgements/acknowledgements.qmd index 7f285aab..85fe1434 100644 --- a/contents/core/acknowledgements/acknowledgements.qmd +++ b/contents/core/acknowledgements/acknowledgements.qmd @@ -4,32 +4,224 @@ comments: false # Acknowledgements {.unnumbered} -Assembling this book has been a long journey, spanning several years of hard work. The initial idea for this book sprang from the [TinyML edX course](https://www.edx.org/certificates/professional-certificate/harvardx-tiny-machine-learning), and its realization would not have been possible without the invaluable contributions of countless individuals. We are deeply indebted to the researchers whose groundbreaking work laid the foundation for this book. +This book, inspired by the [TinyML edX course](https://www.edx.org/certificates/professional-certificate/harvardx-tiny-machine-learning) and CS294r at Harvard University, is the result of years of hard work and collaboration with many students, researchers and practitioners. We are deeply indebted to the folks whose groundbreaking work laid its foundation. -## Individual Contributors - -We extend our heartfelt gratitude to the open source community of learners, teachers and sharers. Whether you contributed an entire section, a single sentence, or merely corrected a typo, your efforts have enhanced this book. We deeply appreciate everyone's time, expertise, and commitment. This book is as much yours as it is ours. - -Special thanks go to Professor Vijay Janapa Reddi, whose belief in the transformative power of open-source communities and invaluable guidance have been our guiding light from the outset. 
- -We also owe a great deal to the team at GitHub and Quarto. You've revolutionized the way people collaborate, and this book stands as a testament to what can be achieved when barriers to global cooperation are removed. +As our understanding of machine learning systems deepened, we realized that fundamental principles apply across scales, from tiny embedded systems to large-scale deployments. This realization shaped the book's expansion into an exploration of machine learning systems with the aim of providing a foundation applicable across the spectrum of implementations. ## Funding Agencies and Companies -We are immensely grateful for the generous support from the various funding agencies and companies that supported the teaching assistants (TAs) involved in this work. The organizations listed below played a crucial role in bringing this project to life with their contributions. +We are grateful for the support from various funding agencies and companies that backed the teaching assistants involved in this work. The following organizations played a crucial role in bringing this project to life: ::: {layout-nrow=2} -![](./images/png/HDSI.png){height=100%} +![](images/png/HDSI.png){height=100%} -![](./images/png/harvard-xtension-school.png){height=100%} +![](images/png/harvard-xtension-school.png){height=100%} -![](./images/png/google.png){height=100%} +![](images/png/google.png){height=100%} -![](./images/png/NSF.png){height=100%} +![](images/png/NSF.png){height=100%} ::: -## To Our Readers - -To all who pick up this book, we want to thank you! We wrote it with you in mind, hoping to provoke thought, inspire questions, and perhaps even ignite a spark of inspiration. After all, what is the point of writing if no one is reading? +## Contributors + +We express our sincere gratitude to the open-source community of learners, educators, and contributors. 
Each contribution, whether a chapter section or a single-word correction, has significantly enhanced the quality of this resource. We also acknowledge those who have shared insights, identified issues, and provided valuable feedback behind the scenes. + +A comprehensive list of all GitHub contributors, automatically updated with each new contribution, is available below. For those interested in contributing further, please consult our [GitHub](https://github.com/harvard-edge/cs249r_book) page for more information. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Vijay Janapa Reddi
Vijay Janapa Reddi

jasonjabbour
jasonjabbour

Ikechukwu Uchendu
Ikechukwu Uchendu

Naeem Khoshnevis
Naeem Khoshnevis

Marcelo Rovai
Marcelo Rovai

Sara Khosravi
Sara Khosravi

Douwe den Blanken
Douwe den Blanken

shanzehbatool
shanzehbatool

Kai Kleinbard
Kai Kleinbard

Elias Nuwara
Elias Nuwara

Matthew Stewart
Matthew Stewart

Jared Ping
Jared Ping

Itai Shapira
Itai Shapira

Maximilian Lam
Maximilian Lam

Jayson Lin
Jayson Lin

Sophia Cho
Sophia Cho

Jeffrey Ma
Jeffrey Ma

Andrea
Andrea

Alex Rodriguez
Alex Rodriguez

Korneel Van den Berghe
Korneel Van den Berghe

Colby Banbury
Colby Banbury

Zishen Wan
Zishen Wan

Srivatsan Krishnan
Srivatsan Krishnan

Divya Amirtharaj
Divya Amirtharaj

Abdulrahman Mahmoud
Abdulrahman Mahmoud

Aghyad Deeb
Aghyad Deeb

Haoran Qiu
Haoran Qiu

marin-llobet
marin-llobet

Aditi Raju
Aditi Raju

Jared Ni
Jared Ni

Michael Schnebly
Michael Schnebly

oishib
oishib

Emil Njor
Emil Njor

ELSuitorHarvard
ELSuitorHarvard

Henry Bae
Henry Bae

Yu-Shun Hsiao
Yu-Shun Hsiao

Jae-Won Chung
Jae-Won Chung

Mark Mazumder
Mark Mazumder

Pong Trairatvorakul
Pong Trairatvorakul

Shvetank Prakash
Shvetank Prakash

Andrew Bass
Andrew Bass

Marco Zennaro
Marco Zennaro

Emeka Ezike
Emeka Ezike

Eura Nofshin
Eura Nofshin

Jennifer Zhou
Jennifer Zhou

Arya Tschand
Arya Tschand

Fin Amin
Fin Amin

Bruno Scaglione
Bruno Scaglione

Gauri Jain
Gauri Jain

Allen-Kuang
Allen-Kuang

Fatima Shah
Fatima Shah

The Random DIY
The Random DIY

gnodipac886
gnodipac886

Sercan Aygün
Sercan Aygün

Alex Oesterling
Alex Oesterling

Abenezer
Abenezer

Emmanuel Rassou
Emmanuel Rassou

abigailswallow
abigailswallow

Yang Zhou
Yang Zhou

Bilge Acun
Bilge Acun

happyappledog
happyappledog

Jessica Quaye
Jessica Quaye

Jason Yik
Jason Yik

Shreya Johri
Shreya Johri

Sonia Murthy
Sonia Murthy

Costin-Andrei Oncescu
Costin-Andrei Oncescu

Baldassarre Cesarano
Baldassarre Cesarano

Annie Laurie Cook
Annie Laurie Cook

Vijay Edupuganti
Vijay Edupuganti

Jothi Ramaswamy
Jothi Ramaswamy

Batur Arslan
Batur Arslan

Curren Iyer
Curren Iyer

Fatima Shah
Fatima Shah

yanjingl
yanjingl

a-saraf
a-saraf

songhan
songhan

Zishen
Zishen

+ + + + + + + + \ No newline at end of file diff --git a/contents/core/ai_for_good/ai_for_good.qmd b/contents/core/ai_for_good/ai_for_good.qmd index 04db8594..d2228fdf 100644 --- a/contents/core/ai_for_good/ai_for_good.qmd +++ b/contents/core/ai_for_good/ai_for_good.qmd @@ -5,7 +5,7 @@ bibliography: ai_for_good.bib # AI for Good {#sec-ai_for_good} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-ai-for-good-resource), [Videos](#sec-ai-for-good-resource), [Exercises](#sec-ai-for-good-resource), [Labs](#sec-ai-for-good-resource) +Resources: [Slides](#sec-ai-for-good-resource), [Videos](#sec-ai-for-good-resource), [Exercises](#sec-ai-for-good-resource) ::: ![_DALL·E 3 Prompt: Illustration of planet Earth wrapped in shimmering neural networks, with diverse humans and AI robots working together on various projects like planting trees, cleaning the oceans, and developing sustainable energy solutions. The positive and hopeful atmosphere represents a united effort to create a better future._](images/png/cover_ai_good.png) @@ -30,7 +30,7 @@ By aligning AI progress with human values, goals, and ethics, the ultimate goal ::: -## Introduction +## Overview To give ourselves a framework around which to think about AI for social good, we will be following the UN Sustainable Development Goals (SDGs). The UN SDGs are a collection of 17 global goals, shown in @fig-sdg, adopted by the United Nations in 2015 as part of the 2030 Agenda for Sustainable Development. The SDGs address global challenges related to poverty, inequality, climate change, environmental degradation, prosperity, and peace and justice. @@ -285,15 +285,3 @@ These slides are a valuable tool for instructors to deliver lectures and for stu * @exr-hc ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. 
These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: - - - diff --git a/contents/core/benchmarking/benchmarking.qmd b/contents/core/benchmarking/benchmarking.qmd index 9a889ef0..af5546e6 100644 --- a/contents/core/benchmarking/benchmarking.qmd +++ b/contents/core/benchmarking/benchmarking.qmd @@ -5,7 +5,7 @@ bibliography: benchmarking.bib # Benchmarking AI {#sec-benchmarking_ai} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-benchmarking-ai-resource), [Videos](#sec-benchmarking-ai-resource), [Exercises](#sec-benchmarking-ai-resource), [Labs](#sec-benchmarking-ai-resource) +Resources: [Slides](#sec-benchmarking-ai-resource), [Videos](#sec-benchmarking-ai-resource), [Exercises](#sec-benchmarking-ai-resource) ::: ![_DALL·E 3 Prompt: Photo of a podium set against a tech-themed backdrop. On each tier of the podium, there are AI chips with intricate designs. The top chip has a gold medal hanging from it, the second one has a silver medal, and the third has a bronze medal. Banners with 'AI Olympics' are displayed prominently in the background._](images/png/cover_ai_benchmarking.png) @@ -36,7 +36,7 @@ This chapter will provide an overview of popular ML benchmarks, best practices f ::: -## Introduction +## Overview Benchmarking provides the essential measurements needed to drive machine learning progress and truly understand system performance. As the physicist Lord Kelvin famously said, "To measure is to know." Benchmarks allow us to quantitatively know the capabilities of different models, software, and hardware. They allow ML developers to measure the inference time, memory usage, power consumption, and other metrics that characterize a system. 
Moreover, benchmarks create standardized processes for measurement, enabling fair comparisons across different solutions. @@ -827,13 +827,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-perf ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ - -::: diff --git a/contents/core/conclusion/conclusion.qmd b/contents/core/conclusion/conclusion.qmd index d27e77a3..694741cb 100644 --- a/contents/core/conclusion/conclusion.qmd +++ b/contents/core/conclusion/conclusion.qmd @@ -6,7 +6,7 @@ bibliography: conclusion.bib ![_DALL·E 3 Prompt: An image depicting the last chapter of an ML systems book, open to a two-page spread. The pages summarize key concepts such as neural networks, model architectures, hardware acceleration, and MLOps. One page features a diagram of a neural network and different model architectures, while the other page shows illustrations of hardware components for acceleration and MLOps workflows. The background includes subtle elements like circuit patterns and data points to reinforce the technological theme. The colors are professional and clean, with an emphasis on clarity and understanding._](images/png/cover_conclusion.png) -## Introduction +## Overview This book examines the rapidly evolving field of ML systems (@sec-ml_systems). We focused on systems because while there are many resources on ML models and algorithms, more needs to be understood about how to build the systems that run them. 
diff --git a/contents/core/data_engineering/data_engineering.qmd b/contents/core/data_engineering/data_engineering.qmd index 4613a084..575efe0f 100644 --- a/contents/core/data_engineering/data_engineering.qmd +++ b/contents/core/data_engineering/data_engineering.qmd @@ -5,7 +5,7 @@ bibliography: data_engineering.bib # Data Engineering {#sec-data_engineering} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-data-engineering-resource), [Videos](#sec-data-engineering-resource), [Exercises](#sec-data-engineering-resource), [Labs](#sec-data-engineering-resource) +Resources: [Slides](#sec-data-engineering-resource), [Videos](#sec-data-engineering-resource), [Exercises](#sec-data-engineering-resource) ::: ![_DALL·E 3 Prompt: Create a rectangular illustration visualizing the concept of data engineering. Include elements such as raw data sources, data processing pipelines, storage systems, and refined datasets. Show how raw data is transformed through cleaning, processing, and storage to become valuable information that can be analyzed and used for decision-making._](images/png/cover_data_engineering.png) @@ -32,7 +32,7 @@ Data is the lifeblood of AI systems. Without good data, even the most advanced m ::: -## Introduction +## Overview Imagine a world where AI can diagnose diseases with unprecedented accuracy, but only if the data used to train it is unbiased and reliable. This is where data engineering comes in. While over 90% of the world's data has been created in the past two decades, this vast amount of information is only helpful for building effective AI models with proper processing and preparation. Data engineering bridges this gap by transforming raw data into a high-quality format that fuels AI innovation. In today's data-driven world, protecting user privacy is paramount. Whether mandated by law or driven by user concerns, anonymization techniques like differential privacy and aggregation are vital in mitigating privacy risks. 
However, careful implementation is crucial to ensure these methods don't compromise data utility. Dataset creators face complex privacy and representation challenges when building high-quality training data, especially for sensitive domains like healthcare. Legally, creators may need to remove direct identifiers like names and ages. Even without legal obligations, removing such information can help build user trust. However, excessive anonymization can compromise dataset utility. Techniques like differential privacy$^{1}$, aggregation, and reducing detail provide alternatives to balance privacy and utility but have downsides. Creators must strike a thoughtful balance based on the use case. @@ -553,15 +553,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-bl ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. 
- -* _Coming soon._ -::: - - - diff --git a/contents/core/dl_primer/dl_primer.qmd b/contents/core/dl_primer/dl_primer.qmd index 0d3b4dd7..95710bbf 100644 --- a/contents/core/dl_primer/dl_primer.qmd +++ b/contents/core/dl_primer/dl_primer.qmd @@ -5,7 +5,7 @@ bibliography: dl_primer.bib # DL Primer {#sec-dl_primer} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-deep-learning-primer-resource), [Videos](#sec-deep-learning-primer-resource), [Exercises](#sec-deep-learning-primer-resource), [Labs](#sec-deep-learning-primer-resource) +Resources: [Slides](#sec-deep-learning-primer-resource), [Videos](#sec-deep-learning-primer-resource), [Exercises](#sec-deep-learning-primer-resource) ::: ![_DALL·E 3 Prompt: Photo of a classic classroom with a large blackboard dominating one wall. Chalk drawings showcase a detailed deep neural network with several hidden layers, and each node and connection is precisely labeled with white chalk. The rustic wooden floor and brick walls provide a contrast to the modern concepts. 
Surrounding the room, posters mounted on frames emphasize deep learning themes: convolutional networks, transformers, neurons, activation functions, and more._](images/png/cover_dl_primer.png) @@ -28,7 +28,7 @@ The primer explores major deep learning architectures from a systems perspective ::: -## Introduction +## Overview ### Definition and Importance @@ -375,10 +375,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-cnn ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -* _Coming soon._ -::: diff --git a/contents/core/efficient_ai/efficient_ai.qmd b/contents/core/efficient_ai/efficient_ai.qmd index 395188c7..bb79f36c 100644 --- a/contents/core/efficient_ai/efficient_ai.qmd +++ b/contents/core/efficient_ai/efficient_ai.qmd @@ -5,7 +5,7 @@ bibliography: efficient_ai.bib # Efficient AI {#sec-efficient_ai} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-efficient-ai-resource), [Videos](#sec-efficient-ai-resource), [Exercises](#sec-efficient-ai-resource), [Labs](#sec-efficient-ai-resource) +Resources: [Slides](#sec-efficient-ai-resource), [Videos](#sec-efficient-ai-resource), [Exercises](#sec-efficient-ai-resource) ::: ![_DALL·E 3 Prompt: A conceptual illustration depicting efficiency in artificial intelligence using a shipyard analogy. The scene shows a bustling shipyard where containers represent bits or bytes of data. These containers are being moved around efficiently by cranes and vehicles, symbolizing the streamlined and rapid information processing in AI systems. The shipyard is meticulously organized, illustrating the concept of optimal performance within the constraints of limited resources. In the background, ships are docked, representing different platforms and scenarios where AI is applied. 
The atmosphere should convey advanced technology with an underlying theme of sustainability and wide applicability._](images/png/cover_efficient_ai.png) @@ -36,7 +36,7 @@ Efficiency in artificial intelligence is not simply a luxury but a necessity. In The focus is on gaining a conceptual understanding of the motivations and significance of the various strategies for achieving efficient AI, both in terms of techniques and a holistic perspective. Subsequent chapters provide a more in-depth exploration of these multiple concepts. -## Introduction +## Overview Training models can consume significant energy, sometimes equivalent to the carbon footprint of sizable industrial processes. We will cover some of these sustainability details in the [AI Sustainability](../sustainable_ai/sustainable_ai.qmd) chapter. On the deployment side, if these models are not optimized for efficiency, they can quickly drain device batteries, demand excessive memory, or fall short of real-time processing needs. Through this chapter, we aim to elucidate the nuances of efficiency, setting the groundwork for a comprehensive exploration in the subsequent chapters. @@ -255,13 +255,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer - _Coming soon._ ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. 
- -- _Coming soon._ -::: - diff --git a/contents/core/frameworks/frameworks.qmd b/contents/core/frameworks/frameworks.qmd index 908d1b3f..1288d463 100644 --- a/contents/core/frameworks/frameworks.qmd +++ b/contents/core/frameworks/frameworks.qmd @@ -5,7 +5,7 @@ bibliography: frameworks.bib # AI Frameworks {#sec-ai_frameworks} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-ai-frameworks-resource), [Videos](#sec-ai-frameworks-resource), [Exercises](#sec-ai-frameworks-resource), [Labs](#sec-ai-frameworks-resource) +Resources: [Slides](#sec-ai-frameworks-resource), [Videos](#sec-ai-frameworks-resource), [Exercises](#sec-ai-frameworks-resource) ::: ![_DALL·E 3 Prompt: Illustration in a rectangular format, designed for a professional textbook, where the content spans the entire width. The vibrant chart represents training and inference frameworks for ML. Icons for TensorFlow, Keras, PyTorch, ONNX, and TensorRT are spread out, filling the entire horizontal space, and aligned vertically. Each icon is accompanied by brief annotations detailing their features. The lively colors like blues, greens, and oranges highlight the icons and sections against a soft gradient background. The distinction between training and inference frameworks is accentuated through color-coded sections, with clean lines and modern typography maintaining clarity and focus._](images/png/cover_ml_frameworks.png) @@ -34,7 +34,7 @@ Furthermore, we investigate the specialization of frameworks tailored to specifi ::: -## Introduction +## Overview Machine learning frameworks provide the tools and infrastructure to efficiently build, train, and deploy machine learning models. In this chapter, we will explore the evolution and key capabilities of major frameworks like [TensorFlow (TF)](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/), and specialized frameworks for embedded devices. 
We will dive into the components like computational graphs, optimization algorithms, hardware acceleration, and more that enable developers to construct performant models quickly. Understanding these frameworks is essential to leverage the power of deep learning across the spectrum from cloud to edge devices. @@ -909,15 +909,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-k ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: - - - diff --git a/contents/core/hw_acceleration/hw_acceleration.qmd b/contents/core/hw_acceleration/hw_acceleration.qmd index 057efb8a..f247bcae 100644 --- a/contents/core/hw_acceleration/hw_acceleration.qmd +++ b/contents/core/hw_acceleration/hw_acceleration.qmd @@ -5,7 +5,7 @@ bibliography: hw_acceleration.bib # AI Acceleration {#sec-ai_acceleration} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-ai-acceleration-resource), [Videos](#sec-ai-acceleration-resource), [Exercises](#sec-ai-acceleration-resource), [Labs](#sec-ai-acceleration-resource) +Resources: [Slides](#sec-ai-acceleration-resource), [Videos](#sec-ai-acceleration-resource), [Exercises](#sec-ai-acceleration-resource) ::: ![_DALL·E 3 Prompt: Create an intricate and colorful representation of a System on Chip (SoC) design in a rectangular format. Showcase a variety of specialized machine learning accelerators and chiplets, all integrated into the processor. Provide a detailed view inside the chip, highlighting the rapid movement of electrons. 
Each accelerator and chiplet should be designed to interact with neural network neurons, layers, and activations, emphasizing their processing speed. Depict the neural networks as a network of interconnected nodes, with vibrant data streams flowing between the accelerator pieces, showcasing the enhanced computation speed._](images/png/cover_ai_hardware.png) @@ -32,7 +32,7 @@ Deploying ML on edge devices presents challenges such as limited processing spee ::: -## Introduction +## Overview You've probably noticed the growing demand for embedding machine learning into everyday devices—like the smartphones in our pockets, smart home appliances, and even autonomous vehicles. Bringing ML capabilities into these real-world environments is exciting, but it comes with its own set of challenges. Unlike powerful data center servers, these edge devices have limited computing resources, making it tricky to run complex models effectively. @@ -1106,10 +1106,3 @@ Here is a curated list of resources to support students and instructors in their * @exr-tvm ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -* _Coming soon._ -::: diff --git a/contents/core/introduction/image.png b/contents/core/introduction/image.png new file mode 100644 index 00000000..309a9612 Binary files /dev/null and b/contents/core/introduction/image.png differ diff --git a/contents/core/introduction/images/png/alexnet_arch.png b/contents/core/introduction/images/png/alexnet_arch.png new file mode 100644 index 00000000..0ac147ac Binary files /dev/null and b/contents/core/introduction/images/png/alexnet_arch.png differ diff --git a/contents/core/introduction/images/png/alphafold.gif b/contents/core/introduction/images/png/alphafold.gif new file mode 100644 index 00000000..a164287d Binary files /dev/null and b/contents/core/introduction/images/png/alphafold.gif differ diff --git a/contents/core/introduction/images/png/alphafold.png b/contents/core/introduction/images/png/alphafold.png new file mode 
100644 index 00000000..bcb1788a Binary files /dev/null and b/contents/core/introduction/images/png/alphafold.png differ diff --git a/contents/core/introduction/images/png/book_pillars.png b/contents/core/introduction/images/png/book_pillars.png new file mode 100644 index 00000000..7949c0ca Binary files /dev/null and b/contents/core/introduction/images/png/book_pillars.png differ diff --git a/contents/core/introduction/images/png/farmbeats.png b/contents/core/introduction/images/png/farmbeats.png new file mode 100644 index 00000000..e22fc190 Binary files /dev/null and b/contents/core/introduction/images/png/farmbeats.png differ diff --git a/contents/core/introduction/images/png/hidden_debt.png b/contents/core/introduction/images/png/hidden_debt.png new file mode 100644 index 00000000..d171877b Binary files /dev/null and b/contents/core/introduction/images/png/hidden_debt.png differ diff --git a/contents/core/introduction/images/png/ml_lifecycle_overview.png b/contents/core/introduction/images/png/ml_lifecycle_overview.png new file mode 100644 index 00000000..53124511 Binary files /dev/null and b/contents/core/introduction/images/png/ml_lifecycle_overview.png differ diff --git a/contents/core/introduction/images/png/triangle.png b/contents/core/introduction/images/png/triangle.png new file mode 100644 index 00000000..3b2b9e56 Binary files /dev/null and b/contents/core/introduction/images/png/triangle.png differ diff --git a/contents/core/introduction/introduction.bib b/contents/core/introduction/introduction.bib index d4ae204a..7d6871c7 100644 --- a/contents/core/introduction/introduction.bib +++ b/contents/core/introduction/introduction.bib @@ -1,18 +1,33 @@ %comment{This file was created with betterbib v5.0.11.} - @article{weiser1991computer, - author = {Weiser, Mark}, - title = {The Computer for the 21st Century}, - journal = {Sci. 
Am.}, - volume = {265}, - number = {3}, - pages = {94--104}, - year = {1991}, - publisher = {Springer Science and Business Media LLC}, - doi = {10.1038/scientificamerican0991-94}, - source = {Crossref}, - url = {https://doi.org/10.1038/scientificamerican0991-94}, - issn = {0036-8733}, - month = sep, + number = {3}, + doi = {10.1038/scientificamerican0991-94}, + pages = {94--104}, + source = {Crossref}, + volume = {265}, + author = {Weiser, Mark}, + year = {1991}, + month = sep, + url = {https://doi.org/10.1038/scientificamerican0991-94}, + issn = {0036-8733}, + journal = {Scientific American}, + publisher = {Springer Science and Business Media LLC}, + title = {The Computer for the 21st Century}, } + +@article{krizhevsky2012imagenet, + number = {6}, + doi = {10.1145/3065386}, + pages = {84--90}, + source = {Crossref}, + volume = {60}, + author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.}, + year = {2017}, + month = may, + url = {https://doi.org/10.1145/3065386}, + issn = {0001-0782,1557-7317}, + journal = {Communications of the ACM}, + publisher = {Association for Computing Machinery (ACM)}, + title = {ImageNet classification with deep convolutional neural networks}, +} \ No newline at end of file diff --git a/contents/core/introduction/introduction.qmd b/contents/core/introduction/introduction.qmd index 79e0e122..222b0b96 100644 --- a/contents/core/introduction/introduction.qmd +++ b/contents/core/introduction/introduction.qmd @@ -6,156 +6,427 @@ bibliography: introduction.bib ![_DALL·E 3 Prompt: A detailed, rectangular, flat 2D illustration depicting a roadmap of a book's chapters on machine learning systems, set on a crisp, clean white background. The image features a winding road traveling through various symbolic landmarks. 
Each landmark represents a chapter topic: Introduction, ML Systems, Deep Learning, AI Workflow, Data Engineering, AI Frameworks, AI Training, Efficient AI, Model Optimizations, AI Acceleration, Benchmarking AI, On-Device Learning, Embedded AIOps, Security & Privacy, Responsible AI, Sustainable AI, AI for Good, Robust AI, Generative AI. The style is clean, modern, and flat, suitable for a technical book, with each landmark clearly labeled with its chapter title._](images/png/cover_introduction.png) -## Overview +## Why Machine Learning Systems Matter -In the early 1990s, [Mark Weiser](https://en.wikipedia.org/wiki/Mark_Weiser), a pioneering computer scientist, introduced the world to a revolutionary concept that would forever change how we interact with technology. This vision was succinctly captured in his seminal paper, "The Computer for the 21st Century" (see @fig-ubiquitous). Weiser envisioned a future where computing would be seamlessly integrated into our environments, becoming an invisible, integral part of daily life. +AI is everywhere. Consider your morning routine: You wake up to an AI-powered smart alarm that learned your sleep patterns. Your phone suggests your route to work, having learned from traffic patterns. During your commute, your music app automatically creates a playlist it thinks you'll enjoy. At work, your email client filters spam and prioritizes important messages. Throughout the day, your smartwatch monitors your activity, suggesting when to move or exercise. In the evening, your streaming service recommends shows you might like, while your smart home devices adjust lighting and temperature based on your learned preferences. + +But these everyday conveniences are just the beginning. AI is transforming our world in extraordinary ways. Today, AI systems detect early-stage cancers with unprecedented accuracy, predict and track extreme weather events to save lives, and accelerate drug discovery by simulating millions of molecular interactions. 
Autonomous vehicles navigate complex city streets while processing real-time sensor data from dozens of sources. Language models engage in sophisticated conversations, translate between hundreds of languages, and help scientists analyze vast research databases. In scientific laboratories, AI systems are making breakthrough discoveries - from predicting protein structures that unlock new medical treatments to identifying promising materials for next-generation solar cells and batteries. Even in creative fields, AI collaborates with artists and musicians to explore new forms of expression, pushing the boundaries of human creativity. + +This isn't science fiction---it's the reality of how artificial intelligence, specifically machine learning systems, has become woven into the fabric of our daily lives. In the early 1990s, [Mark Weiser](https://en.wikipedia.org/wiki/Mark_Weiser), a pioneering computer scientist, introduced the world to a revolutionary concept that would forever change how we interact with technology. This vision was succinctly captured in his seminal paper, "The Computer for the 21st Century" (see @fig-ubiquitous). Weiser envisioned a future where computing would be seamlessly integrated into our environments, becoming an invisible, integral part of daily life. ![Ubiquitous computing as envisioned by Mark Weiser.](images/png/21st_computer.png){#fig-ubiquitous width=50%} -He termed this concept "ubiquitous computing," promising a world where technology would serve us without demanding our constant attention or interaction. Fast forward to today, and we find ourselves on the cusp of realizing Weiser's vision, thanks to the advent and proliferation of machine learning systems. +He termed this concept "ubiquitous computing," promising a world where technology would serve us without demanding our constant attention or interaction. Today, we find ourselves living in Weiser's envisioned future, largely enabled by machine learning systems. 
The true essence of his vision—creating an intelligent environment that can anticipate our needs and act on our behalf—has become reality through the development and deployment of ML systems that span entire ecosystems, from powerful cloud data centers to edge devices to the tiniest IoT sensors. + +Yet most of us rarely think about the complex systems that make this possible. Behind each of these seemingly simple interactions lies a sophisticated infrastructure of data, algorithms, and computing resources working together. Understanding how these systems work—their capabilities, limitations, and requirements—has become increasingly critical as they become more integrated into our world. + +To appreciate the magnitude of this transformation and the complexity of modern machine learning systems, we need to understand how we got here. The journey from early artificial intelligence to today's ubiquitous ML systems is a story of not just technological evolution, but of changing perspectives on what's possible and what's necessary to make AI practical and reliable. + +## The Evolution of AI + +The evolution of AI, depicted in the timeline shown in @fig-ai-timeline, highlights key milestones such as the development of the **perceptron**[^defn-perceptron] in 1957 by Frank Rosenblatt, a foundational element for modern neural networks. Imagine walking into a computer lab in 1965. You'd find room-sized mainframes running programs that could prove basic mathematical theorems or play simple games like tic-tac-toe. These early artificial intelligence systems, while groundbreaking for their time, were a far cry from today's machine learning systems that can detect cancer in medical images or understand human speech. The timeline shows the progression from early innovations like the ELIZA chatbot in 1966, to significant breakthroughs such as IBM's Deep Blue defeating chess champion Garry Kasparov in 1997. 
More recent advancements include the introduction of OpenAI's GPT-3 in 2020 and GPT-4 in 2023, demonstrating the dramatic evolution and increasing complexity of AI systems over the decades. + +[^defn-perceptron]: The first artificial neural network—a simple model that could learn to classify visual patterns, similar to a single neuron making a yes/no decision based on its inputs. + +![Milestones in AI from 1950 to 2020. Source: IEEE Spectrum](https://spectrum.ieee.org/media-library/a-chart-of-milestones-in-ai-from-1950-to-2020.png?id=27547255){#fig-ai-timeline} + +Let's explore how we got here. + +### Symbolic AI (1956-1974) + +The story of machine learning begins at the historic Dartmouth Conference in 1956, where pioneers like John McCarthy, Marvin Minsky, and Claude Shannon first coined the term "artificial intelligence." Their approach was based on a compelling idea: intelligence could be reduced to symbol manipulation. Consider Daniel Bobrow's STUDENT system from 1964, one of the first AI programs that could solve algebra word problems: + +::: {.callout-note} +### Example: STUDENT (1964) + +``` +Problem: "If the number of customers Tom gets is twice the +square of 20% of the number of advertisements he runs, and +the number of advertisements is 45, what is the number of +customers Tom gets?" + +STUDENT would: + +1. Parse the English text +2. Convert it to algebraic equations +3. Solve the equation: n = 2(0.2 × 45)² +4. Provide the answer: 162 customers +``` +::: + +Early AI like STUDENT suffered from a fundamental limitation: they could only handle inputs that exactly matched their pre-programmed patterns and rules. Imagine a language translator that only works when sentences follow perfect grammatical structure---even slight variations like changing word order, using synonyms, or natural speech patterns would cause the STUDENT to fail. 
This "brittleness" meant that while these solutions could appear intelligent when handling very specific cases they were designed for, they would break down completely when faced with even minor variations or real-world complexity. This limitation wasn't just a technical inconvenience—it revealed a deeper problem with rule-based approaches to AI: they couldn't genuinely understand or generalize from their programming; they could only match and manipulate patterns exactly as specified. + +### Expert Systems (1970s-1980s) + +By the mid-1970s, researchers realized that general AI was too ambitious. Instead, they focused on capturing human expert knowledge in specific domains. MYCIN, developed at Stanford, was one of the first large-scale expert systems designed to diagnose blood infections: + +::: {.callout-note} +### Example: MYCIN (1976) +``` +Rule Example from MYCIN: +IF + The infection is primary-bacteremia + The site of the culture is one of the sterile sites + The suspected portal of entry is the gastrointestinal tract +THEN + There is suggestive evidence (0.7) that infection is bacteroid +``` +::: + +While MYCIN represented a major advance in medical AI with its 600 expert rules for diagnosing blood infections, it revealed fundamental challenges that still plague ML today. Getting domain knowledge from human experts and converting it into precise rules proved incredibly time-consuming and difficult—doctors often couldn't explain exactly how they made decisions. MYCIN struggled with uncertain or incomplete information, unlike human doctors who could make educated guesses. Perhaps most importantly, maintaining and updating the rule base became exponentially more complex as MYCIN grew—adding new rules often conflicted with existing ones, and medical knowledge itself kept evolving.
These same challenges of knowledge capture, uncertainty handling, and maintenance remain central concerns in modern machine learning, even though we now use different technical approaches to address them. + +### Statistical Learning: A Paradigm Shift (1990s) + +The 1990s marked a radical transformation in artificial intelligence as the field moved away from hand-coded rules toward statistical learning approaches. This wasn't a simple choice—it was driven by three converging factors that made statistical methods both possible and powerful. The digital revolution meant massive amounts of data were suddenly available to train the algorithms. **Moore's Law**[^defn-mooreslaw] delivered the computational power needed to process this data effectively. And researchers developed new algorithms like Support Vector Machines and improved neural networks that could actually learn patterns from this data rather than following pre-programmed rules. This combination fundamentally changed how we built AI: instead of trying to encode human knowledge directly, we could now let machines discover patterns automatically from examples, leading to more robust and adaptable AI. + +[^defn-mooreslaw]: The observation made by Intel co-founder Gordon Moore in 1965 that the number of transistors on a microchip doubles approximately every two years, while the cost halves. This exponential growth in computing power has been a key driver of advances in machine learning, though the pace has begun to slow in recent years. 
+ +Consider how email spam filtering evolved: + +::: {.callout-note} +### Example: Early Spam Detection Systems + +``` +Rule-based (1980s): +IF contains("viagra") OR contains("winner") THEN spam + +Statistical (1990s): +P(spam|word) = (frequency in spam emails) / (total frequency) +Combined using Naive Bayes: +P(spam|email) ∝ P(spam) × ∏ P(word|spam) +``` +::: + +The move to statistical approaches fundamentally changed how we think about building AI by introducing three core concepts that remain important today. First, the quality and quantity of training data became as important as the algorithms themselves---AI could only learn patterns that were present in its training examples. Second, we needed rigorous ways to evaluate how well AI actually performed, leading to metrics that could measure success and compare different approaches. Third, we discovered an inherent tension between precision (being right when we make a prediction) and recall (catching all the cases we should find), forcing designers to make explicit trade-offs based on their application's needs. For example, a spam filter might tolerate some spam to avoid blocking important emails, while medical diagnosis might need to catch every potential case even if it means more false alarms. + +@tbl-ai-evolution-strengths encapsulates the evolutionary journey of AI approaches we have discussed so far, highlighting the key strengths and capabilities that emerged with each new paradigm. As we move from left to right across the table, we can observe several important trends. We will talk about shallow and deep learning next, but it is useful to understand the trade-offs between the approaches we have covered so far. 
+ ++---------------------+--------------------------+--------------------------+--------------------------+-------------------------------+ +| Aspect | Symbolic AI | Expert Systems | Statistical Learning | Shallow / Deep Learning | ++:====================+:=========================+:=========================+:=========================+:==============================+ +| Key Strength | Logical reasoning | Domain expertise | Versatility | Pattern recognition | ++---------------------+--------------------------+--------------------------+--------------------------+-------------------------------+ +| Best Use Case | Well-defined, rule-based | Specific domain problems | Various structured data | Complex, unstructured data | +| | problems | | problems | problems | ++---------------------+--------------------------+--------------------------+--------------------------+-------------------------------+ +| Data Handling | Minimal data needed | Domain knowledge-based | Moderate data required | Large-scale data processing | ++---------------------+--------------------------+--------------------------+--------------------------+-------------------------------+ +| Adaptability | Fixed rules | Domain-specific | Adaptable to various | Highly adaptable to diverse | +| | | adaptability | domains | tasks | ++---------------------+--------------------------+--------------------------+--------------------------+-------------------------------+ +| Problem Complexity | Simple, logic-based | Complicated, domain- | Complex, structured | Highly complex, unstructured | +| | | specific | | | ++---------------------+--------------------------+--------------------------+--------------------------+-------------------------------+ + +: Evolution of AI - Key Positive Aspects {#tbl-ai-evolution-strengths .hover .striped} + +The table serves as a bridge between the early approaches we've discussed and the more recent developments in shallow and deep learning that we'll explore next. 
It sets the stage for understanding why certain approaches gained prominence in different eras and how each new paradigm built upon and addressed the limitations of its predecessors. Moreover, it illustrates how the strengths of earlier approaches continue to influence and enhance modern AI techniques, particularly in the era of foundation models. + +### Shallow Learning (2000s) + +The 2000s marked a fascinating period in machine learning history that we now call the "shallow learning" era. To understand why it's "shallow," imagine building a house: deep learning (which came later) is like having multiple construction crews working at different levels simultaneously, each crew learning from the work of crews below them. In contrast, shallow learning typically had just one or two levels of processing - like having just a foundation crew and a framing crew. + +During this time, several powerful algorithms dominated the machine learning landscape. Each brought unique strengths to different problems: Decision trees provided interpretable results by making choices much like a flowchart. K-nearest neighbors made predictions by finding similar examples in past data, like asking your most experienced neighbors for advice. Linear and logistic regression offered straightforward, interpretable models that worked well for many real-world problems. Support Vector Machines (SVMs) excelled at finding complex boundaries between categories using the "kernel trick" - imagine being able to untangle a bowl of spaghetti into straight lines by lifting it into a higher dimension. +These algorithms formed the foundation of practical machine learning. +Consider a typical computer vision solution from 2005: + +::: {.callout-note} +### Example: Traditional Computer Vision Pipeline +``` +1. Manual Feature Extraction + - SIFT (Scale-Invariant Feature Transform) + - HOG (Histogram of Oriented Gradients) + - Gabor filters +2. Feature Selection/Engineering +3.
"Shallow" Learning Model (e.g., SVM) +4. Post-processing +``` +::: + +What made this era distinct was its hybrid approach: human-engineered features combined with statistical learning. They had strong mathematical foundations (researchers could prove why they worked). They performed well even with limited data. They were computationally efficient. They produced reliable, reproducible results. + +Take the example of face detection, where the Viola-Jones algorithm (2001) achieved real-time performance using simple rectangular features and a cascade of classifiers. This algorithm powered digital camera face detection for nearly a decade. + +### Deep Learning (2012-Present) + +While Support Vector Machines excelled at finding complex boundaries between categories using mathematical transformations, deep learning took a radically different approach inspired by the human brain's architecture. Deep learning is built from layers of artificial neurons, where each layer learns to transform its input data into increasingly abstract representations. Imagine processing an image of a cat: the first layer might learn to detect simple edges and contrasts, the next layer combines these into basic shapes and textures, another layer might recognize whiskers and pointy ears, and the final layers assemble these features into the concept of "cat." Unlike shallow learning methods that required humans to carefully engineer features, deep learning networks can automatically discover useful features directly from raw data. This ability to learn hierarchical representations—from simple to complex, concrete to abstract—is what makes deep learning "deep," and it turned out to be a remarkably powerful approach for handling complex, real-world data like images, speech, and text. + +In 2012, a deep neural network called AlexNet, shown in @fig-alexnet, achieved a breakthrough in the ImageNet competition that would transform the field of machine learning. 
The challenge was formidable: correctly classify 1.2 million high-resolution images into 1,000 different categories. While previous approaches struggled with error rates above 25%, AlexNet achieved a 15.3% error rate, dramatically outperforming all existing methods. + +![Deep neural network architecture for AlexNet. Source: @krizhevsky2012imagenet](./images/png/alexnet_arch.png){#fig-alexnet} + +The success of AlexNet wasn't just a technical achievement---it was a watershed moment that demonstrated the practical viability of deep learning. It showed that with sufficient data, computational power, and architectural innovations, neural networks could outperform hand-engineered features and shallow learning methods that had dominated the field for decades. This single result triggered an explosion of research and applications in deep learning that continues to this day. + +From this foundation, deep learning entered an era of unprecedented scale. By the late 2010s, companies like Google, Facebook, and OpenAI were training neural networks thousands of times larger than **AlexNet**[^defn-alexnet]. These massive models, often called "foundation models," took deep learning to new heights. GPT-3, released in 2020, contained 175 billion **parameters**[^defn-parameters]---imagine a student that could read through all of Wikipedia multiple times and learn patterns from every article. These models showed remarkable abilities: writing human-like text, engaging in conversation, generating images from descriptions, and even writing computer code. The key insight was simple but powerful: as we made neural networks bigger and fed them more data, they became capable of solving increasingly complex tasks. However, this scale brought unprecedented systems challenges: how do you efficiently train models that require thousands of GPUs working in parallel? How do you store and serve models that are hundreds of gigabytes in size? How do you handle the massive datasets needed for training?
+ +[^defn-alexnet]: A breakthrough deep neural network from 2012 that won the [ImageNet competition](https://www.image-net.org/challenges/LSVRC/) by a large margin and helped spark the deep learning revolution. + +[^defn-parameters]: Similar to how the brain's neural connections grow stronger as you learn a new skill, having more parameters generally means that the model can learn more complex patterns. + +The deep learning revolution of 2012 didn't emerge from nowhere---it was built on neural network research dating back to the 1950s. The story begins with Frank Rosenblatt's Perceptron in 1957, which captured the imagination of researchers by showing how a simple artificial neuron could learn to classify patterns. While it could only handle linearly separable problems—a limitation dramatically highlighted by Minsky and Papert's 1969 book "Perceptrons"—it introduced the fundamental concept of trainable neural networks. The 1980s brought more important breakthroughs: Rumelhart, Hinton, and Williams introduced backpropagation in 1986, providing a systematic way to train multi-layer networks, while Yann LeCun demonstrated its practical application in recognizing handwritten digits using **convolutional neural networks (CNNs)**[^defn-cnn]. + +[^defn-cnn]: A type of neural network specially designed for processing images, inspired by how the human visual system works. The "convolutional" part refers to how it scans images in small chunks, similar to how our eyes focus on different parts of a scene. 
 + +:::{#vid-tl .callout-important} + +# Convolutional Network Demo from 1989 + +{{< video https://www.youtube.com/watch?v=FwFduRA_L6Q&ab_channel=YannLeCun >}} + +::: + +Yet these networks largely languished through the 1990s and 2000s, not because the ideas were wrong, but because they were ahead of their time---the field lacked three important ingredients: sufficient data to train complex networks, enough computational power to process this data, and the technical innovations needed to train very deep networks effectively. + +The field had to wait for the convergence of big data, better computing hardware, and algorithmic breakthroughs before deep learning's potential could be unlocked. This long gestation period helps explain why the 2012 ImageNet moment was less a sudden revolution and more the culmination of decades of accumulated research finally finding its moment. As we'll explore in the following sections, this evolution has led to two significant developments in the field. First, it has given rise to the field of machine learning systems engineering, a discipline that teaches how to bridge the gap between theoretical advancements and practical implementation. Second, it has necessitated a more comprehensive definition of machine learning systems, one that encompasses not just algorithms, but also data and computing infrastructure. Today's challenges of scale echo many of the same fundamental questions about computation, data, and learning methods that researchers have grappled with since the field's inception, but now within a more complex and interconnected framework. + +## The Rise of ML Systems Engineering + +The story we've traced---from the early days of the Perceptron through the deep learning revolution---has largely been one of algorithmic breakthroughs. Each era brought new mathematical insights and modeling approaches that pushed the boundaries of what AI could achieve.
But something important changed over the past decade: the success of AI systems became increasingly dependent not just on algorithmic innovations, but on sophisticated engineering. + +This shift mirrors the evolution of computer science and engineering in the late 1960s and early 1970s. During that period, as computing systems grew more complex, a new discipline emerged: Computer Engineering. This field bridged the gap between Electrical Engineering's hardware expertise and Computer Science's focus on algorithms and software. Computer Engineering arose because the challenges of designing and building complex computing systems required an integrated approach that neither discipline could fully address on its own. + +Today, we're witnessing a similar transition in the field of AI. While Computer Science continues to push the boundaries of ML algorithms and Electrical Engineering advances specialized AI hardware, neither discipline fully addresses the engineering principles needed to deploy, optimize, and sustain ML systems at scale. This gap highlights the need for a new discipline: Machine Learning Systems Engineering. + +There is no explicit definition of what this field is as such today, but it can be broadly defined as such: + +:::{.callout-tip} +### Definition of Machine Learning Systems Engineering + +Machine Learning Systems Engineering (MLSysEng) is the discipline of designing, implementing, and operating artificially intelligent systems across computing scales—from resource-constrained embedded devices to warehouse-scale computers. This field integrates principles from engineering disciplines spanning hardware to software to create systems that are reliable, efficient, and optimized for their deployment context. It encompasses the complete lifecycle of AI applications: from requirements engineering and data collection through model development, system integration, deployment, monitoring, and maintenance. 
The field emphasizes engineering principles of systematic design, resource constraints, performance requirements, and operational reliability. +::: + +Let's consider space exploration. While astronauts venture into new frontiers and explore the vast unknowns of the universe, their discoveries are only possible because of the complex engineering systems supporting them---the rockets that lift them into space, the life support systems that keep them alive, and the communication networks that keep them connected to Earth. Similarly, while AI researchers push the boundaries of what's possible with learning algorithms, their breakthroughs only become practical reality through careful systems engineering. Modern AI systems need robust infrastructure to collect and manage data, powerful computing systems to train models, and reliable deployment platforms to serve millions of users. + +This emergence of machine learning systems engineering as an important discipline reflects a broader reality: turning AI algorithms into real-world systems requires bridging the gap between theoretical possibilities and practical implementation. It's not enough to have a brilliant algorithm if you can't efficiently collect and process the data it needs, distribute its computation across hundreds of machines, serve it reliably to millions of users, or monitor its performance in production. + +Understanding this interplay between algorithms and engineering has become fundamental for modern AI practitioners. While researchers continue to push the boundaries of what's algorithmically possible, engineers are tackling the complex challenge of making these algorithms work reliably and efficiently in the real world. This brings us to a fundamental question: what exactly is a machine learning system, and what makes it different from traditional software systems? + +## Definition of an ML System + +There's no universally accepted, clear-cut textbook definition of a machine learning system.
This ambiguity stems from the fact that different practitioners, researchers, and industries often refer to machine learning systems in varying contexts and with different scopes. Some might focus solely on the algorithmic aspects, while others might include the entire pipeline from data collection to model deployment. This loose usage of the term reflects the rapidly evolving and multidisciplinary nature of the field. + +Given this diversity of perspectives, it is important to establish a clear and comprehensive definition that encompasses all these aspects. In this textbook, we take a holistic approach to machine learning systems, considering not just the algorithms but also the entire ecosystem in which they operate. Therefore, we define a machine learning system as follows: + +:::{.callout-tip} +### Definition of a Machine Learning System + +A machine learning system is an integrated computing system comprising three core components: (1) data that guides algorithmic behavior, (2) learning algorithms that extract patterns from this data, and (3) computing infrastructure that enables both the learning process (i.e., training) and the application of learned knowledge (i.e., inference/serving). Together, these components create a computing system capable of making predictions, generating content, or taking actions based on learned patterns. +::: + +The core of any machine learning system consists of three interrelated components, as illustrated in @fig-ai-triangle: Models/Algorithms, Data, and Computing Infrastructure. These components form a triangular dependency where each element fundamentally shapes the possibilities of the others. The model architecture dictates both the computational demands for training and inference, as well as the volume and structure of data required for effective learning. 
The data's scale and complexity influence what infrastructure is needed for storage and processing, while simultaneously determining which model architectures are feasible. The infrastructure capabilities establish practical limits on both model scale and data processing capacity, creating a framework within which the other components must operate. + +![Machine learning systems involve algorithms, data, and computation, all intertwined together.](images/png/triangle.png){#fig-ai-triangle} + +Each of these components serves a distinct but interconnected purpose: + +- **Algorithms:** Mathematical models and methods that learn patterns from data to make predictions or decisions + +- **Data:** Processes and infrastructure for collecting, storing, processing, managing, and serving data for both training and inference. + +- **Computing:** Hardware and software infrastructure that enables efficient training, serving, and operation of models at scale. + +The interdependency of these components means no single element can function in isolation. The most sophisticated algorithm cannot learn without data or computing resources to run on. The largest datasets are useless without algorithms to extract patterns or infrastructure to process them. And the most powerful computing infrastructure serves no purpose without algorithms to execute or data to process. + +To illustrate these relationships, we can draw an analogy to space exploration. Algorithm developers are like astronauts---exploring new frontiers and making discoveries. Data science teams function like mission control specialists—ensuring the constant flow of critical information and resources needed to keep the mission running. Computing infrastructure engineers are like rocket engineers—designing and building the systems that make the mission possible. 
Just as a space mission requires the seamless integration of astronauts, mission control, and rocket systems, a machine learning system demands the careful orchestration of algorithms, data, and computing infrastructure. + +## The ML Systems Lifecycle + +Traditional software systems follow a predictable lifecycle where developers write explicit instructions for computers to execute. These systems are built on decades of established software engineering practices. Version control systems maintain precise histories of code changes. Continuous integration and deployment pipelines automate testing and release processes. Static analysis tools measure code quality and identify potential issues. This infrastructure enables reliable development, testing, and deployment of software systems, following well-defined principles of software engineering. + +Machine learning systems represent a fundamental departure from this traditional paradigm. While traditional systems execute explicit programming logic, machine learning systems derive their behavior from patterns in data. This shift from code to data as the primary driver of system behavior introduces new complexities. + +As illustrated in @fig-ml_lifecycle_overview, the ML lifecycle consists of interconnected stages from data collection through model monitoring, with feedback loops for continuous improvement when performance degrades or models need enhancement. + +![The typical lifecycle of a machine learning system.](./images/png/ml_lifecycle_overview.png){#fig-ml_lifecycle_overview} + +Unlike source code, which changes only when developers modify it, data reflects the dynamic nature of the real world. Changes in data distributions can silently alter system behavior. Traditional software engineering tools, designed for deterministic code-based systems, prove insufficient for managing these data-dependent systems. 
For example, version control systems that excel at tracking discrete code changes struggle to manage large, evolving datasets. Testing frameworks designed for deterministic outputs must be adapted for probabilistic predictions. This data-dependent nature creates a more dynamic lifecycle, requiring continuous monitoring and adaptation to maintain system relevance as real-world data patterns evolve. + +Understanding the machine learning system lifecycle requires examining its distinct stages. Each stage presents unique requirements from both learning and infrastructure perspectives. This dual consideration---of learning needs and systems support---is critically important for building effective machine learning systems. + +However, the various stages of the ML lifecycle in production are not isolated; they are, in fact, deeply interconnected. This interconnectedness can create either virtuous or vicious cycles. In a virtuous cycle, high-quality data enables effective learning, robust infrastructure supports efficient processing, and well-engineered systems facilitate the collection of even better data. However, in a vicious cycle, poor data quality undermines learning, inadequate infrastructure hampers processing, and system limitations prevent the improvement of data collection—each problem compounds the others. + +## The Spectrum of ML Systems + +The complexity of managing machine learning systems becomes even more apparent when we consider the broad spectrum across which ML is deployed today. ML systems exist at vastly different scales and in diverse environments, each presenting unique challenges and constraints. + +At one end of the spectrum, we have cloud-based ML systems running in massive data centers. These systems, like large language models or recommendation engines, process petabytes of data and serve millions of users simultaneously. They can leverage virtually unlimited computing resources but must manage enormous operational complexity and costs.
 + +At the other end, we find TinyML systems running on microcontrollers and embedded devices. These systems must perform ML tasks with severe constraints on memory, computing power, and energy consumption. Imagine a smart home device, such as Alexa or Google Assistant, that must recognize voice commands using less power than an LED bulb, or a sensor that must detect anomalies while running on a battery for months or even years. + +Between these extremes, we find a rich variety of ML systems adapted for different contexts. Edge ML systems bring computation closer to data sources, reducing latency and bandwidth requirements while managing local computing resources. Mobile ML systems must balance sophisticated capabilities with battery life and processor limitations on smartphones and tablets. Enterprise ML systems often operate within specific business constraints, focusing on particular tasks while integrating with existing infrastructure. Some organizations employ hybrid approaches, distributing ML capabilities across multiple tiers to balance various requirements. + +## ML System Implications on the ML Lifecycle + +The diversity of ML systems across the spectrum represents a complex interplay of requirements, constraints, and trade-offs. These decisions fundamentally impact every stage of the ML lifecycle we discussed earlier, from data collection to continuous operation. + +Performance requirements often drive initial architectural decisions. Latency-sensitive applications, like autonomous vehicles or real-time fraud detection, might require edge or embedded architectures despite their resource constraints. Conversely, applications requiring massive computational power for training, such as large language models, naturally gravitate toward centralized cloud architectures. However, raw performance is just one consideration in a complex decision space. + +Resource management varies dramatically across architectures.
Cloud systems must optimize for cost efficiency at scale—balancing expensive GPU clusters, storage systems, and network bandwidth. Edge systems face fixed resource limits and must carefully manage local compute and storage. Mobile and embedded systems operate under the strictest constraints, where every byte of memory and milliwatt of power matters. These resource considerations directly influence both model design and system architecture. + +Operational complexity increases with system distribution. While centralized cloud architectures benefit from mature deployment tools and managed services, edge and hybrid systems must handle the complexity of distributed system management. This complexity manifests throughout the ML lifecycle—from data collection and version control to model deployment and monitoring. As we discussed in our examination of technical debt, this operational complexity can compound over time if not carefully managed. + +Data considerations often introduce competing pressures. Privacy requirements or data sovereignty regulations might push toward edge or embedded architectures, while the need for large-scale training data might favor cloud approaches. The velocity and volume of data also influence architectural choices—real-time sensor data might require edge processing to manage bandwidth, while batch analytics might be better suited to cloud processing. + +Evolution and maintenance requirements must be considered from the start. Cloud architectures offer flexibility for system evolution but can incur significant ongoing costs. Edge and embedded systems might be harder to update but could offer lower operational overhead. The continuous cycle of ML systems we discussed earlier becomes particularly challenging in distributed architectures, where updating models and maintaining system health requires careful orchestration across multiple tiers. + +These trade-offs are rarely simple binary choices. 
Modern ML systems often adopt hybrid approaches, carefully balancing these considerations based on specific use cases and constraints. The key is understanding how these decisions will impact the system throughout its lifecycle, from initial development through continuous operation and evolution. + +### Emerging Trends + +We are just at the beginning. As machine learning systems continue to evolve, several key trends are reshaping the landscape of ML system design and deployment. + +The rise of agentic systems marks a profound evolution in ML systems. Traditional ML systems were primarily reactive—they made predictions or classifications based on input data. In contrast, agentic systems can take actions, learn from their outcomes, and adapt their behavior accordingly. These systems, exemplified by autonomous agents that can plan, reason, and execute complex tasks, introduce new architectural challenges. They require sophisticated frameworks for decision-making, safety constraints, and real-time interaction with their environment. + +Architectural evolution is being driven by new hardware and deployment patterns. Specialized AI accelerators are emerging across the spectrum—from powerful data center chips to efficient edge processors to tiny neural processing units in mobile devices. This heterogeneous computing landscape is enabling new architectural possibilities, such as dynamic model distribution across tiers based on computing capabilities and current conditions. The traditional boundaries between cloud, edge, and embedded systems are becoming increasingly fluid. + +Resource efficiency is gaining prominence as the environmental and economic costs of large-scale ML become more apparent. This has sparked innovation in model compression, efficient training techniques, and energy-aware computing. Future systems will likely need to balance the drive for more powerful models against growing sustainability concerns. 
This emphasis on efficiency is particularly relevant given our earlier discussion of technical debt and operational costs. + +System intelligence is moving toward more autonomous operation. Future ML systems will likely incorporate more sophisticated self-monitoring, automated resource management, and adaptive deployment strategies. This evolution builds upon the continuous cycle we discussed earlier, but with increased automation in handling data distribution shifts, model updates, and system optimization. + +Integration challenges are becoming more complex as ML systems interact with broader technology ecosystems. The need to integrate with existing software systems, handle diverse data sources, and operate across organizational boundaries is driving new approaches to system design. This integration complexity adds new dimensions to the technical debt considerations we explored earlier. + +These trends suggest that future ML systems will need to be increasingly adaptable and efficient while managing growing complexity. Understanding these directions is important for building systems that can evolve with the field while avoiding the accumulation of technical debt we discussed earlier. + +## Real-world Applications and Impact + +The ability to build and operationalize ML systems across various scales and environments has led to transformative changes across numerous sectors. This section showcases a few examples where theoretical concepts and practical considerations we have discussed manifest in tangible, impactful applications and real-world impact. + +### Case Study: FarmBeats: Edge and Embedded ML for Agriculture + +FarmBeats, a project developed by Microsoft Research, shown in @fig-farmbeats-overview is a significant advancement in the application of machine learning to agriculture. This system aims to increase farm productivity and reduce costs by leveraging AI and IoT technologies. 
FarmBeats exemplifies how edge and embedded ML systems can be deployed in challenging, real-world environments to solve practical problems. By bringing ML capabilities directly to the farm, FarmBeats demonstrates the potential of distributed AI systems in transforming traditional industries. + +![Microsoft Farmbeats: AI, Edge & IoT for Agriculture.](./images/png/farmbeats.png){#fig-farmbeats-overview} + +**Data Aspects** + +The data ecosystem in FarmBeats is diverse and distributed. Sensors deployed across fields collect real-time data on soil moisture, temperature, and nutrient levels. Drones equipped with multispectral cameras capture high-resolution imagery of crops, providing insights into plant health and growth patterns. Weather stations contribute local climate data, while historical farming records offer context for long-term trends. The challenge lies not just in collecting this heterogeneous data, but in managing its flow from dispersed, often remote locations with limited connectivity. FarmBeats employs innovative data transmission techniques, such as using TV white spaces (unused broadcasting frequencies) to extend internet connectivity to far-flung sensors. This approach to data collection and transmission embodies the principles of edge computing we discussed earlier, where data processing begins at the source to reduce bandwidth requirements and enable real-time decision making. + +**Algorithm/Model Aspects** + +FarmBeats uses a variety of ML algorithms tailored to agricultural applications. For soil moisture prediction, it uses temporal neural networks that can capture the complex dynamics of water movement in soil. Computer vision algorithms process drone imagery to detect crop stress, pest infestations, and yield estimates. These models must be robust to noisy data and capable of operating with limited computational resources. 
Machine learning methods such as "transfer learning" allow models trained on data-rich farms to be adapted for use in areas with limited historical data. The system also incorporates a mixture of methods that combine outputs from multiple algorithms to improve prediction accuracy and reliability. A key challenge FarmBeats addresses is model personalization---adapting general models to the specific conditions of individual farms, which may have unique soil compositions, microclimates, and farming practices. + +**Computing Infrastructure Aspects** + +FarmBeats exemplifies the edge computing paradigm we explored in our discussion of the ML system spectrum. At the lowest level, embedded ML models run directly on IoT devices and sensors, performing basic data filtering and anomaly detection. Edge devices, such as ruggedized field gateways, aggregate data from multiple sensors and run more complex models for local decision-making. These edge devices operate in challenging conditions, requiring robust hardware designs and efficient power management to function reliably in remote agricultural settings. The system employs a hierarchical architecture, with more computationally intensive tasks offloaded to on-premises servers or the cloud. This tiered approach allows FarmBeats to balance the need for real-time processing with the benefits of centralized data analysis and model training. The infrastructure also includes mechanisms for over-the-air model updates, ensuring that edge devices can receive improved models as more data becomes available and algorithms are refined. + +**Impact and Future Implications** + +FarmBeats shows how ML systems can be deployed in resource-constrained, real-world environments to drive significant improvements in traditional industries. By providing farmers with AI-driven insights, the system has shown potential to increase crop yields, reduce water usage, and optimize resource allocation.
Looking forward, the FarmBeats approach could be extended to address global challenges in food security and sustainable agriculture. The success of this system also highlights the growing importance of edge and embedded ML in IoT applications, where bringing intelligence closer to the data source can lead to more responsive, efficient, and scalable solutions. As edge computing capabilities continue to advance, we can expect to see similar distributed ML architectures applied to other domains, from smart cities to environmental monitoring. + +### Case Study: AlphaFold: Large-Scale Scientific ML -In the vision of ubiquitous computing [@weiser1991computer], the integration of processors into everyday objects is just one aspect of a larger paradigm shift. The true essence of this vision lies in creating an intelligent environment that can anticipate our needs and act on our behalf, enhancing our experiences without requiring explicit commands. To achieve this level of pervasive intelligence, it is crucial to develop and deploy machine learning systems that span the entire ecosystem, from the cloud to the edge and even to the tiniest IoT devices. +AlphaFold, developed by DeepMind, is a landmark achievement in the application of machine learning to complex scientific problems. This AI system is designed to predict the three-dimensional structure of proteins, as shown in @fig-alphafold-overview, from their amino acid sequences, a challenge known as the "protein folding problem" that has puzzled scientists for decades. AlphaFold's success demonstrates how large-scale ML systems can accelerate scientific discovery and potentially revolutionize fields like structural biology and drug design. This case study exemplifies the use of advanced ML techniques and massive computational resources to tackle problems at the frontiers of science. 
-By distributing machine learning capabilities across the "computing continuum," from cloud to edge to embedded systems that surround us, we can harness the strengths of each layer while mitigating their limitations. The cloud, with its vast computational resources and storage capacity, is ideal for training complex models on large datasets and performing resource-intensive tasks. Edge devices, such as gateways and smartphones, can process data locally, enabling faster response times, improved privacy, and reduced bandwidth requirements. Finally, the tiniest IoT devices, equipped with machine learning capabilities, can make quick decisions based on sensor data, enabling highly responsive and efficient systems. +::: {.content-visible when-format="html"} +![Examples of protein targets within the free modeling category. Source: Google DeepMind](images/png/alphafold.gif){#fig-alphafold-overview} +::: -This distributed intelligence is particularly crucial for applications that require real-time processing, such as autonomous vehicles, industrial automation, and smart healthcare. By processing data at the most appropriate layer of the computing continuum, we can ensure that decisions are made quickly and accurately, without relying on constant communication with a central server. +::: {.content-visible when-format="pdf"} +![Examples of protein targets within the free modeling category. Source: Google DeepMind](images/png/alphafold.png){#fig-alphafold-overview} +::: -The migration of machine learning intelligence across the ecosystem also enables more personalized and context-aware experiences. By learning from user behavior and preferences at the edge, devices can adapt to individual needs without compromising privacy. This localized intelligence can then be aggregated and refined in the cloud, creating a feedback loop that continuously improves the overall system. 
+**Data Aspects** -However, deploying machine learning systems across the computing continuum presents several challenges. Ensuring the interoperability and seamless integration of these systems requires standardized protocols and interfaces. Security and privacy concerns must also be addressed, as the distribution of intelligence across multiple layers increases the attack surface and the potential for data breaches. +The data underpinning AlphaFold's success is vast and multifaceted. The primary dataset is the Protein Data Bank (PDB), which contains the experimentally determined structures of over 180,000 proteins. This is complemented by databases of protein sequences, which number in the hundreds of millions. AlphaFold also utilizes evolutionary data in the form of multiple sequence alignments (MSAs), which provide insights into the conservation patterns of amino acids across related proteins. The challenge lies not just in the volume of data, but in its quality and representation. Experimental protein structures can contain errors or be incomplete, requiring sophisticated data cleaning and validation processes. Moreover, the representation of protein structures and sequences in a form amenable to machine learning is a significant challenge in itself. AlphaFold's data pipeline involves complex preprocessing steps to convert raw sequence and structural data into meaningful features that capture the physical and chemical properties relevant to protein folding. -Furthermore, the varying computational capabilities and energy constraints of devices at different layers of the computing continuum necessitate the development of efficient and adaptable machine learning models. Techniques such as model compression, federated learning, and transfer learning can help address these challenges, enabling the deployment of intelligence across a wide range of devices. 
+**Algorithm/Model Aspects** -As we move towards the realization of Weiser's vision of ubiquitous computing, the development and deployment of machine learning systems across the entire ecosystem will be critical. By leveraging the strengths of each layer of the computing continuum, we can create an intelligent environment that seamlessly integrates with our daily lives, anticipating our needs and enhancing our experiences in ways that were once unimaginable. As we continue to push the boundaries of what's possible with distributed machine learning, we inch closer to a future where technology becomes an invisible but integral part of our world. +AlphaFold's algorithmic approach represents a tour de force in the application of deep learning to scientific problems. At its core, AlphaFold uses a novel neural network architecture that combines with techniques from computational biology. The model learns to predict inter-residue distances and torsion angles, which are then used to construct a full 3D protein structure. A key innovation is the use of "equivariant attention" layers that respect the symmetries inherent in protein structures. The learning process involves multiple stages, including initial "pretraining" on a large corpus of protein sequences, followed by fine-tuning on known structures. AlphaFold also incorporates domain knowledge in the form of physics-based constraints and scoring functions, creating a hybrid system that leverages both data-driven learning and scientific prior knowledge. The model's ability to generate accurate confidence estimates for its predictions is crucial, allowing researchers to assess the reliability of the predicted structures. -![Common applications of Machine Learning. 
Source: [EDUCBA](https://www.educba.com/applications-of-machine-learning/)](images/png/mlapplications.png){#fig-applications-of-ml} +**Computing Infrastructure Aspects** -This vision is already beginning to take shape, as illustrated by the common applications of AI surrounding us in our daily lives (see @fig-applications-of-ml). From healthcare and finance to transportation and entertainment, machine learning is transforming various sectors, making our interactions with technology more intuitive and personalized. +The computational demands of AlphaFold epitomize the challenges of large-scale scientific ML systems. Training the model requires massive parallel computing resources, leveraging clusters of GPUs or TPUs (Tensor Processing Units) in a distributed computing environment. DeepMind utilized Google's cloud infrastructure, with the final version of AlphaFold trained on 128 TPUv3 cores for several weeks. The inference process, while less computationally intensive than training, still requires significant resources, especially when predicting structures for large proteins or processing many proteins in parallel. To make AlphaFold more accessible to the scientific community, DeepMind has collaborated with the European Bioinformatics Institute to create a [public database](https://alphafold.ebi.ac.uk/) of predicted protein structures, which itself represents a substantial computing and data management challenge. This infrastructure allows researchers worldwide to access AlphaFold's predictions without needing to run the model themselves, demonstrating how centralized, high-performance computing resources can be leveraged to democratize access to advanced ML capabilities. -## What's Inside the Book +**Impact and Future Implications** -In this book, we will explore the technical foundations of ubiquitous machine learning systems, the challenges of building and deploying these systems across the computing continuum, and the vast array of applications they enable. 
A unique aspect of this book is its function as a conduit to seminal scholarly works and academic research papers, aimed at enriching the reader's understanding and encouraging deeper exploration of the subject. This approach seeks to bridge the gap between pedagogical materials and cutting-edge research trends, offering a comprehensive guide that is in step with the evolving field of applied machine learning. +AlphaFold's impact on structural biology has been profound, with the potential to accelerate research in areas ranging from fundamental biology to drug discovery. By providing accurate structural predictions for proteins that have resisted experimental methods, AlphaFold opens new avenues for understanding disease mechanisms and designing targeted therapies. The success of AlphaFold also serves as a powerful demonstration of how ML can be applied to other complex scientific problems, potentially leading to breakthroughs in fields like materials science or climate modeling. However, it also raises important questions about the role of AI in scientific discovery and the changing nature of scientific inquiry in the age of large-scale ML systems. As we look to the future, the AlphaFold approach suggests a new paradigm for scientific ML, where massive computational resources are combined with domain-specific knowledge to push the boundaries of human understanding. -To improve the learning experience, we have included a variety of supplementary materials. Throughout the book, you will find slides that summarize key concepts, videos that provide in-depth explanations and demonstrations, exercises that reinforce your understanding, and labs that offer hands-on experience with the tools and techniques discussed. These additional resources are designed to cater to different learning styles and help you gain a deeper, more practical understanding of the subject matter. 
+### Case Study: Autonomous Vehicles: Spanning the ML Spectrum -We begin with the fundamentals, introducing key concepts in systems and machine learning, and providing a deep learning primer. We then guide you through the AI workflow, from data engineering to selecting the right AI frameworks. This workflow closely follows the lifecycle of a typical machine learning project, as illustrated in @fig-ml-lifecycle. +Waymo, a subsidiary of Alphabet Inc., stands at the forefront of autonomous vehicle technology, representing one of the most ambitious applications of machine learning systems to date. Evolving from the Google Self-Driving Car Project initiated in 2009, Waymo's approach to autonomous driving exemplifies how ML systems can span the entire spectrum from embedded systems to cloud infrastructure. This case study demonstrates the practical implementation of complex ML systems in a safety-critical, real-world environment, integrating real-time decision-making with long-term learning and adaptation. -![Machine Learning project life cycle. Source:[Medium](https://ihsanulpro.medium.com/complete-machine-learning-project-flowchart-explained-0f55e52b9381)](images/png/mlprojectlifecycle.png){#fig-ml-lifecycle} +{{< video https://youtu.be/hA_-MkU0Nfw?si=6DIH7qwMbeMicnJ5 >}} -The training section covers efficient AI training techniques, model optimizations, and AI acceleration using specialized hardware. Deployment is addressed next, with chapters on benchmarking AI, distributed learning, and ML operations. Advanced topics like security, privacy, responsible AI, sustainable AI, robust AI, and generative AI are then explored in depth. The book concludes by highlighting the positive impact of AI and its potential for good. +**Data Aspects** -## How to Navigate This Book +The data ecosystem underpinning Waymo's technology is vast and dynamic. 
Each vehicle serves as a roving data center, its sensor suite—comprising LiDAR, radar, and high-resolution cameras—generating approximately one terabyte of data per hour of driving. This real-world data is complemented by an even more extensive simulated dataset, with Waymo's vehicles having traversed over 20 billion miles in simulation and more than 20 million miles on public roads. The challenge lies not just in the volume of data, but in its heterogeneity and the need for real-time processing. Waymo must handle both structured (e.g., GPS coordinates) and unstructured data (e.g., camera images) simultaneously. The data pipeline spans from edge processing on the vehicle itself to massive cloud-based storage and processing systems. Sophisticated data cleaning and validation processes are necessary, given the safety-critical nature of the application. Moreover, the representation of the vehicle's environment in a form amenable to machine learning presents significant challenges, requiring complex preprocessing to convert raw sensor data into meaningful features that capture the dynamics of traffic scenarios. -To get the most out of this book, we recommend a structured learning approach that leverages the various resources provided. Each chapter includes slides, videos, exercises, and labs to cater to different learning styles and reinforce your understanding. +**Algorithm/Model Aspects** -1. **Fundamentals (Chapters 1-3):** Start by building a strong foundation with the initial chapters, which provide an introduction to AI and cover core topics like AI systems and deep learning. +Waymo's ML stack represents a sophisticated ensemble of algorithms tailored to the multifaceted challenge of autonomous driving. The perception system employs deep learning techniques, including convolutional neural networks, to process visual data for object detection and tracking. 
Prediction models, needed for anticipating the behavior of other road users, leverage recurrent neural networks to understand temporal sequences. Waymo has developed custom ML models like VectorNet for predicting vehicle trajectories. The planning and decision-making systems may incorporate reinforcement learning or imitation learning techniques to navigate complex traffic scenarios. A key innovation in Waymo's approach is the integration of these diverse models into a coherent system capable of real-time operation. The ML models must also be interpretable to some degree, as understanding the reasoning behind a vehicle's decisions is vital for safety and regulatory compliance. Waymo's learning process involves continuous refinement based on real-world driving experiences and extensive simulation, creating a feedback loop that constantly improves the system's performance. -2. **Workflow (Chapters 4-6):** With that foundation, move on to the chapters focused on practical aspects of the AI model building process like workflows, data engineering, and frameworks. +**Computing Infrastructure Aspects** -3. **Training (Chapters 7-10):** These chapters offer insights into effectively training AI models, including techniques for efficiency, optimizations, and acceleration. +The computing infrastructure supporting Waymo's autonomous vehicles epitomizes the challenges of deploying ML systems across the full spectrum from edge to cloud. Each vehicle is equipped with a custom-designed compute platform capable of processing sensor data and making decisions in real-time, often leveraging specialized hardware like GPUs or custom AI accelerators. This edge computing is complemented by extensive use of cloud infrastructure, leveraging the power of Google's data centers for training models, running large-scale simulations, and performing fleet-wide learning. 
The connectivity between these tiers is critical, with vehicles requiring reliable, high-bandwidth communication for real-time updates and data uploading. Waymo's infrastructure must be designed for robustness and fault tolerance, ensuring safe operation even in the face of hardware failures or network disruptions. The scale of Waymo's operation presents significant challenges in data management, model deployment, and system monitoring across a geographically distributed fleet of vehicles. -4. **Deployment (Chapters 11-13):** Learn about deploying AI on devices and monitoring the operationalization through methods like benchmarking, on-device learning, and MLOps. +**Impact and Future Implications** -5. **Advanced Topics (Chapters 14-18):** Critically examine topics like security, privacy, ethics, sustainability, robustness, and generative AI. +Waymo's impact extends beyond technological advancement, potentially revolutionizing transportation, urban planning, and numerous aspects of daily life. The launch of Waymo One, a commercial ride-hailing service using autonomous vehicles in Phoenix, Arizona, represents a significant milestone in the practical deployment of AI systems in safety-critical applications. Waymo's progress has broader implications for the development of robust, real-world AI systems, driving innovations in sensor technology, edge computing, and AI safety that have applications far beyond the automotive industry. However, it also raises important questions about liability, ethics, and the interaction between AI systems and human society. As Waymo continues to expand its operations and explore applications in trucking and last-mile delivery, it serves as an important test bed for advanced ML systems, driving progress in areas such as continual learning, robust perception, and human-AI interaction. 
The Waymo case study underscores both the tremendous potential of ML systems to transform industries and the complex challenges involved in deploying AI in the real world. -6. **Social Impact (Chapter 19):** Explore the positive applications and potential of AI for societal good. +## Challenges and Considerations -7. **Conclusion (Chapter 20):** Reflect on the key takeaways and future directions in AI systems. +Building and deploying machine learning systems presents unique challenges that go beyond traditional software development. These challenges help explain why creating effective ML systems is about more than just choosing the right algorithm or collecting enough data. Let's explore the key areas where ML practitioners face significant hurdles. -While the book is designed for progressive learning, we encourage an interconnected learning approach that allows you to navigate chapters based on your interests and needs. Throughout the book, you'll find case studies and hands-on exercises that help you relate theory to real-world applications. We also recommend participating in forums and groups to engage in [discussions](https://github.com/harvard-edge/cs249r_book/discussions), debate concepts, and share insights with fellow learners. Regularly revisiting chapters can help reinforce your learning and offer new perspectives on the concepts covered. By adopting this structured yet flexible approach and actively engaging with the content and the community, you'll embark on a fulfilling and enriching learning experience that maximizes your understanding. +### Data Challenges -## Chapter-by-Chapter Insights +The foundation of any ML system is its data, and managing this data introduces several fundamental challenges. First, there's the basic question of data quality - real-world data is often messy and inconsistent. Imagine a healthcare application that needs to process patient records from different hospitals. 
Each hospital might record information differently, use different units of measurement, or have different standards for what data to collect. Some records might have missing information, while others might contain errors or inconsistencies that need to be cleaned up before the data can be useful. -Here's a closer look at what each chapter covers. We have structured the book into six main sections: Fundamentals, Workflow, Training, Deployment, Advanced Topics, and Impact. These sections closely reflect the major components of a typical machine learning pipeline, from understanding the basic concepts to deploying and maintaining AI systems in real-world applications. By organizing the content in this manner, we aim to provide a logical progression that mirrors the actual process of developing and implementing AI systems. +As ML systems grow, they often need to handle increasingly large amounts of data. A video streaming service like Netflix, for example, needs to process billions of viewer interactions to power its recommendation system. This scale introduces new challenges in how to store, process, and manage such large datasets efficiently. -### Fundamentals +Another critical challenge is how data changes over time. This phenomenon, known as "data drift," occurs when the patterns in new data begin to differ from the patterns the system originally learned from. For example, many predictive models struggled during the COVID-19 pandemic because consumer behavior changed so dramatically that historical patterns became less relevant. ML systems need ways to detect when this happens and adapt accordingly. -In the Fundamentals section, we lay the groundwork for understanding AI. This is far from being a thorough deep dive into the algorithms, but we aim to introduce key concepts, provide an overview of machine learning systems, and dive into the principles and algorithms of deep learning that power AI applications in their associated systems. 
This section equips you with the essential knowledge needed to grasp the subsequent chapters. +### Model Challenges -1. **[Introduction:](../introduction/introduction.qmd)** This chapter sets the stage, providing an overview of AI and laying the groundwork for the chapters that follow. -2. **[ML Systems:](../ml_systems/ml_systems.qmd)** We introduce the basics of machine learning systems, the platforms where AI algorithms are widely applied. -3. **[Deep Learning Primer:](../dl_primer/dl_primer.qmd)** This chapter offers a brief introduction to the algorithms and principles that underpin AI applications in ML systems. +Creating and maintaining the ML models themselves presents another set of challenges. Modern ML models, particularly in deep learning, can be extremely complex. Consider a language model like GPT-3, which has hundreds of billions of parameters (the individual settings the model learns during training). This complexity creates practical challenges: these models require enormous computing power to train and run, making it difficult to deploy them in situations with limited resources, like on mobile phones or IoT devices. -### Workflow +Training these models effectively is itself a significant challenge. Unlike traditional programming where we write explicit instructions, ML models learn from examples. This learning process involves many choices: How should we structure the model? How long should we train it? How can we tell if it's learning the right things? Making these decisions often requires both technical expertise and considerable trial and error. -The Workflow section guides you through the practical aspects of building AI models. We break down the AI workflow, discuss data engineering best practices, and review popular AI frameworks. By the end of this section, you'll have a clear understanding of the steps involved in developing proficient AI applications and the tools available to streamline the process. 
+A particularly important challenge is ensuring that models work well in real-world conditions. A model might perform excellently on its training data but fail when faced with slightly different situations in the real world. This gap between training performance and real-world performance is a central challenge in machine learning, especially for critical applications like autonomous vehicles or medical diagnosis systems. -4. **[AI Workflow:](../workflow/workflow.qmd)** This chapter breaks down the machine learning workflow, offering insights into the steps leading to proficient AI applications. -5. **[Data Engineering:](../data_engineering/data_engineering.qmd)** We focus on the importance of data in AI systems, discussing how to effectively manage and organize data. -6. **[AI Frameworks:](../frameworks/frameworks.qmd)** This chapter reviews different frameworks for developing machine learning models, guiding you in choosing the most suitable one for your projects. +### System Challenges -### Training +Getting ML systems to work reliably in the real world introduces its own set of challenges. Unlike traditional software that follows fixed rules, ML systems need to handle uncertainty and variability in their inputs and outputs. They also typically need both training systems (for learning from data) and serving systems (for making predictions), each with different requirements and constraints. -In the Training section, we explore techniques for training efficient and reliable AI models. We cover strategies for achieving efficiency, model optimizations, and the role of specialized hardware in AI acceleration. This section empowers you with the knowledge to develop high-performing models that can be seamlessly integrated into AI systems. +Consider a company building a speech recognition system. They need infrastructure to collect and store audio data, systems to train models on this data, and then separate systems to actually process users' speech in real-time. 
Each part of this pipeline needs to work reliably and efficiently, and all the parts need to work together seamlessly. -7. **[AI Training:](../training/training.qmd)** This chapter explores model training, exploring techniques for developing efficient and reliable models. -8. **[Efficient AI:](../efficient_ai/efficient_ai.qmd)** Here, we discuss strategies for achieving efficiency in AI applications, from computational resource optimization to performance enhancement. -9. **[Model Optimizations:](../optimizations/optimizations.qmd)** We explore various avenues for optimizing AI models for seamless integration into AI systems. -10. **[AI Acceleration:](../hw_acceleration/hw_acceleration.qmd)** We discuss the role of specialized hardware in enhancing the performance of AI systems. +These systems also need constant monitoring and updating. How do we know if the system is working correctly? How do we update models without interrupting service? How do we handle errors or unexpected inputs? These operational challenges become particularly complex when ML systems are serving millions of users. -### Deployment +### Ethical and Social Considerations -The Deployment section focuses on the challenges and solutions for deploying AI models. We discuss benchmarking methods to evaluate AI system performance, techniques for on-device learning to improve efficiency and privacy, and the processes involved in ML operations. This section equips you with the skills to effectively deploy and maintain AI functionalities in AI systems. +As ML systems become more prevalent in our daily lives, their broader impacts on society become increasingly important to consider. One major concern is fairness - ML systems can sometimes learn to make decisions that discriminate against certain groups of people. This often happens unintentionally, as the systems pick up biases present in their training data. 
For example, a job application screening system might inadvertently learn to favor certain demographics if those groups were historically more likely to be hired. -11. **[Benchmarking AI:](../benchmarking/benchmarking.qmd)** This chapter focuses on how to evaluate AI systems through systematic benchmarking methods. -12. **[On-Device Learning:](../ondevice_learning/ondevice_learning.qmd)** We explore techniques for localized learning, which enhances both efficiency and privacy. -13. **[ML Operations:](../ops/ops.qmd)** This chapter looks at the processes involved in the seamless integration, monitoring, and maintenance of AI functionalities. +Another important consideration is transparency. Many modern ML models, particularly deep learning models, work as "black boxes" - while they can make predictions, it's often difficult to understand how they arrived at their decisions. This becomes particularly problematic when ML systems are making important decisions about people's lives, such as in healthcare or financial services. -### Advanced Topics +Privacy is also a major concern. ML systems often need large amounts of data to work effectively, but this data might contain sensitive personal information. How do we balance the need for data with the need to protect individual privacy? How do we ensure that models don't inadvertently memorize and reveal private information? -In the Advanced Topics section, We will study the critical issues surrounding AI. We address privacy and security concerns, explore the ethical principles of responsible AI, discuss strategies for sustainable AI development, examine techniques for building robust AI models, and introduce the exciting field of generative AI. This section broadens your understanding of the complex landscape of AI and prepares you to navigate its challenges. +These challenges aren't merely technical problems to be solved, but ongoing considerations that shape how we approach ML system design and deployment. 
Throughout this book, we'll explore these challenges in detail and examine strategies for addressing them effectively. -14. **[Security & Privacy:](../privacy_security/privacy_security.qmd)** As AI becomes more ubiquitous, this chapter addresses the crucial aspects of privacy and security in AI systems. -15. **[Responsible AI:](../responsible_ai/responsible_ai.qmd)** We discuss the ethical principles guiding the responsible use of AI, focusing on fairness, accountability, and transparency. -16. **[Sustainable AI:](../sustainable_ai/sustainable_ai.qmd)** This chapter explores practices and strategies for sustainable AI, ensuring long-term viability and reduced environmental impact. -17. **[Robust AI:](../robust_ai/robust_ai.qmd)** We discuss techniques for developing reliable and robust AI models that can perform consistently across various conditions. -18. **[Generative AI:](../generative_ai/generative_ai.qmd)** This chapter explores the algorithms and techniques behind generative AI, opening avenues for innovation and creativity. +## Future Directions -### Social Impact +As we look to the future of machine learning systems, several exciting trends are shaping the field. These developments promise to both solve existing challenges and open new possibilities for what ML systems can achieve. -The Impact section highlights the transformative potential of AI in various domains. We showcase real-world applications of TinyML in healthcare, agriculture, conservation, and other areas where AI is making a positive difference. This section inspires you to leverage the power of AI for societal good and to contribute to the development of impactful solutions. +One of the most significant trends is the democratization of AI technology. Just as personal computers transformed computing from specialized mainframes to everyday tools, ML systems are becoming more accessible to developers and organizations of all sizes. 
Cloud providers now offer pre-trained models and automated ML platforms that reduce the expertise needed to deploy AI solutions. This democratization is enabling new applications across industries, from small businesses using AI for customer service to researchers applying ML to previously intractable problems. -19. **[AI for Good:](../ai_for_good/ai_for_good.qmd)** We highlight positive applications of TinyML in areas like healthcare, agriculture, and conservation. +As concerns about computational costs and environmental impact grow, there's an increasing focus on making ML systems more efficient. Researchers are developing new techniques for training models with less data and computing power. Innovation in specialized hardware, from improved GPUs to custom AI chips, is making ML systems faster and more energy-efficient. These advances could make sophisticated AI capabilities available on more devices, from smartphones to IoT sensors. -### Closing +Perhaps the most transformative trend is the development of more autonomous ML systems that can adapt and improve themselves. These systems are beginning to handle their own maintenance tasks - detecting when they need retraining, automatically finding and correcting errors, and optimizing their own performance. This automation could dramatically reduce the operational overhead of running ML systems while improving their reliability. -In the Closing section, we reflect on the key learnings from the book and look ahead to the future of AI. We synthesize the concepts covered, discuss emerging trends, and provide guidance on continuing your learning journey in this rapidly evolving field. This section leaves you with a comprehensive understanding of AI and the excitement to apply your knowledge in innovative ways. +While these trends are promising, it's important to recognize the field's limitations. Creating truly artificial general intelligence remains a distant goal. 
Current ML systems excel at specific tasks but lack the flexibility and understanding that humans take for granted. Challenges around bias, transparency, and privacy continue to require careful consideration. As ML systems become more prevalent, addressing these limitations while leveraging new capabilities will be crucial. -20. **[Conclusion:](../conclusion/conclusion.qmd)** The book concludes with a reflection on the key learnings and future directions in the field of AI. +## Learning Path and Book Structure -### Tailored Learning +This book is designed to guide you from understanding the fundamentals of ML systems to effectively designing and implementing them. To address the complexities and challenges of Machine Learning Systems engineering, we've organized the content around five fundamental pillars that encompass the lifecycle of ML systems. These pillars provide a framework for understanding, developing, and maintaining robust ML systems. -We understand that readers have diverse interests; some may wish to grasp the fundamentals, while others are eager to delve into advanced topics like hardware acceleration or AI ethics. To help you navigate the book more effectively, we've created a persona-based reading guide tailored to your specific interests and goals. This guide assists you in identifying the reader persona that best matches your interests. Each persona represents a distinct reader profile with specific objectives. By selecting the persona that resonates with you, you can focus on the chapters and sections most relevant to your needs. 
+![Overview of the five fundamental system pillars of Machine Learning Systems engineering.](images/png/book_pillars.png){#fig-pillars} -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| Persona | Description | Relevant Chapters or Sections | Focus | -+:=======================+:=========================================================================+:==============================================+:==========================================================================================================+ -| The TinyML Newbie | You are new to the field of TinyML and eager to learn the basics. | Chapters 1-3, 8, 9, 10, 12 | Understand the fundamentals, gain insights into efficient and optimized ML, | -| | | | and learn about on-device learning. | -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| The EdgeML Enthusiast | You have some TinyML knowledge and are interested in exploring | Chapters 1-3, 8, 9, 10, 12, 13 | Build a strong foundation, delve into the intricacies of efficient ML, | -| | the broader world of EdgeML. | | and explore the operational aspects of embedded systems. 
| -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| The Computer Visionary | You are fascinated by computer vision and its applications in TinyML | Chapters 1-3, 5, 8-10, 12, 13, 17, 20 | Start with the basics, explore data engineering, and study methods for optimizing ML | -| | and EdgeML. | | models. Learn about robustness and the future of ML systems. | -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| The Data Maestro | You are passionate about data and its crucial role in ML systems. | Chapters 1-5, 8-13 | Gain a comprehensive understanding of data's role in ML systems, explore the ML | -| | | | workflow, and dive into model optimization and deployment considerations. | -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| The Hardware Hero | You are excited about the hardware aspects of ML systems and how | Chapters 1-3, 6, 8-10, 12, 14, 17, 20 | Build a solid foundation in ML systems and frameworks, explore challenges of | -| | they impact model performance. | | optimizing models for efficiency, hardware-software co-design, and security aspects. 
| -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| The Sustainability | You are an advocate for sustainability and want to learn how to | Chapters 1-3, 8-10, 12, 15, 16, 20 | Begin with the fundamentals of ML systems and TinyML, explore model optimization | -| Champion | develop eco-friendly AI systems. | | techniques, and learn about responsible and sustainable AI practices. | -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| The AI Ethicist | You are concerned about the ethical implications of AI and want to | Chapters 1-3, 5, 7, 12, 14-16, 19, 20 | Gain insights into the ethical considerations surrounding AI, including fairness, | -| | ensure responsible development and deployment. | | privacy, sustainability, and responsible development practices. | -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ -| The Full-Stack ML | You are a seasoned ML expert and want to deepen your understanding | The entire book | Understand the end-to-end process of building and deploying ML systems, from data | -| Engineer | of the entire ML system stack. | | engineering and model optimization to hardware acceleration and ethical considerations. 
| -+------------------------+--------------------------------------------------------------------------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------+ +As illustrated in @fig-pillars, the five pillars central to the framework are: +- **Data**: Emphasizing data engineering and foundational principles critical to how AI operates in relation to data. +- **Training**: Exploring the methodologies for AI training, focusing on efficiency, optimization, and acceleration techniques to enhance model performance. +- **Deployment**: Encompassing benchmarks, on-device learning strategies, and machine learning operations to ensure effective model application. +- **Operations**: Highlighting the maintenance challenges unique to machine learning systems, which require specialized approaches distinct from traditional engineering systems. +- **Ethics & Governance**: Addressing concerns such as security, privacy, responsible AI practices, and the broader societal implications of AI technologies. -## Join the Community +Each pillar represents a critical phase in the lifecycle of ML systems and is composed of foundational elements that build upon each other. This structure ensures a comprehensive understanding of MLSE, from basic principles to advanced applications and ethical considerations. -Learning in the fast-paced world of AI is a collaborative journey. We set out to nurture a vibrant community of learners, innovators, and contributors. As you explore the concepts and engage with the exercises, we encourage you to share your insights and experiences. Whether it's a novel approach, an interesting application, or a thought-provoking question, your contributions can enrich the learning ecosystem. Engage in discussions, offer and seek guidance, and collaborate on projects to foster a culture of mutual growth and learning. 
By sharing knowledge, you play an important role in fostering a globally connected, informed, and empowered community. +For more detailed information about the book's overview, contents, learning outcomes, target audience, prerequisites, and navigation guide, please refer to the [About the Book](../about/about.qmd) section. There, you'll also find valuable details about our learning community and how to maximize your experience with this resource. \ No newline at end of file diff --git a/contents/core/ml_systems/ml_systems.qmd b/contents/core/ml_systems/ml_systems.qmd index fade11b2..1aeb03c0 100644 --- a/contents/core/ml_systems/ml_systems.qmd +++ b/contents/core/ml_systems/ml_systems.qmd @@ -5,7 +5,7 @@ bibliography: ml_systems.bib # ML Systems {#sec-ml_systems} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-ml-systems-resource), [Videos](#sec-ml-systems-resource), [Exercises](#sec-ml-systems-resource), [Labs](#sec-ml-systems-resource) +Resources: [Slides](#sec-ml-systems-resource), [Videos](#sec-ml-systems-resource), [Exercises](#sec-ml-systems-resource) ::: ![_DALL·E 3 Prompt: Illustration in a rectangular format depicting the merger of embedded systems with Embedded AI. The left half of the image portrays traditional embedded systems, including microcontrollers and processors, detailed and precise. The right half showcases the world of artificial intelligence, with abstract representations of machine learning models, neurons, and data flow. The two halves are distinctly separated, emphasizing the individual significance of embedded tech and AI, but they come together in harmony at the center._](images/png/cover_ml_systems.png) @@ -30,7 +30,7 @@ As this chapter progresses, we will explore ML systems' complex and fascinating ::: -## Introduction +## Overview ML is rapidly evolving, with new paradigms reshaping how models are developed, trained, and deployed. 
The field is experiencing significant innovation driven by advancements in hardware, software, and algorithmic techniques. These developments are enabling machine learning to be applied in diverse settings, from large-scale cloud infrastructures to edge devices and even tiny, resource-constrained environments. @@ -406,12 +406,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * _Coming soon._ ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: diff --git a/contents/core/ondevice_learning/ondevice_learning.qmd b/contents/core/ondevice_learning/ondevice_learning.qmd index d7ff83ea..09a7d192 100644 --- a/contents/core/ondevice_learning/ondevice_learning.qmd +++ b/contents/core/ondevice_learning/ondevice_learning.qmd @@ -5,7 +5,7 @@ bibliography: ondevice_learning.bib # On-Device Learning {#sec-ondevice_learning} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-on-device-learning-resource), [Videos](#sec-on-device-learning-resource), [Exercises](#sec-on-device-learning-resource), [Labs](#sec-on-device-learning-resource) +Resources: [Slides](#sec-on-device-learning-resource), [Videos](#sec-on-device-learning-resource), [Exercises](#sec-on-device-learning-resource) ::: ![_DALL·E 3 Prompt: Drawing of a smartphone with its internal components exposed, revealing diverse miniature engineers of different genders and skin tones actively working on the ML model. The engineers, including men, women, and non-binary individuals, are tuning parameters, repairing connections, and enhancing the network on the fly. 
Data flows into the ML model, being processed in real-time, and generating output inferences._](images/png/cover_ondevice_learning.png) @@ -28,7 +28,7 @@ On-device Learning represents a significant innovation for embedded and edge IoT ::: -## Introduction +## Overview On-device learning refers to training ML models directly on the device where they are deployed, as opposed to traditional methods where models are trained on powerful servers and then deployed to devices. This method is particularly relevant to TinyML, where ML systems are integrated into tiny, resource-constrained devices. @@ -809,12 +809,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-fli ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we also offer a series of hands-on labs that allow students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: diff --git a/contents/core/ops/ops.qmd b/contents/core/ops/ops.qmd index c4d43516..bdceaf3f 100644 --- a/contents/core/ops/ops.qmd +++ b/contents/core/ops/ops.qmd @@ -5,12 +5,12 @@ bibliography: ops.bib # ML Operations {#sec-mlops} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-embedded-aiops-resource), [Videos](#sec-embedded-aiops-resource), [Exercises](#sec-embedded-aiops-resource), [Labs](#sec-embedded-aiops-resource) +Resources: [Slides](#sec-embedded-aiops-resource), [Videos](#sec-embedded-aiops-resource), [Exercises](#sec-embedded-aiops-resource) ::: ![_DALL·E 3 Prompt: Create a detailed, wide rectangular illustration of an AI workflow. The image should showcase the process across six stages, with a flow from left to right: 1. 
Data collection, with diverse individuals of different genders and descents using a variety of devices like laptops, smartphones, and sensors to gather data. 2. Data processing, displaying a data center with active servers and databases with glowing lights. 3. Model training, represented by a computer screen with code, neural network diagrams, and progress indicators. 4. Model evaluation, featuring people examining data analytics on large monitors. 5. Deployment, where the AI is integrated into robotics, mobile apps, and industrial equipment. 6. Monitoring, showing professionals tracking AI performance metrics on dashboards to check for accuracy and concept drift over time. Each stage should be distinctly marked and the style should be clean, sleek, and modern with a dynamic and informative color scheme._](images/png/cover_ml_ops.png) -This chapter explores the practices and architectures needed to effectively develop, deploy, and manage ML models across their entire lifecycle. We examine the various phases of the ML process, including data collection, model training, evaluation, deployment, and monitoring. The importance of automation, collaboration, and continuous improvement is also something we discuss. We contrast different environments for ML model deployment, from cloud servers to embedded edge devices, and analyze their distinct constraints. We demonstrate how to tailor ML system design and operations through concrete examples for reliable and optimized model performance in any target environment. The goal is to provide readers with a comprehensive understanding of ML model management so they can successfully build and run ML applications that sustainably deliver value. +In this chapter, we will dive into the practices and frameworks needed to successfully develop, deploy, and manage machine learning models from start to finish. 
You will learn about each stage in the ML workflow, from data collection and model training to evaluation, deployment, and ongoing monitoring. We will discuss the role of automation, collaboration, and continuous improvement, highlighting why they are essential for keeping ML systems efficient and reliable. We will also explore different deployment environments, from powerful cloud servers to resource-limited edge devices, looking at the unique challenges each presents. Through concrete examples, you will see how to design and operate ML systems that deliver consistent, reliable performance, no matter where they are deployed. By the end of this chapter, you will have a solid grasp of ML model management and be ready to build and maintain ML applications that provide lasting value. ::: {.callout-tip} @@ -32,7 +32,7 @@ This chapter explores the practices and architectures needed to effectively deve ::: -## Introduction +## Overview Machine Learning Operations (MLOps) is a systematic approach that combines machine learning (ML), data science, and software engineering to automate the end-to-end ML lifecycle. This includes everything from data preparation and model training to deployment and maintenance. MLOps ensures that ML models are developed, deployed, and maintained efficiently and effectively. @@ -111,7 +111,7 @@ Learn more about ML Lifecycles through a case study featuring speech recognition ## Key Components of MLOps -In this chapter, we will provide an overview of the core components of MLOps, an emerging set of practices that enables robust delivery and lifecycle management of ML models in production. While some MLOps elements like automation and monitoring were covered in previous chapters, we will integrate them into a framework and expand on additional capabilities like governance. Additionally, we will describe and link to popular tools used within each component, such as [LabelStudio](https://labelstud.io/) for data labeling. 
By the end, we hope that you will understand the end-to-end MLOps methodology that takes models from ideation to sustainable value creation within organizations. +The core components of MLOps form a comprehensive framework that supports the end-to-end lifecycle of ML models in production, from initial development to deployment and ongoing management. In this section, we build on topics like automation and monitoring from previous chapters, integrating them into a broader framework while also introducing additional key practices like governance. Each component contributes to smoother, more streamlined ML operations, with popular tools helping teams tackle specific tasks within this ecosystem. Together, these elements make MLOps a robust approach to managing ML models and creating long-term value within organizations. @fig-ops-layers illustrates the comprehensive MLOps system stack. It shows the various layers involved in machine learning operations. At the top of the stack are ML Models/Applications, such as BERT, followed by ML Frameworks/Platforms like PyTorch. The core MLOps layer, labeled as Model Orchestration, encompasses several key components: Data Management, CI/CD, Model Training, Model Evaluation, Deployment, and Model Serving. Underpinning the MLOps layer is the Infrastructure layer, represented by technologies such as Kubernetes. This layer manages aspects such as Job Scheduling, Resource Management, Capacity Management, and Monitoring, among others. Holding it all together is the Hardware layer, which provides the necessary computational resources for ML operations. @@ -121,19 +121,15 @@ This layered approach in @fig-ops-layers demonstrates how MLOps integrates vario ### Data Management {#sec-ops-data-mgmt} -Robust data management and data engineering actively empower successful [MLOps](https://cloud.google.com/solutions/machine-learning/mlops-continuous-delivery-and-automation-pipelines-in-machine-learning) implementations. 
Teams properly ingest, store, and prepare raw data from sensors, databases, apps, and other systems for model training and deployment. +Data in its raw form, whether collected from sensors, databases, apps, or other systems, often requires significant preparation before it can be used for training or inference. Issues like inconsistent formats, missing values, and evolving labeling conventions can lead to inefficiencies and poor model performance if not systematically addressed. Robust data management practices ensure that data remains high quality, traceable, and readily accessible throughout the ML lifecycle, forming the foundation of scalable machine learning systems. -Teams actively track changes to datasets over time using version control with [Git](https://git-scm.com/) and tools like [GitHub](https://github.com/) or [GitLab](https://about.gitlab.com/). Data scientists collaborate on curating datasets by merging changes from multiple contributors. Teams can review or roll back each iteration of a dataset if needed. +One key aspect of data management is version control. Tools like [Git](https://git-scm.com/), [GitHub](https://github.com/), and [GitLab](https://about.gitlab.com/) enable teams to track changes to datasets, collaborate on curation, and revert to earlier versions when necessary. Alongside versioning, annotating and labeling datasets is crucial for supervised learning tasks. Software like [LabelStudio](https://labelstud.io/) helps distributed teams tag data consistently across large-scale datasets while maintaining access to earlier versions as labeling conventions evolve. These practices not only enhance collaboration but also ensure that models are trained on reliable, well-organized data. -Teams meticulously label and annotate data using labeling software like [LabelStudio](https://labelstud.io/), which enables distributed teams to work on tagging datasets together. 
As the target variables and labeling conventions evolve, teams maintain accessibility to earlier versions. +Once prepared, datasets are typically stored on scalable cloud storage solutions like [Amazon S3](https://aws.amazon.com/s3/) or [Google Cloud Storage](https://cloud.google.com/storage). These services provide versioning, resilience, and granular access controls, safeguarding sensitive data while maintaining flexibility for analysis and modeling. To streamline the transition from raw data to analysis-ready formats, teams build automated pipelines using tools such as [Prefect](https://www.prefect.io/), [Apache Airflow](https://airflow.apache.org/), and [dbt](https://www.getdbt.com/). These pipelines automate tasks like data extraction, cleaning, deduplication, and transformation, reducing manual overhead and improving efficiency. -Teams store the raw dataset and all derived assets on cloud storage services like [Amazon S3](https://aws.amazon.com/s3/) or [Google Cloud Storage](https://cloud.google.com/storage). These services provide scalable, resilient storage with versioning capabilities. Teams can set granular access permissions. +For example, a data pipeline might ingest information from [PostgreSQL](https://www.postgresql.org/) databases, REST APIs, and CSV files stored in S3, applying transformations to produce clean, aggregated datasets. The output can be stored in feature stores like [Tecton](https://www.tecton.ai/) or [Feast](https://feast.dev/), which provide low-latency access for both training and predictions. In an industrial predictive maintenance scenario, sensor data could be processed alongside maintenance records, resulting in enriched datasets stored in Feast for models to access the latest information seamlessly. -Robust data pipelines created by teams automate raw data extraction, joining, cleansing, and transformation into analysis-ready datasets. 
[Prefect](https://www.prefect.io/), [Apache Airflow](https://airflow.apache.org/), and [dbt](https://www.getdbt.com/) are workflow orchestrators that allow engineers to develop flexible, reusable data processing pipelines. - -For instance, a pipeline may ingest data from [PostgreSQL](https://www.postgresql.org/) databases, REST APIs, and CSVs stored on S3. It can filter, deduplicate, and aggregate the data, handle errors, and save the output to S3. The pipeline can also push the transformed data into a feature store like [Tecton](https://www.tecton.ai/) or [Feast](https://feast.dev/) for low-latency access. - -In an industrial predictive maintenance use case, sensor data is ingested from devices into S3. A perfect pipeline processes the sensor data, joining it with maintenance records. The enriched dataset is stored in Feast so models can easily retrieve the latest data for training and predictions. +By integrating version control, annotation tools, storage solutions, and automated pipelines, data management becomes a critical enabler for effective [MLOps](https://cloud.google.com/solutions/machine-learning/mlops-continuous-delivery-and-automation-pipelines-in-machine-learning). These practices ensure that data is not only clean and accessible but also consistently aligned with evolving project needs, allowing machine learning systems to deliver reliable and scalable performance in production environments. @vid-datapipe below is a short overview of data pipelines. @@ -161,55 +157,43 @@ CI/CD pipelines empower teams to iterate and deliver ML models rapidly by connec ### Model Training -In the model training phase, data scientists actively experiment with different ML architectures and algorithms to create optimized models that extract insights and patterns from data. MLOps introduces best practices and automation to make this iterative process more efficient and reproducible. 
- -Modern ML frameworks like [TensorFlow](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/) and [Keras](https://keras.io/) provide pre-built components that simplify designing neural networks and other model architectures. Data scientists leverage built-in modules for layers, activations, losses, etc., and high-level APIs like Keras to focus more on model architecture. - -MLOps enables teams to package model training code into reusable, tracked scripts and notebooks. As models are developed, capabilities like [hyperparameter tuning](https://cloud.google.com/ai-platform/training/docs/hyperparameter-tuning-overview), [neural architecture search](https://arxiv.org/abs/1808.05377) and [automatic feature selection](https://scikit-learn.org/stable/modules/feature_selection.html) rapidly iterate to find the best-performing configurations. +Model training is a critical phase where data scientists experiment with various ML architectures and algorithms to optimize models that extract insights from data. MLOps introduces best practices and automation to make this iterative process more efficient and reproducible. Modern ML frameworks like [TensorFlow](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/), and [Keras](https://keras.io/) provide pre-built components that simplify designing neural networks and other model architectures. These tools allow data scientists to focus on creating high-performing models using built-in modules for layers, activations, and loss functions. -Teams use Git to version control training code and host it in repositories like GitHub to track changes over time. This allows seamless collaboration between data scientists. +To make the training process efficient and reproducible, MLOps introduces best practices such as version-controlling training code using Git and hosting it in repositories like GitHub. 
Reproducible environments, often managed through interactive tools like [Jupyter](https://jupyter.org/) notebooks, allow teams to bundle data ingestion, preprocessing, model development, and evaluation in a single document. These notebooks are not only version-controlled but can also be integrated into automated pipelines for continuous retraining. -Notebooks like [Jupyter](https://jupyter.org/) create an excellent interactive model development environment. The notebooks contain data ingestion, preprocessing, model declaration, training loop, evaluation, and export code in one reproducible document. +Automation plays a significant role in standardizing training workflows. Capabilities such as [hyperparameter tuning](https://cloud.google.com/ai-platform/training/docs/hyperparameter-tuning-overview), [neural architecture search](https://arxiv.org/abs/1808.05377), and [automatic feature selection](https://scikit-learn.org/stable/modules/feature_selection.html) are commonly integrated into MLOps pipelines to iterate rapidly and find optimal configurations. CI/CD pipelines orchestrate training workflows by automating tasks like data preprocessing, model training, evaluation, and registration. For example, a Jenkins pipeline can trigger a Python script to retrain a TensorFlow model, validate its performance against pre-defined metrics, and deploy it if thresholds are met. -Finally, teams orchestrate model training as part of a CI/CD pipeline for automation. For instance, a Jenkins pipeline can trigger a Python script to load new training data, retrain a TensorFlow classifier, evaluate model metrics, and automatically register the model if performance thresholds are met. +Cloud-managed training services have revolutionized the accessibility of high-performance hardware for training models. These services provide on-demand access to GPU-accelerated infrastructure, making advanced training feasible even for small teams. 
Depending on the provider, developers may manage the training workflow themselves or rely on fully managed options like [Vertex AI Fine Tuning](https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models), which can automatically fine-tune a base model using a labeled dataset. However, it is important to note that GPU hardware demand often exceeds supply, and availability may vary based on region or contractual agreements, posing potential bottlenecks for teams relying on cloud services. An example workflow has a data scientist using a PyTorch notebook to develop a CNN model for image classification. The [fastai](https://www.fast.ai/) library provides high-level APIs to simplify training CNNs on image datasets. The notebook trains the model on sample data, evaluates accuracy metrics, and tunes hyperparameters like learning rate and layers to optimize performance. This reproducible notebook is version-controlled and integrated into a retraining pipeline. -Automating and standardizing model training empowers teams to accelerate experimentation and achieve the rigor needed to produce ML systems. +By automating and standardizing model training, leveraging managed cloud services, and integrating modern frameworks, teams can accelerate experimentation and build robust, production-ready ML models. ### Model Evaluation -Before deploying models, teams perform rigorous evaluation and testing to validate meeting performance benchmarks and readiness for release. MLOps introduces best practices around model validation, auditing, and [canary testing](https://martinfowler.com/bliki/CanaryRelease.html). +Before deploying models, teams perform rigorous evaluation and testing to validate that they meet performance benchmarks and are ready for release. MLOps provides best practices for model validation, auditing, and controlled testing methods to minimize risks during deployment. 
-Teams typically evaluate models against holdout [test datasets](https://en.wikipedia.org/wiki/Training,_validation,_and_test_sets) that are not used during training. The test data originates from the same distribution as production data. Teams calculate metrics like [accuracy](https://en.wikipedia.org/wiki/Accuracy_and_precision), [AUC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve), [precision](https://en.wikipedia.org/wiki/Precision_and_recall), [recall](https://en.wikipedia.org/wiki/Precision_and_recall), and [F1 score](https://en.wikipedia.org/wiki/F1_score). +The evaluation process begins with testing models against holdout [test datasets](https://en.wikipedia.org/wiki/Training,_validation,_and_test_sets) that are independent of the training data but originate from the same distribution as production data. Key metrics such as [accuracy](https://en.wikipedia.org/wiki/Accuracy_and_precision), [AUC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve), [precision](https://en.wikipedia.org/wiki/Precision_and_recall), [recall](https://en.wikipedia.org/wiki/Precision_and_recall), and [F1 score](https://en.wikipedia.org/wiki/F1_score) are calculated to quantify model performance. Tracking these metrics over time helps teams identify trends and potential degradation in model behavior, particularly when evaluation data comes from live production streams. This is vital for detecting [data drift](https://www.ibm.com/cloud/learn/data-drift), where changes in input data distributions can erode model accuracy. -Teams also track the same metrics over time against test data samples. If evaluation data comes from live production streams, this catches [data drifts](https://www.ibm.com/cloud/learn/data-drift) that degrade model performance over time. +To validate real-world performance, [canary testing](https://martinfowler.com/bliki/CanaryRelease.html) deploys the model to a small subset of users. 
This gradual rollout allows teams to monitor metrics in a live environment and catch potential issues before full-scale deployment. By incrementally increasing traffic to the new model, teams can confidently evaluate its impact on end-user experience. For instance, a retailer might test a personalized recommendation model by comparing its accuracy and diversity metrics against historical data. During the testing phase, the team tracks live performance metrics and identifies a slight accuracy decline over two weeks. To ensure stability, the model is initially deployed to 5% of web traffic, monitored for potential issues, and only rolled out widely after proving robust in production. -Human oversight for model release remains important. Data scientists review performance across key segments and slices. Error analysis helps identify model weaknesses to guide enhancement. Teams apply [fairness](https://developers.google.com/machine-learning/fairness-overview) and [bias detection](https://developers.google.com/machine-learning/fairness-overview) techniques. +ML models deployed to the cloud benefit from constant internet connectivity and the ability to log every request and response. This makes it feasible to replay or generate synthetic requests for comparing different models and versions. Some providers offer tools that automate parts of the evaluation process, such as tracking hyperparameter experiments or comparing model runs. For instance, platforms like [Weights and Biases](https://wandb.ai/) streamline this process by automating experiment tracking and generating artifacts from training runs. -Canary testing releases a model to a small subset of users to evaluate real-world performance before wide deployment. Teams incrementally route traffic to the canary release while monitoring for issues. - -For example, a retailer evaluates a personalized product recommendation model against historical test data, reviewing accuracy and diversity metrics. 
Teams also calculate metrics on live customer data over time, detecting decreased accuracy over the last 2 weeks. Before full rollout, the new model is released to 5% of web traffic to ensure no degradation. - -Automating evaluation and canary releases reduces deployment risks. However, human review still needs to be more critical to assess less quantifiable dynamics of model behavior. Rigorous pre-deployment validation provides confidence in putting models into production. +Automating evaluation and testing processes, combined with careful canary releases, reduces deployment risks. While automated evaluation processes catch many issues, human oversight remains essential for reviewing performance across specific data segments and identifying subtle weaknesses. This combination of rigorous pre-deployment validation and real-world testing provides teams with confidence when putting models into production. ### Model Deployment Teams need to properly package, test, and track ML models to reliably deploy them to production. MLOps introduces frameworks and procedures for actively versioning, deploying, monitoring, and updating models in sustainable ways. -Teams containerize models using [Docker](https://www.docker.com/), which bundles code, libraries, and dependencies into a standardized unit. Containers enable smooth portability across environments. - -Frameworks like [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) and [BentoML](https://bentoml.org/) help serve predictions from deployed models via performance-optimized APIs. These frameworks handle versioning, scaling, and monitoring. - -Teams first deploy updated models to staging or QA environments for testing before full production rollout. Shadow or canary deployments route a sample of traffic to test model variants. Teams incrementally increase access to new models. 
+One common approach to deployment involves containerizing models using tools like [Docker](https://www.docker.com/), which package code, libraries, and dependencies into standardized units. Containers ensure smooth portability across environments, making deployment consistent and predictable. Frameworks like [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) and [BentoML](https://bentoml.org/) help serve predictions from deployed models via performance-optimized APIs. These frameworks handle versioning, scaling, and monitoring. -Teams build robust rollback procedures in case issues emerge. Rollbacks revert to the last known good model version. Integration with CI/CD pipelines simplifies redeployment if needed. +Before full-scale rollout, teams deploy updated models to staging or QA environments to rigorously test performance. Techniques such as shadow or canary deployments are used to validate new models incrementally. For instance, canary deployments route a small percentage of traffic to the new model while closely monitoring performance. If no issues arise, traffic to the new model gradually increases. Robust rollback procedures are essential to handle unexpected issues, reverting systems to the previous stable model version to ensure minimal disruption. Integration with CI/CD pipelines further automates the deployment and rollback process, enabling efficient iteration cycles. -Teams carefully track model artifacts, such as scripts, weights, logs, and metrics, for each version with ML metadata tools like [MLflow](https://mlflow.org/). This maintains lineage and auditability. +To maintain lineage and auditability, teams track model artifacts, including scripts, weights, logs, and metrics, using tools like [MLflow](https://mlflow.org/). Model registries, such as [Vertex AI's model registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction), act as centralized repositories for storing and managing trained models. 
These registries not only facilitate version comparisons but also often include access to base models, which may be open source, proprietary, or a hybrid (e.g., [LLAMA](https://ai.meta.com/llama/)). Deploying a model from the registry to an inference endpoint is streamlined, handling resource provisioning, model weight downloads, and hosting. -For example, a retailer containerizes a product recommendation model in [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) and deploys it to a [Kubernetes](https://kubernetes.io/) staging cluster. After monitoring and approving performance on sample traffic, Kubernetes shifts 10% of production traffic to the new model. If no issues are detected after a few days, the new model takes over 100% of traffic. However, teams should keep the previous version accessible for rollback if needed. +Inference endpoints typically expose the deployed model via REST APIs for real-time predictions. Depending on performance requirements, teams can configure resources, such as GPU accelerators, to meet latency and throughput targets. Some providers also offer flexible options like serverless or batch inference, eliminating the need for persistent endpoints and enabling cost-efficient, scalable deployments. For example, [AWS SageMaker Inference](https://docs.aws.amazon.com/sagemaker/latest/dg/deploy-model.html) supports such configurations. -Model deployment processes enable teams to make ML systems resilient in production by accounting for all transition states. +By leveraging these tools and practices, teams can deploy ML models resiliently, ensuring smooth transitions between versions, maintaining production stability, and optimizing performance across diverse use cases. 
### Model Serving @@ -248,7 +232,7 @@ Containers and orchestrators like Docker and Kubernetes allow teams to package m By leveraging cloud elasticity, teams scale resources up and down to meet spikes in workloads like hyperparameter tuning jobs or spikes in prediction requests. [Auto-scaling](https://aws.amazon.com/autoscaling/) enables optimized cost efficiency. -Infrastructure spans on-prem, cloud, and edge devices. A robust technology stack provides flexibility and resilience. Monitoring tools allow teams to observe resource utilization. +Infrastructure spans on-premises (on-prem), cloud, and edge devices. A robust technology stack provides flexibility and resilience. Monitoring tools allow teams to observe resource utilization. For example, a Terraform config may deploy a GCP Kubernetes cluster to host trained TensorFlow models exposed as prediction microservices. The cluster scales up pods to handle increased traffic. CI/CD integration seamlessly rolls out new model containers. @@ -410,7 +394,7 @@ Although financial debt is a good metaphor for understanding tradeoffs, it diffe The [Hidden Technical Debt of Machine Learning Systems](https://papers.nips.cc/paper_files/paper/2015/file/86df7dcfd896fcaf2674f757a2463eba-Paper.pdf) paper spreads awareness of the nuances of ML system-specific tech debt. It encourages additional development in the broad area of maintainable ML. -## Roles and Responsibilities +## Roles and Responsibilities {#sec-roles-and_resp-ops} Given the vastness of MLOps, successfully implementing ML systems requires diverse skills and close collaboration between people with different areas of expertise. While data scientists build the core ML models, it takes cross-functional teamwork to successfully deploy these models into production environments and enable them to deliver sustainable business value. 
@@ -464,7 +448,7 @@ The ML engineering team enables data science models to progress smoothly into su ### DevOps Engineers -DevOps engineers enable MLOps by building and managing the underlying infrastructure for developing, deploying, and monitoring ML models. They provide the cloud architecture and automation pipelines. Their main responsibilities include: +DevOps engineers enable MLOps by building and managing the underlying infrastructure for developing, deploying, and monitoring ML models. As a specialized branch of software engineering, DevOps focuses on creating automation pipelines, cloud architecture, and operational frameworks. Their main responsibilities include: * Provisioning and managing cloud infrastructure for ML workflows using IaC tools like Terraform, Docker, and Kubernetes. * Developing CI/CD pipelines for model retraining, validation, and deployment. Integrating ML tools into the pipeline, such as MLflow and Kubeflow. @@ -492,56 +476,10 @@ For example, a project manager would create a project plan for developing and en Skilled project managers enable MLOps teams to work synergistically to rapidly deliver maximum business value from ML investments. Their leadership and organization align with diverse teams. -## Embedded System Challenges - -Building on our discussion of [On-device Learning](../ondevice_learning/ondevice_learning.qmd) in the previous chapter, we now turn our attention to the broader context of embedded systems in MLOps. The unique constraints and requirements of embedded environments significantly impact the implementation of machine learning models and operations. To set the stage for the specific challenges that emerge with embedded MLOps, it is important to first review the general challenges associated with embedded systems. This overview will provide a foundation for understanding how these constraints intersect with and shape the practices of MLOps in resource-limited, edge computing scenarios. 
- -### Limited Compute Resources - -Embedded devices like microcontrollers and mobile phones have much more constrained computing power than data center machines or GPUs. A typical microcontroller may have only KB of RAM, MHz CPU speed, and no GPU. For example, a microcontroller in a smartwatch may only have a 32-bit processor running at 120MHz with 320KB of RAM [@stm2021l4]. This allows simple ML models like small linear regressions or random forests, but more complex deep neural networks would be infeasible. Strategies to mitigate this include quantization, pruning, efficient model architectures, and offloading certain computations to the cloud when connectivity allows. - -### Constrained Memory - -Storing large ML models and datasets directly on embedded devices is often infeasible with limited memory. For example, a deep neural network model can easily take hundreds of MB, which exceeds the storage capacity of many embedded systems. Consider this example. A wildlife camera that captures images to detect animals may have only a 2GB memory card. More is needed to store a deep learning model for image classification that is often hundreds of MB in size. Consequently, this requires optimization of memory usage through weights compression, lower-precision numerics, and streaming inference pipelines. - -### Intermittent Connectivity - -Many embedded devices operate in remote environments without reliable internet connectivity. We must rely on something other than constant cloud access for convenient retraining, monitoring, and deployment. Instead, we need smart scheduling and caching strategies to optimize for intermittent connections. For example, a model predicting crop yield on a remote farm may need to make predictions daily but only have connectivity to the cloud once a week when the farmer drives into town. The model needs to operate independently in between connections. 
- -### Power Limitations - -Embedded devices like phones, wearables, and remote sensors are battery-powered. Continual inference and communication can quickly drain those batteries, limiting functionality. For example, a smart collar tagging endangered animals runs on a small battery. Continuously running a GPS tracking model would drain the battery within days. The collar has to schedule when to activate the model carefully. Thus, embedded ML has to manage tasks carefully to conserve power. Techniques include optimized hardware accelerators, prediction caching, and adaptive model execution. - -### Fleet Management - -For mass-produced embedded devices, millions of units can be deployed in the field to orchestrate updates. Hypothetically, updating a fraud detection model on 100 million (future smart) credit cards requires securely pushing updates to each distributed device rather than a centralized data center. Such a distributed scale makes fleet-wide management much harder than a centralized server cluster. It requires intelligent protocols for over-the-air updates, handling connectivity issues, and monitoring resource constraints across devices. - -### On-Device Data Collection - -Collecting useful training data requires engineering both the sensors on the device and the software pipelines. This is unlike servers, where we can pull data from external sources. Challenges include handling sensor noise. Sensors on an industrial machine detect vibrations and temperature to predict maintenance needs. This requires tuning the sensors and sampling rates to capture useful data. - -### Device-Specific Personalization - -A smart speaker learns an individual user's voice patterns and speech cadence to improve recognition accuracy while protecting privacy. Adapting ML models to specific devices and users is important, but this poses privacy challenges. On-device learning allows personalization without transmitting as much private data. 
However, balancing model improvement, privacy preservation, and constraints requires novel techniques. - -### Safety Considerations - -If extremely large embedded ML in systems like self-driving vehicles is not engineered carefully, there are serious safety risks. To ensure safe operation before deployment, self-driving cars must undergo extensive track testing in simulated rain, snow, and obstacle scenarios. This requires extensive validation, fail-safes, simulators, and standards compliance before deployment. - -### Diverse Hardware Targets - -There is a diverse range of embedded processors, including ARM, x86, specialized AI accelerators, FPGAs, etc. Supporting this heterogeneity makes deployment challenging. We need strategies like standardized frameworks, extensive testing, and model tuning for each platform. For example, an object detection model needs efficient implementations across embedded devices like a Raspberry Pi, Nvidia Jetson, and Google Edge TPU. - -### Testing Coverage - -Rigorously testing edge cases is difficult with constrained embedded simulation resources, but exhaustive testing is critical in systems like self-driving cars. Exhaustively testing an autopilot model requires millions of simulated kilometers, exposing it to rare events like sensor failures. Therefore, strategies like synthetic data generation, distributed simulation, and chaos engineering help improve coverage. - -### Concept Drift Detection - -With limited monitoring data from each remote device, detecting changes in the input data over time is much harder. Drift can lead to degraded model performance. Lightweight methods are needed to identify when retraining is necessary. A model predicting power grid loads shows declining performance as usage patterns change over time. With only local device data, this trend is difficult to spot. - ## Traditional MLOps vs. 
Embedded MLOps +Building on our discussion of [On-device Learning](../ondevice_learning/ondevice_learning.qmd) in the previous chapter, we now turn our attention to the broader context of embedded systems in MLOps. The unique constraints and requirements of embedded environments significantly impact the implementation of machine learning models and operations. As we have discussed in previous chapters, embedded systems introduce unique challenges to MLOps due to their constrained resources, intermittent connectivity, and the need for efficient, power-aware computation. Unlike cloud environments with abundant compute and storage, embedded devices often operate with limited memory, power, and processing capabilities, requiring careful optimization of workflows. These limitations influence all aspects of MLOps, from deployment and data collection to monitoring and updates. + In traditional MLOps, ML models are typically deployed in cloud-based or server environments, with abundant resources like computing power and memory. These environments facilitate the smooth operation of complex models that require significant computational resources. For instance, a cloud-based image recognition model might be used by a social media platform to tag photos with relevant labels automatically. In this case, the model can leverage the extensive resources available in the cloud to efficiently process vast amounts of data. On the other hand, embedded MLOps involves deploying ML models on embedded systems, specialized computing systems designed to perform specific functions within larger systems. Embedded systems are typically characterized by their limited computational resources and power. For example, an ML model might be embedded in a smart thermostat to optimize heating and cooling based on the user's preferences and habits. The model must be optimized to run efficiently on the thermostat's limited hardware without compromising its performance or accuracy. 
@@ -589,7 +527,7 @@ The volume of aggregated data is much lower, often requiring techniques like fed Furthermore, the models must use simplified architectures optimized for low-power edge hardware. Given the computing limitations, high-end GPUs are inaccessible for intensive deep learning. Training leverages lower-powered edge servers and clusters with distributed approaches to spread load. -Transfer learning emerges as a crucial strategy to address data scarcity and irregularity in machine learning, particularly in edge computing scenarios. As illustrated in @fig-transfer-learning-mlops, this approach involves pre-training models on large public datasets and then fine-tuning them on limited domain-specific edge data. The figure depicts a neural network where initial layers (W_{A1} to W_{A4}), responsible for general feature extraction, are frozen (indicated by a green dashed line). These layers retain knowledge from previous tasks, accelerating learning and reducing resource requirements. The latter layers (W_{A5} to W_{A7}), beyond the blue dashed line, are fine-tuned for the specific task, focusing on task-specific feature learning. +Transfer learning emerges as a crucial strategy to address data scarcity and irregularity in machine learning, particularly in edge computing scenarios. As illustrated in @fig-transfer-learning-mlops, this approach involves pre-training models on large public datasets and then fine-tuning them on limited domain-specific edge data. The figure depicts a neural network where initial layers ($W_{A1}$ to $W_{A4}$), responsible for general feature extraction, are frozen (indicated by a green dashed line). These layers retain knowledge from previous tasks, accelerating learning and reducing resource requirements. The latter layers ($W_{A5}$ to $W_{A7}$), beyond the blue dashed line, are fine-tuned for the specific task, focusing on task-specific feature learning. ![Transfer learning in MLOps. 
Source: HarvardX.](images/png/transfer_learning.png){#fig-transfer-learning-mlops} @@ -721,35 +659,7 @@ Embedded MLOps governance must encompass privacy, security, safety, transparency So, while Embedded MLOps shares foundational MLOps principles, it faces unique constraints in tailoring workflows and infrastructure specifically for resource-constrained edge devices. -### Traditional MLOps - -Google, Microsoft, and Amazon offer their version of managed ML services. These include services that manage model training and experimentation, model hosting and scaling, and monitoring. These offerings are available via an API and client SDKs, as well as through web UIs. While it is possible to build your own end-to-end MLOps solutions using pieces from each, the greatest ease of use benefits come by staying within a single provider ecosystem to take advantage of interservice integrations. - -The following sections present a quick overview of the services that fit into each part of the MLOps life cycle described above, providing examples of offerings from different providers. It's important to note that the MLOps space is evolving rapidly; new companies and products are entering the scene at a swift pace. The examples mentioned are not meant to serve as endorsements of particular companies' offerings but rather to illustrate the types of solutions available in the market. - -#### Data Management - -Data storage and versioning are table stakes for any commercial offering, and most take advantage of existing general-purpose storage solutions such as S3. Others use more specialized options such as git-based storage (Example: [Hugging Face's Dataset Hub](https://huggingface.co/datasets)). This is an area where providers make it easy to support their competitors' data storage options, as they don't want this to be a barrier for adoptions of the rest of their MLOps services. 
For example, Vertex AI's training pipeline seamlessly supports datasets stored in S3, Google Cloud Buckets, or Hugging Face's Dataset Hub. - -#### Model Training - -Managed training services are where cloud providers shine, as they provide on-demand access to hardware that is out of reach for most smaller companies. They bill only for hardware during training time, putting GPU-accelerated training within reach of even the smallest developer teams. The control developers have over their training workflow can vary widely depending on their needs. Some providers have services that provide little more than access to the resources and rely on the developer to manage the training loop, logging, and model storage themselves. Other services are as simple as pointing to a base model and a labeled data set to kick off a fully managed finetuning job (example: [Vertex AI Fine Tuning](https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models)). - -A word of warning: As of 2023, GPU hardware demand well exceeds supply, and as a result, cloud providers are rationing access to their GPUs. In some data center regions, GPUs may be unavailable or require long-term contracts. - -#### Model Evaluation - -Model evaluation tasks typically involve monitoring models' accuracy, latency, and resource usage in both the testing and production phases. Unlike embedded systems, ML models deployed to the cloud benefit from constant internet connectivity and unlimited logging capacities. As a result, it is often feasible to capture and log every request and response. This makes replaying or generating synthetic requests to compare different models and versions tractable. - -Some providers also offer services that automate the experiment tracking of modifying model hyperparameters. They track the runs and performance and generate artifacts from these model training runs. 
Example: [WeightsAndBiases](https://wandb.ai/) - -#### Model Deployment - -Each provider typically has a service referred to as a "model registry," where training models are stored and accessed. Often, these registries may also provide access to base models that are either open source or provided by larger technology companies (or, in some cases, like [LLAMA](https://ai.meta.com/llama/), both!). These model registries are a common place to compare all the models and their versions to allow easy decision-making on which to pick for a given use case. Example: [Vertex AI's model registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction) - -From the model registry, deploying a model to an inference endpoint is quick and simple, and it handles the resource provisioning, model weight downloading, and hosting of a given model. These services typically give access to the model via a REST API where inference requests can be sent. Depending on the model type, specific resources can be configured, such as which type of GPU accelerator may be needed to hit the desired performance. Some providers may also offer serverless inference or batch inference options that do not need a persistent endpoint to access the model. Example: [AWS SageMaker Inference](https://docs.aws.amazon.com/sagemaker/latest/dg/deploy-model.html) - -### Embedded MLOps +### Embedded MLOps Services Despite the proliferation of new MLOps tools in response to the increase in demand, the challenges described earlier have constrained the availability of such tools in embedded systems environments. More recently, new tools such as Edge Impulse [@janapa2023edge] have made the development process somewhat easier, as described below. 
@@ -1023,13 +933,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-ei ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we also offer a series of hands-on labs that allow students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: - diff --git a/contents/core/optimizations/optimizations.qmd b/contents/core/optimizations/optimizations.qmd index 85c292b8..e9b721eb 100644 --- a/contents/core/optimizations/optimizations.qmd +++ b/contents/core/optimizations/optimizations.qmd @@ -5,7 +5,7 @@ bibliography: optimizations.bib # Model Optimizations {#sec-model_optimizations} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-model-optimizations-resource), [Videos](#sec-model-optimizations-resource), [Exercises](#sec-model-optimizations-resource), [Labs](#sec-model-optimizations-resource) +Resources: [Slides](#sec-model-optimizations-resource), [Videos](#sec-model-optimizations-resource), [Exercises](#sec-model-optimizations-resource) ::: ![_DALL·E 3 Prompt: Illustration of a neural network model represented as a busy construction site, with a diverse group of construction workers, both male and female, of various ethnicities, labeled as 'pruning', 'quantization', and 'sparsity'. They are working together to make the neural network more efficient and smaller, while maintaining high accuracy. The 'pruning' worker, a Hispanic female, is cutting unnecessary connections from the middle of the network. The 'quantization' worker, a Caucasian male, is adjusting or tweaking the weights all over the place. The 'sparsity' worker, an African female, is removing unnecessary nodes to shrink the model. 
Construction trucks and cranes are in the background, assisting the workers in their tasks. The neural network is visually transforming from a complex and large structure to a more streamlined and smaller one._](images/png/cover_model_optimizations.png) @@ -31,7 +31,7 @@ When machine learning models are deployed on systems, especially on resource-con ::: -## Introduction +## Overview The optimization of machine learning models for practical deployment is a critical aspect of AI systems. This chapter focuses on exploring model optimization techniques as they relate to the development of ML systems, ranging from high-level model architecture considerations to low-level hardware adaptations. @fig-3-sections Illustrates the three layers of the optimization stack we cover. @@ -973,12 +973,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-md ::: - -:::{.callout-warning collapse="false"} -#### Labs - -In addition to exercises, we also offer a series of hands-on labs that allow students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. 
- -* *Coming soon.* -::: - diff --git a/contents/core/privacy_security/privacy_security.qmd b/contents/core/privacy_security/privacy_security.qmd index 42a9dd82..9797eaeb 100644 --- a/contents/core/privacy_security/privacy_security.qmd +++ b/contents/core/privacy_security/privacy_security.qmd @@ -5,7 +5,7 @@ bibliography: privacy_security.bib # Security & Privacy {#sec-security_privacy} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-security-and-privacy-resource), [Videos](#sec-security-and-privacy-resource), [Exercises](#sec-security-and-privacy-resource), [Labs](#sec-security-and-privacy-resource) +Resources: [Slides](#sec-security-and-privacy-resource), [Videos](#sec-security-and-privacy-resource), [Exercises](#sec-security-and-privacy-resource) ::: ![_DALL·E 3 Prompt: An illustration on privacy and security in machine learning systems. The image shows a digital landscape with a network of interconnected nodes and data streams, symbolizing machine learning algorithms. In the foreground, there's a large lock superimposed over the network, representing privacy and security. The lock is semi-transparent, allowing the underlying network to be partially visible. The background features binary code and digital encryption symbols, emphasizing the theme of cybersecurity. The color scheme is a mix of blues, greens, and grays, suggesting a high-tech, digital environment._](images/png/cover_security_privacy.png) @@ -36,7 +36,7 @@ Security and privacy are critical when developing real-world machine learning sy ::: -## Introduction +## Overview Machine learning has evolved substantially from its academic origins, where privacy was not a primary concern. As ML migrated into commercial and consumer applications, the data became more sensitive - encompassing personal information like communications, purchases, and health data. This explosion of data availability fueled rapid advancements in ML capabilities. 
However, it also exposed new privacy risks, as demonstrated by incidents like the [AOL data leak in 2006](https://en.wikipedia.org/wiki/AOL_search_log_release) and the [Cambridge Analytica](https://www.nytimes.com/2018/04/04/us/politics/cambridge-analytica-scandal-fallout.html) scandal. @@ -1175,13 +1175,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-he ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: - diff --git a/contents/core/responsible_ai/responsible_ai.qmd b/contents/core/responsible_ai/responsible_ai.qmd index e6ebf4cd..dc6643dd 100644 --- a/contents/core/responsible_ai/responsible_ai.qmd +++ b/contents/core/responsible_ai/responsible_ai.qmd @@ -5,7 +5,7 @@ bibliography: responsible_ai.bib # Responsible AI {#sec-responsible_ai} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-responsible-ai-resource), [Videos](#sec-responsible-ai-resource), [Exercises](#sec-responsible-ai-resource), [Labs](#sec-responsible-ai-resource) +Resources: [Slides](#sec-responsible-ai-resource), [Videos](#sec-responsible-ai-resource), [Exercises](#sec-responsible-ai-resource) ::: ![_DALL·E 3 Prompt: Illustration of responsible AI in a futuristic setting with the universe in the backdrop: A human hand or hands nurturing a seedling that grows into an AI tree, symbolizing a neural network. The tree has digital branches and leaves, resembling a neural network, to represent the interconnected nature of AI. 
The background depicts a future universe where humans and animals with general intelligence collaborate harmoniously. The scene captures the initial nurturing of the AI as a seedling, emphasizing the ethical development of AI technology in harmony with humanity and the universe._](images/png/cover_responsible_ai.png) @@ -28,7 +28,7 @@ As machine learning models grow across various domains, these algorithms have th ::: -## Introduction +## Overview Machine learning models are increasingly used to automate decisions in high-stakes social domains like healthcare, criminal justice, and employment. However, without deliberate care, these algorithms can perpetuate biases, breach privacy, or cause other harm. For instance, a loan approval model solely trained on data from high-income neighborhoods could disadvantage applicants from lower-income areas. This motivates the need for responsible machine learning - creating fair, accountable, transparent, and ethical models. @@ -500,14 +500,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * _Coming soon._ ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. 
- -* _Coming soon._ -::: - - diff --git a/contents/core/robust_ai/robust_ai.qmd b/contents/core/robust_ai/robust_ai.qmd index 29f8b34a..91b24604 100644 --- a/contents/core/robust_ai/robust_ai.qmd +++ b/contents/core/robust_ai/robust_ai.qmd @@ -5,7 +5,7 @@ bibliography: robust_ai.bib # Robust AI {#sec-robust_ai} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-robust-ai-resource), [Videos](#sec-robust-ai-resource), [Exercises](#sec-robust-ai-resource), [Labs](#sec-robust-ai-resource) +Resources: [Slides](#sec-robust-ai-resource), [Videos](#sec-robust-ai-resource), [Exercises](#sec-robust-ai-resource) ::: ![_DALL·E 3 Prompt: Create an image featuring an advanced AI system symbolized by an intricate, glowing neural network, deeply nested within a series of progressively larger and more fortified shields. Each shield layer represents a layer of defense, showcasing the system's robustness against external threats and internal errors. The neural network, at the heart of this fortress of shields, radiates with connections that signify the AI's capacity for learning and adaptation. This visual metaphor emphasizes not only the technological sophistication of the AI but also its resilience and security, set against the backdrop of a state-of-the-art, secure server room filled with the latest in technological advancements. The image aims to convey the concept of ultimate protection and resilience in the field of artificial intelligence._](./images/png/cover_robust_ai.png) @@ -32,7 +32,7 @@ This chapter explores the fundamental concepts, techniques, and tools for buildi ::: -## Introduction +## Overview Robust AI refers to a system's ability to maintain its performance and reliability in the presence of errors. A robust machine learning system is designed to be fault-tolerant and error-resilient, capable of operating effectively even under adverse conditions. 
@@ -1075,13 +1075,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-ft ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: - diff --git a/contents/core/sustainable_ai/sustainable_ai.qmd b/contents/core/sustainable_ai/sustainable_ai.qmd index cfca4fb5..21c766e0 100644 --- a/contents/core/sustainable_ai/sustainable_ai.qmd +++ b/contents/core/sustainable_ai/sustainable_ai.qmd @@ -5,7 +5,7 @@ bibliography: sustainable_ai.bib # Sustainable AI {#sec-sustainable_ai} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-sustainable-ai-resource), [Videos](#sec-sustainable-ai-resource), [Exercises](#sec-sustainable-ai-resource), [Labs](#sec-sustainable-ai-resource) +Resources: [Slides](#sec-sustainable-ai-resource), [Videos](#sec-sustainable-ai-resource), [Exercises](#sec-sustainable-ai-resource) ::: ![_DALL·E 3 Prompt: 3D illustration on a light background of a sustainable AI network interconnected with a myriad of eco-friendly energy sources. The AI actively manages and optimizes its energy from sources like solar arrays, wind turbines, and hydro dams, emphasizing power efficiency and performance. 
Deep neural networks spread throughout, receiving energy from these sustainable resources._](images/png/cover_sustainable_ai.png) @@ -23,7 +23,7 @@ Resources: [Slides](#sec-sustainable-ai-resource), [Videos](#sec-sustainable-ai- ::: -## Introduction +## Overview The rapid advancements in artificial intelligence (AI) and machine learning (ML) have led to many beneficial applications and optimizations for performance efficiency. However, the remarkable growth of AI comes with a significant yet often overlooked cost: its environmental impact. The most recent report released by the IPCC, the international body leading scientific assessments of climate change and its impacts, emphasized the pressing importance of tackling climate change. Without immediate efforts to decrease global $\textrm{CO}_2$ emissions by at least 43 percent before 2030, we exceed global warming of 1.5 degrees Celsius [@lecocq2022mitigation]. This could initiate positive feedback loops, pushing temperatures even higher. Next to environmental issues, the United Nations recognized [17 Sustainable Development Goals (SDGs)](https://sdgs.un.org/goals), in which AI can play an important role, and vice versa, play an important role in the development of AI systems. As the field continues expanding, considering sustainability is crucial. @@ -705,12 +705,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * @exr-mle ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer hands-on labs that allow students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. 
- -* _Coming soon._ -::: diff --git a/contents/core/training/training.qmd b/contents/core/training/training.qmd index fb47192f..e8272200 100644 --- a/contents/core/training/training.qmd +++ b/contents/core/training/training.qmd @@ -5,7 +5,7 @@ bibliography: training.bib # AI Training {#sec-ai_training} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-ai-training-resource), [Videos](#sec-ai-frameworks-resource), [Exercises](#sec-ai-training-resource), [Labs](#sec-ai-training-resource) +Resources: [Slides](#sec-ai-training-resource), [Videos](#sec-ai-frameworks-resource), [Exercises](#sec-ai-training-resource) ::: ![_DALL·E 3 Prompt: An illustration for AI training, depicting a neural network with neurons that are being repaired and firing. The scene includes a vast network of neurons, each glowing and firing to represent activity and learning. Among these neurons, small figures resembling engineers and scientists are actively working, repairing and tweaking the neurons. These miniature workers symbolize the process of training the network, adjusting weights and biases to achieve convergence. The entire scene is a visual metaphor for the intricate and collaborative effort involved in AI training, with the workers representing the continuous optimization and learning within a neural network. The background is a complex array of interconnected neurons, creating a sense of depth and complexity._](images/png/ai_training.png) @@ -34,7 +34,7 @@ Training is central to developing accurate and useful AI systems using machine l ::: -## Introduction +## Overview Training is critical for developing accurate and useful AI systems using machine learning. The training creates a machine learning model that can generalize to new, unseen data rather than memorizing the training examples. This is done by feeding training data into algorithms that learn patterns from these examples by adjusting internal parameters. 
diff --git a/contents/core/workflow/workflow.qmd b/contents/core/workflow/workflow.qmd index 639f4095..41bf1b73 100644 --- a/contents/core/workflow/workflow.qmd +++ b/contents/core/workflow/workflow.qmd @@ -5,7 +5,7 @@ bibliography: workflow.bib # AI Workflow {#sec-ai_workflow} ::: {.content-visible when-format="html"} -Resources: [Slides](#sec-ai-workflow-resource), [Videos](#sec-ai-workflow-resource), [Exercises](#sec-ai-workflow-resource), [Labs](#sec-ai-workflow-resource) +Resources: [Slides](#sec-ai-workflow-resource), [Videos](#sec-ai-workflow-resource), [Exercises](#sec-ai-workflow-resource) ::: ![_DALL·E 3 Prompt: Create a rectangular illustration of a stylized flowchart representing the AI workflow/pipeline. From left to right, depict the stages as follows: 'Data Collection' with a database icon, 'Data Preprocessing' with a filter icon, 'Model Design' with a brain icon, 'Training' with a weight icon, 'Evaluation' with a checkmark, and 'Deployment' with a rocket. Connect each stage with arrows to guide the viewer horizontally through the AI processes, emphasizing these steps' sequential and interconnected nature._](images/png/cover_ai_workflow.png) @@ -128,7 +128,7 @@ Understanding the various roles involved in an ML project is crucial for its suc : Roles and responsibilities of people involved in MLOps. {#tbl-mlops_roles .striped .hover} -As we proceed through the upcoming chapters, we will explore each role's essence and expertise and foster a deeper understanding of the complexities involved in AI projects. This holistic view facilitates seamless collaboration and nurtures an environment ripe for innovation and breakthroughs. +This holistic view facilitates seamless collaboration and nurtures an environment ripe for innovation and breakthroughs. As we proceed through the upcoming chapters, we will explore each role's essence and expertise and foster a deeper understanding of the complexities involved in AI projects. 
For a more detailed discussion of the specific tools and techniques these roles use, as well as an in-depth exploration of their responsibilities, refer to @sec-roles-and_resp-ops. ## Conclusion @@ -169,12 +169,3 @@ To reinforce the concepts covered in this chapter, we have curated a set of exer * _Coming soon._ ::: - -:::{.callout-warning collapse="false"} - -#### Labs - -In addition to exercises, we offer a series of hands-on labs allowing students to gain practical experience with embedded AI technologies. These labs provide step-by-step guidance, enabling students to develop their skills in a structured and supportive environment. We are excited to announce that new labs will be available soon, further enriching the learning experience. - -* _Coming soon._ -::: diff --git a/contents/dedication.qmd b/contents/dedication.qmd deleted file mode 100644 index 9e171332..00000000 --- a/contents/dedication.qmd +++ /dev/null @@ -1,7 +0,0 @@ ---- -comments: false ---- - -# Dedication {.unnumbered} - -_This book is a testament to the idea that, in the vast expanse of technology and innovation, it's not always the largest systems, but the smallest ones, that can change the world._ diff --git a/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd b/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd index 763eccc3..d1f5759e 100644 --- a/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd +++ b/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd @@ -6,7 +6,7 @@ bibliography: image_classification.bib ![*DALL·E 3 Prompt: Cartoon in a 1950s style featuring a compact electronic device with a camera module placed on a wooden table. The screen displays blue robots on one side and green periquitos on the other. 
LED lights on the device indicate classifications, while characters in retro clothing observe with interest.*](images/jpg/img_class_ini.jpg) -## Introduction +## Overview As we initiate our studies into embedded machine learning or TinyML, it's impossible to overlook the transformative impact of Computer Vision (CV) and Artificial Intelligence (AI) in our lives. These two intertwined disciplines redefine what machines can perceive and accomplish, from autonomous vehicles and robotics to healthcare and surveillance. diff --git a/contents/labs/arduino/nicla_vision/kws/kws.qmd b/contents/labs/arduino/nicla_vision/kws/kws.qmd index f6a03dd9..1734fd5e 100644 --- a/contents/labs/arduino/nicla_vision/kws/kws.qmd +++ b/contents/labs/arduino/nicla_vision/kws/kws.qmd @@ -6,7 +6,7 @@ bibliography: kws.bib ![*DALL·E 3 Prompt: 1950s style cartoon scene set in a vintage audio research room. Two Afro-American female scientists are at the center. One holds a magnifying glass, closely examining ancient circuitry, while the other takes notes. On their wooden table, there are multiple boards with sensors, notably featuring a microphone. Behind these boards, a computer with a large, rounded back displays the Arduino IDE. The IDE showcases code for LED pin assignments and machine learning inference for voice command detection. A distinct window in the IDE, the Serial Monitor, reveals outputs indicating the spoken commands 'yes' and 'no'. The room ambiance is nostalgic with vintage lamps, classic audio analysis tools, and charts depicting FFT graphs and time-domain curves.*](images/jpg/nicla-kws.jpg) -## Introduction +## Overview Having already explored the Nicla Vision board in the *Image Classification* and *Object Detection* applications, we are now shifting our focus to voice-activated applications with a project on Keyword Spotting (KWS). 
diff --git a/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd b/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd index 2b149011..5c5037d2 100644 --- a/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd +++ b/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd @@ -7,7 +7,7 @@ bibliography: motion_classification.bib ![*DALL·E 3 Prompt: 1950s style cartoon illustration depicting a movement research room. In the center of the room, there's a simulated container used for transporting goods on trucks, boats, and forklifts. The container is detailed with rivets and markings typical of industrial cargo boxes. Around the container, the room is filled with vintage equipment, including an oscilloscope, various sensor arrays, and large paper rolls of recorded data. The walls are adorned with educational posters about transportation safety and logistics. The overall ambiance of the room is nostalgic and scientific, with a hint of industrial flair.*](images/jpg/movement_anomaly_ini.jpg) -## Introduction +## Overview Transportation is the backbone of global commerce. Millions of containers are transported daily via various means, such as ships, trucks, and trains, to destinations worldwide. Ensuring these containers' safe and efficient transit is a monumental task that requires leveraging modern technology, and TinyML is undoubtedly one of them. 
diff --git a/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd b/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd index 87d81b0e..2971cd66 100644 --- a/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd +++ b/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd @@ -6,7 +6,7 @@ bibliography: object_detection.bib ![*DALL·E 3 Prompt: Cartoon in the style of the 1940s or 1950s showcasing a spacious industrial warehouse interior. A conveyor belt is prominently featured, carrying a mixture of toy wheels and boxes. The wheels are distinguishable with their bright yellow centers and black tires. The boxes are white cubes painted with alternating black and white patterns. At the end of the moving conveyor stands a retro-styled robot, equipped with tools and sensors, diligently classifying and counting the arriving wheels and boxes. The overall aesthetic is reminiscent of mid-century animation with bold lines and a classic color palette.*](images/jpg/obj_det_ini.jpg) -## Introduction +## Overview This is a continuation of **CV on Nicla Vision**, now exploring **Object Detection** on microcontrollers. diff --git a/contents/labs/arduino/nicla_vision/setup/setup.qmd b/contents/labs/arduino/nicla_vision/setup/setup.qmd index 096daa9c..0d70558d 100644 --- a/contents/labs/arduino/nicla_vision/setup/setup.qmd +++ b/contents/labs/arduino/nicla_vision/setup/setup.qmd @@ -6,7 +6,7 @@ bibliography: setup.bib ![*DALL·E 3 Prompt: Illustration reminiscent of a 1950s cartoon where the Arduino NICLA VISION board, equipped with a variety of sensors including a camera, is the focal point on an old-fashioned desk. In the background, a computer screen with rounded edges displays the Arduino IDE. The code seen is related to LED configurations and machine learning voice command detection. 
Outputs on the Serial Monitor explicitly display the words 'yes' and 'no'.*](images/jpg/nicla_sys_ini.jpg) -## Introduction +## Overview The [Arduino Nicla Vision](https://docs.arduino.cc/hardware/nicla-vision) (sometimes called *NiclaV*) is a development board that includes two processors that can run tasks in parallel. It is part of a family of development boards with the same form factor but designed for specific tasks, such as the [Nicla Sense ME](https://www.bosch-sensortec.com/software-tools/tools/arduino-nicla-sense-me/) and the [Nicla Voice](https://store-usa.arduino.cc/products/nicla-voice?_gl=1*l3abc6*_ga*MTQ3NzE4Mjk4Mi4xNjQwMDIwOTk5*_ga_NEXN8H46L5*MTY5NjM0Mzk1My4xMDIuMS4xNjk2MzQ0MjQ1LjAuMC4w). The *Niclas* can efficiently run processes created with TensorFlow Lite. For example, one of the cores of the NiclaV runs a computer vision algorithm on the fly (inference), while the other executes low-level operations like controlling a motor and communicating or acting as a user interface. The onboard wireless module allows the management of WiFi and Bluetooth Low Energy (BLE) connectivity simultaneously. diff --git a/contents/labs/raspi/image_classification/image_classification.qmd b/contents/labs/raspi/image_classification/image_classification.qmd index 472934db..5e2effa4 100644 --- a/contents/labs/raspi/image_classification/image_classification.qmd +++ b/contents/labs/raspi/image_classification/image_classification.qmd @@ -2,7 +2,7 @@ ![*DALL·E prompt - A cover image for an 'Image Classification' chapter in a Raspberry Pi tutorial, designed in the same vintage 1950s electronics lab style as previous covers. The scene should feature a Raspberry Pi connected to a camera module, with the camera capturing a photo of the small blue robot provided by the user. The robot should be placed on a workbench, surrounded by classic lab tools like soldering irons, resistors, and wires. 
The lab background should include vintage equipment like oscilloscopes and tube radios, maintaining the detailed and nostalgic feel of the era. No text or logos should be included.*](images/jpeg/img_class_cover.jpg) -## Introduction +## Overview Image classification is a fundamental task in computer vision that involves categorizing an image into one of several predefined classes. It's a cornerstone of artificial intelligence, enabling machines to interpret and understand visual information in a way that mimics human perception. diff --git a/contents/labs/raspi/llm/llm.qmd b/contents/labs/raspi/llm/llm.qmd index 90098990..75bbc261 100644 --- a/contents/labs/raspi/llm/llm.qmd +++ b/contents/labs/raspi/llm/llm.qmd @@ -2,7 +2,7 @@ ![*DALL·E prompt - A 1950s-style cartoon illustration showing a Raspberry Pi running a small language model at the edge. The Raspberry Pi is stylized in a retro-futuristic way with rounded edges and chrome accents, connected to playful cartoonish sensors and devices. Speech bubbles are floating around, representing language processing, and the background has a whimsical landscape of interconnected devices with wires and small gadgets, all drawn in a vintage cartoon style. The color palette uses soft pastel colors and bold outlines typical of 1950s cartoons, giving a fun and nostalgic vibe to the scene.*](images/jpeg/cover.jpg) -## Introduction +## Overview In the fast-growing area of artificial intelligence, edge computing presents an opportunity to decentralize capabilities traditionally reserved for powerful, centralized servers. This lab explores the practical integration of small versions of traditional large language models (LLMs) into a Raspberry Pi 5, transforming this edge device into an AI hub capable of real-time, on-site data processing. 
diff --git a/contents/labs/raspi/object_detection/object_detection.qmd b/contents/labs/raspi/object_detection/object_detection.qmd index 22b81904..0b4bfa8f 100644 --- a/contents/labs/raspi/object_detection/object_detection.qmd +++ b/contents/labs/raspi/object_detection/object_detection.qmd @@ -4,7 +4,7 @@ ![*DALL·E prompt - A cover image for an 'Object Detection' chapter in a Raspberry Pi tutorial, designed in the same vintage 1950s electronics lab style as previous covers. The scene should prominently feature wheels and cubes, similar to those provided by the user, placed on a workbench in the foreground. A Raspberry Pi with a connected camera module should be capturing an image of these objects. Surround the scene with classic lab tools like soldering irons, resistors, and wires. The lab background should include vintage equipment like oscilloscopes and tube radios, maintaining the detailed and nostalgic feel of the era. No text or logos should be included.*](images/jpeg/cover.jpg) -## Introduction +## Overview Building upon our exploration of image classification, we now turn our attention to a more advanced computer vision task: object detection. While image classification assigns a single label to an entire image, object detection goes further by identifying and locating multiple objects within a single image. This capability opens up many new applications and challenges, particularly in edge computing and IoT devices like the Raspberry Pi. diff --git a/contents/labs/raspi/setup/setup.qmd b/contents/labs/raspi/setup/setup.qmd index 4cf0a292..5415ff90 100644 --- a/contents/labs/raspi/setup/setup.qmd +++ b/contents/labs/raspi/setup/setup.qmd @@ -6,7 +6,7 @@ This chapter will guide you through setting up Raspberry Pi Zero 2 W (*Raspi-Zer > The general instructions for the *Raspi-5* also apply to the older Raspberry Pi versions, such as the Raspi-3 and Raspi-4. 
-## Introduction +## Overview The Raspberry Pi is a powerful and versatile single-board computer that has become an essential tool for engineers across various disciplines. Developed by the [Raspberry Pi Foundation](https://www.raspberrypi.org/), these compact devices offer a unique combination of affordability, computational power, and extensive GPIO (General Purpose Input/Output) capabilities, making them ideal for prototyping, embedded systems development, and advanced engineering projects. diff --git a/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd b/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd index e0090616..dc63fa69 100644 --- a/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd +++ b/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd @@ -2,7 +2,7 @@ ![*Image by Marcelo Rovai*](./images/png/ini.png) -## Introduction +## Overview More and more, we are facing an artificial intelligence (AI) revolution where, as stated by Gartner, **Edge AI** has a very high impact potential, and **it is for now**! diff --git a/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd b/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd index e1e6f81f..d7ca82c6 100644 --- a/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd +++ b/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd @@ -3,7 +3,7 @@ ![*Image by Marcelo Rovai*](images/jpeg/kws_ini.jpg) -## Introduction +## Overview Keyword Spotting (KWS) is integral to many voice recognition systems, enabling devices to respond to specific words or phrases. While this technology underpins popular devices like Google Assistant or Amazon Alexa, it's equally applicable and achievable on smaller, low-power devices. This lab will guide you through implementing a KWS system using TinyML on the XIAO ESP32S3 microcontroller board. 
diff --git a/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd b/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd index d6177524..5f5e5ef6 100644 --- a/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd +++ b/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd @@ -2,7 +2,7 @@ ![*DALL·E prompt - 1950s style cartoon illustration set in a vintage audio lab. Scientists, dressed in classic attire with white lab coats, are intently analyzing audio data on large chalkboards. The boards display intricate FFT (Fast Fourier Transform) graphs and time-domain curves. Antique audio equipment is scattered around, but the data representations are clear and detailed, indicating their focus on audio analysis.*](./images/jpeg/ini.jpg) -## Introduction +## Overview The XIAO ESP32S3 Sense, with its built-in camera and mic, is a versatile device. But what if you need to add another type of sensor, such as an IMU? No problem! One of the standout features of the XIAO ESP32S3 is its multiple pins that can be used as an I2C bus (SDA/SCL pins), making it a suitable platform for sensor integration. diff --git a/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd b/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd index d302e3a8..afec4dc5 100644 --- a/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd +++ b/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd @@ -2,7 +2,7 @@ ![*DALL·E prompt - Cartoon styled after 1950s animations, showing a detailed board with sensors, particularly a camera, on a table with patterned cloth. Behind the board, a computer with a large back showcases the Arduino IDE. The IDE's content hints at LED pin assignments and machine learning inference for detecting spoken commands. 
The Serial Monitor, in a distinct window, reveals outputs for the commands 'yes' and 'no'.*](./images/png/obj_detec_ini.png) -## Introduction +## Overview In the last section regarding Computer Vision (CV) and the XIAO ESP32S3, *Image Classification,* we learned how to set up and classify images with this remarkable development board. Continuing our CV journey, we will explore **Object Detection** on microcontrollers. diff --git a/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd b/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd index 8caa62e0..681fb454 100644 --- a/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd +++ b/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd @@ -2,7 +2,7 @@ ![*DALL·E prompt - 1950s cartoon-style drawing of a XIAO ESP32S3 board with a distinctive camera module, as shown in the image provided. The board is placed on a classic lab table with various sensors, including a microphone. Behind the board, a vintage computer screen displays the Arduino IDE in muted colors, with code focusing on LED pin setups and machine learning inference for voice commands. The Serial Monitor on the IDE showcases outputs detecting voice commands like 'yes' and 'no'. The scene merges the retro charm of mid-century labs with modern electronics.*](./images/jpeg/xiao_setup.jpg) -## Introduction +## Overview The [XIAO ESP32S3 Sense](https://www.seeedstudio.com/XIAO-ESP32S3-Sense-p-5639.html) is Seeed Studio's affordable development board, which integrates a camera sensor, digital microphone, and SD card support. Combining embedded ML computing power and photography capability, this development board is a great tool to start with TinyML (intelligent voice and vision AI). 
diff --git a/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd b/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd index d9784107..c908eead 100644 --- a/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd +++ b/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd @@ -6,7 +6,7 @@ bibliography: dsp_spectral_features_block.bib ![*DALL·E 3 Prompt: 1950s style cartoon illustration of a Latin male and female scientist in a vibration research room. The man is using a calculus ruler to examine ancient circuitry. The woman is at a computer with complex vibration graphs. The wooden table has boards with sensors, prominently an accelerometer. A classic, rounded-back computer shows the Arduino IDE with code for LED pin assignments and machine learning algorithms for movement detection. The Serial Monitor displays FFT, classification, wavelets, and DSPs. Vintage lamps, tools, and charts with FFT and Wavelets graphs complete the scene.*](images/jpg/dsp_ini.jpg) -## Introduction +## Overview TinyML projects related to motion (or vibration) involve data from IMUs (usually **accelerometers** and **Gyroscopes**). These time-series type datasets should be preprocessed before inputting them into a Machine Learning model training, which is a challenging area for embedded machine learning. Still, Edge Impulse helps overcome this complexity with its digital signal processing (DSP) preprocessing step and, more specifically, the [Spectral Features Block](https://docs.edgeimpulse.com/docs/edge-impulse-studio/processing-blocks/spectral-features) for Inertial sensors. 
diff --git a/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd b/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd index df4771b0..807b91f4 100644 --- a/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd +++ b/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd @@ -6,7 +6,7 @@ bibliography: kws_feature_eng.bib ![*DALL·E 3 Prompt: 1950s style cartoon scene set in an audio research room. Two scientists, one holding a magnifying glass and the other taking notes, examine large charts pinned to the wall. These charts depict FFT graphs and time curves related to audio data analysis. The room has a retro ambiance, with wooden tables, vintage lamps, and classic audio analysis tools.*](images/jpg/kws_under_the_hood_ini.jpg) -## Introduction +## Overview In this hands-on tutorial, the emphasis is on the critical role that feature engineering plays in optimizing the performance of machine learning models applied to audio classification tasks, such as speech recognition. It is essential to be aware that the performance of any machine learning model relies heavily on the quality of features used, and we will deal with "under-the-hood" mechanics of feature extraction, mainly focusing on Mel-frequency Cepstral Coefficients (MFCCs), a cornerstone in the field of audio signal processing. @@ -36,7 +36,7 @@ Here a typical KWS Process using MFCC Feature Converter: - **Resource Constraints:** KWS models are often designed to be lightweight, so they can run on devices with limited computational resources, like microcontrollers or mobile phones. - **Focused Task:** While general speech recognition models are trained to handle a broad range of vocabulary and accents, KWS models are fine-tuned to recognize specific keywords, often in noisy environments accurately. 
-## Introduction to Audio Signals +## Overview of Audio Signals Understanding the basic properties of audio signals is crucial for effective feature extraction and, ultimately, for successfully applying machine learning algorithms in audio classification tasks. Audio signals are complex waveforms that capture fluctuations in air pressure over time. These signals can be characterized by several fundamental attributes: sampling rate, frequency, and amplitude. @@ -72,7 +72,7 @@ Here are some additional details of the critical issues associated with using ra For these reasons, feature extraction techniques such as Mel-frequency Cepstral Coefficients (MFCCs), Mel-Frequency Energies (MFEs), and simple Spectograms are commonly used to transform raw audio data into a more manageable and informative format. These features capture the essential characteristics of the audio signal while reducing dimensionality and noise, facilitating more effective machine learning. -## Introduction to MFCCs +## Overview of MFCCs ### What are MFCCs? diff --git a/github-button.html b/github-button.html index bbd89d79..2eda354e 100644 --- a/github-button.html +++ b/github-button.html @@ -9,50 +9,56 @@ -
-

🌟 Help Us Reach 1,000 Stars! 🌟

-

For every 25 stars, Arduino and SEEED will each donate a NiclaVision or XIAO ESP32S3 kit for AI education in the developing world.                

+

🎓 The EDGE AI Foundation is matching scholarship funds for every GitHub ⭐
(up to 10,000 stars).

+

Click here to support! 🙏

+ href="https://github.com/harvard-edge/cs249r_book" + data-icon="octicon-star" + data-show-count="true" + data-size="large" + aria-label="Star harvard-edge/cs249r_book on GitHub"> Star
- diff --git a/index.qmd b/index.qmd index 58e3a51c..69a41db5 100644 --- a/index.qmd +++ b/index.qmd @@ -14,28 +14,40 @@ We have created this open-source book as a collaborative effort to bring togethe This isn't a static textbook; it's a living, breathing document. We're making it open-source and continuously updated to meet the ever-changing needs of this dynamic field. Expect a rich blend of expert knowledge that guides you through the complex interplay between cutting-edge algorithms and the foundational principles that make them work. We're setting the stage for the next big leap in AI innovation. -# Why We Wrote This Book +## Why We Wrote This Book We're in an age where technology is always evolving. Open collaboration and sharing knowledge are the building blocks of true innovation. That's the spirit behind this effort. We go beyond the traditional textbook model to create a living knowledge hub, so that we can all share and learn from one another. The book focuses on AI systems' principles and case studies, aiming to give you a deep understanding that will help you navigate the ever-changing landscape of AI systems. By keeping it open, we're not just making learning accessible but inviting new ideas and ongoing improvements. In short, we're building a community where knowledge is free to grow and light the way forward in global AI technology. -# What You'll Need to Know +## What You'll Need to Know To dive into this book, you don't need to be an AI expert. All you need is a basic understanding of computer science concepts and a curiosity to explore how AI systems work. This is where innovation happens, and a basic grasp of programming and data structures will be your compass. -# Content Transparency Statement +## Content Transparency Statement This book is a community-driven project, with content generated collaboratively by numerous contributors over time. 
The content creation process may have involved various editing tools, including generative AI technology. As the main author, editor, and curator, Prof. Vijay Janapa Reddi maintains human oversight and editorial oversight to make sure the content is accurate and relevant. However, no one is perfect, so inaccuracies may still exist. We highly value your feedback and encourage you to provide corrections or suggestions. This collaborative approach is crucial for enhancing and maintaining the quality of the content contained within and making high-quality information globally accessible. -# Want to Help Out? +## Want to Help Out? If you're interested in contributing, you can find the guidelines [here](https://github.com/harvard-edge/cs249r_book/blob/dev/contribute.md). -# Get in Touch +## Get in Touch Do you have questions or feedback? Feel free to [e-mail Prof. Vijay Janapa Reddi]({{< var email.info >}}) directly, or you are welcome to [start a discussion thread](https://github.com/harvard-edge/cs249r_book/discussions) on GitHub. -# Contributors +## Contributors -A big thanks to everyone who's helped make this book what it is! You can see the full list of individual contributors [here](./contents/contributors.qmd) and additional GitHub style details [here](https://github.com/harvard-edge/cs249r_book/graphs/contributors). Join us as a contributor! +A big thanks to everyone who's helped make this book what it is! You can see the full list of individual contributors [here](contents/core/acknowledgements/acknowledgements.qmd) and additional GitHub style details [here](https://github.com/harvard-edge/cs249r_book/graphs/contributors). Join us as a contributor! + +## Copyright + +This book is open-source and developed collaboratively through GitHub. Unless otherwise stated, this work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0 CC BY-SA 4.0). 
You can find the full text of the license [here](https://creativecommons.org/licenses/by-nc-sa/4.0). + +Contributors to this project have dedicated their contributions to the public domain or under the same open license as the original project. While the contributions are collaborative, each contributor retains copyright in their respective contributions. + +For details on authorship, contributions, and how to contribute, please see the project repository on [GitHub](https://github.com/harvard-edge/cs249r_book). + +All trademarks and registered trademarks mentioned in this book are the property of their respective owners. + +The information provided in this book is believed to be accurate and reliable. However, the authors, editors, and publishers cannot be held liable for any damages caused or alleged to be caused either directly or indirectly by the information contained in this book. diff --git a/tex/header-includes.tex b/tex/header-includes.tex index 3ef98694..1afc701c 100644 --- a/tex/header-includes.tex +++ b/tex/header-includes.tex @@ -22,6 +22,7 @@ \usepackage[outercaption, ragged]{sidecap} \usepackage{etoolbox} % For redefining footnotes \usepackage{sidenotes} +\usepackage{ifthen} % Define the Crimson color \definecolor{crimson}{HTML}{A51C30} @@ -29,16 +30,24 @@ % Redefine \sidenote to include a custom minimalist styled box with a vertical bar \renewcommand{\thefootnote}{\textcolor{crimson}{\arabic{footnote}}} -\let\oldsidenote\sidenote +% Save the old \sidenote command (only if it exists) +\makeatletter +\@ifundefined{oldsidenote}{ + \let\oldsidenote\sidenote +}{} +\makeatother + +% Redefine \sidenote \renewcommand{\sidenote}[1]{% \oldsidenote{% - \vspace{-1em} % Adjust position to align better with main text \noindent - \begin{minipage}{0.9\marginparwidth} - \color{crimson!100} % Light blue color for the vertical line - \rule{0.5pt}{1.5em} \hspace{0.3em} % Thin vertical line with spacing - {\footnotesize #1} % Light background for sidenote text - 
\end{minipage}% + \color{crimson!100} % Set the color for the vertical line + \raisebox{0em}{% Raise the vertical line to align with the number + \rule{0.5pt}{1.5em} % Thin vertical line with fixed height + } + \hspace{0.3em} % Spacing between the line and the sidenote text + \color{black} % Reset color for sidenote text + {\footnotesize #1} % Sidenote text in smaller font size } } @@ -166,3 +175,56 @@ \setlength{\cftsecnumwidth}{2.75em} % Adjust width for section numbers \setlength{\cftsubsecnumwidth}{3.25em} % Adjust width for subsection numbers \setlength{\cftsubsubsecnumwidth}{4em} % Adjust width for subsubsection numbers + + + + + +% Page numbering setup +\makeatletter +% Store whether we've seen the first of each type +\newif\if@firstnumbered +\@firstnumberedtrue +\newif\if@firstunnumbered +\@firstunnumberedtrue + +% Store the page numbers +\newcounter{lastRomanPage} +\setcounter{lastRomanPage}{1} + +% Initial setup for front matter +\AtBeginDocument{ + \pagenumbering{roman} + \renewcommand{\thepage}{\roman{page}} +} + +% Modify chapter to handle page numbering +\let\old@chapter\chapter +\renewcommand{\chapter}{% + \@ifstar{\unnumbered@chapter}{\numbered@chapter}% +} + +% Handle numbered chapters +\newcommand{\numbered@chapter}[1]{% + \if@firstnumbered + \cleardoublepage + \setcounter{lastRomanPage}{\value{page}}% + \pagenumbering{arabic}% + \@firstnumberedfalse + \else + \setcounter{page}{\value{page}}% + \fi + \old@chapter{#1}% +} + +% Handle unnumbered chapters - only switch on actual chapter changes +\newcommand{\unnumbered@chapter}[1]{% + \if@firstunnumbered + \clearpage + \setcounter{lastRomanPage}{\value{page}}% + \pagenumbering{roman}% + \@firstunnumberedfalse + \fi + \old@chapter*{#1}% +} +\makeatother \ No newline at end of file