diff --git a/images/ai_training.png b/images/ai_training.png new file mode 100644 index 00000000..5924365f Binary files /dev/null and b/images/ai_training.png differ diff --git a/images/aitrainingfit.png b/images/aitrainingfit.png new file mode 100644 index 00000000..bf8889fd Binary files /dev/null and b/images/aitrainingfit.png differ diff --git a/images/aitrainingnn.png b/images/aitrainingnn.png new file mode 100644 index 00000000..641346bf Binary files /dev/null and b/images/aitrainingnn.png differ diff --git a/images/aitrainingpara.png b/images/aitrainingpara.png new file mode 100644 index 00000000..81d18e9e Binary files /dev/null and b/images/aitrainingpara.png differ diff --git a/images/aitrainingroof.png b/images/aitrainingroof.png new file mode 100644 index 00000000..0ec96d45 Binary files /dev/null and b/images/aitrainingroof.png differ diff --git a/images/aitrainingsgd.png b/images/aitrainingsgd.png new file mode 100644 index 00000000..8db1772b Binary files /dev/null and b/images/aitrainingsgd.png differ diff --git a/references.bib b/references.bib index 1184f00f..45902772 100644 --- a/references.bib +++ b/references.bib @@ -1,3496 +1,3000 @@ -@article{Ratner_Hancock_Dunnmon_Goldman_Ré_2018, - title = {Snorkel metal: Weak supervision for multi-task learning.}, - author = {Ratner, Alex and Hancock, Braden and Dunnmon, Jared and Goldman, Roger and R\'{e}, Christopher}, - year = 2018, - journal = {Proceedings of the Second Workshop on Data Management for End-To-End Machine Learning}, +@article{10242251, + author = {Eshraghian, Jason K. and Ward, Max and Neftci, Emre O. and Wang, Xinxin and Lenz, Gregor and Dwivedi, Girish and Bennamoun, Mohammed and Jeong, Doo Seok and Lu, Wei D.}, + year = 2023, + journal = {Proceedings of the IEEE}, + volume = 111, + number = 9, + pages = {1016--1054}, + doi = {10.1109/JPROC.2023.3308088}, + bdsk-url-1 = {https://doi.org/10.1109/JPROC.2023.3308088} } - -@article{oecd22, - author = "OECD", - title = "Measuring the environmental impacts of artificial intelligence compute and applications", - year = "2022", - number = "341", - url = "https://www.oecd-ilibrary.org/content/paper/7babf571-en", - doi = "https://doi.org/https://doi.org/10.1787/7babf571-en" +@inproceedings{abadi2016deep, + author = {Abadi, Martin and Chu, Andy and Goodfellow, Ian and McMahan, H. 
Brendan and Mironov, Ilya and Talwar, Kunal and Zhang, Li}, + year = 2016, + booktitle = {Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + series = {CCS '16}, + pages = {308--318}, + date-added = {2023-11-22 18:06:03 -0500}, + date-modified = {2023-11-22 18:08:42 -0500} } - - - -@inproceedings{sculley2015hidden, - title = {"Everyone wants to do the model work, not the data work": Data Cascades in High-Stakes AI}, - author = {Nithya Sambasivan and Shivani Kapania and Hannah Highfill and Diana Akrong and Praveen Kumar Paritosh and Lora Mois Aroyo}, - year = 2021, +@inproceedings{abadi2016tensorflow, + author = {Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others}, + year = 2016, + booktitle = {12th USENIX symposium on operating systems design and implementation (OSDI 16)}, + pages = {265--283} } - -@inproceedings{kocher1996timing, - title={Timing attacks on implementations of Diffie-Hellman, RSA, DSS, and other systems}, - author={Kocher, Paul C}, - booktitle={Advances in Cryptology—CRYPTO’96: 16th Annual International Cryptology Conference Santa Barbara, California, USA August 18--22, 1996 Proceedings 16}, - pages={104--113}, - year={1996}, - organization={Springer} +@inproceedings{Abdelkader_2020, + author = {Abdelkader, Ahmed and Curry, Michael J. and Fowl, Liam and Goldstein, Tom and Schwarzschild, Avi and Shu, Manli and Studer, Christoph and Zhu, Chen}, + year = 2020, + booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, + date-added = {2023-11-22 16:28:31 -0500}, + date-modified = {2023-11-22 16:29:33 -0500} } - -@inproceedings{agrawal2003side, - title={The EM side—channel (s)}, - author={Agrawal, Dakshi and Archambeault, Bruce and Rao, Josyula R and Rohatgi, Pankaj}, - booktitle={Cryptographic Hardware and Embedded Systems-CHES 2002: 4th International Workshop Redwood Shores, CA, USA, August 13--15, 2002 Revised Papers 4}, - pages={29--45}, - year={2003}, - organization={Springer} +@article{adagrad, + author = {John Duchi and Elad Hazan and Yoram Singer}, + year = 2011, + journal = {Journal of Machine Learning Research}, + url = {http://jmlr.org/papers/v12/duchi11a.html} } - -@article{breier2018deeplaser, - title={Deeplaser: Practical fault attack on deep neural networks}, - author={Breier, Jakub and Hou, Xiaolu and Jap, Dirmanto and Ma, Lei and Bhasin, Shivam and Liu, Yang}, - journal={arXiv preprint arXiv:1806.05859}, - year={2018} +@misc{adam, + author = {Diederik P. Kingma and Jimmy Ba}, + year = 2017, + eprint = {1412.6980}, + archiveprefix = {arXiv}, + primaryclass = {cs.LG} } - - -@inproceedings{skorobogatov2003optical, - title={Optical fault induction attacks}, - author={Skorobogatov, Sergei P and Anderson, Ross J}, - booktitle={Cryptographic Hardware and Embedded Systems-CHES 2002: 4th International Workshop Redwood Shores, CA, USA, August 13--15, 2002 Revised Papers 4}, - pages={2--12}, - year={2003}, - organization={Springer} +@misc{adelta, + author = {Matthew D. 
Zeiler}, + year = 2012, + eprint = {1212.5701}, + archiveprefix = {arXiv}, + primaryclass = {cs.LG} } - -@inproceedings{skorobogatov2009local, - title={Local heating attacks on flash memory devices}, - author={Skorobogatov, Sergei}, - booktitle={2009 IEEE International Workshop on Hardware-Oriented Security and Trust}, - pages={1--6}, - year={2009}, - organization={IEEE} +@inproceedings{adolf2016fathom, + author = {Adolf, Robert and Rama, Saketh and Reagen, Brandon and Wei, Gu-Yeon and Brooks, David}, + year = 2016, + booktitle = {2016 IEEE International Symposium on Workload Characterization (IISWC)}, + pages = {1--10}, + organization = {IEEE} } - - -@article{oprea2022poisoning, - title={Poisoning Attacks Against Machine Learning: Can Machine Learning Be Trustworthy?}, - author={Oprea, Alina and Singhal, Anoop and Vassilev, Apostol}, - journal={Computer}, - volume={55}, - number={11}, - pages={94--99}, - year={2022}, - publisher={IEEE} +@article{afib, + author = {Yutao Guo and Hao Wang and Hui Zhang and Tong Liu and Zhaoguang Liang and Yunlong Xia and Li Yan and Yunli Xing and Haili Shi and Shuyan Li and Yanxia Liu and Fan Liu and Mei Feng and Yundai Chen and Gregory Y.H. Lip and null null}, + year = 2019, + journal = {Journal of the American College of Cardiology}, + volume = 74, + number = 19, + pages = {2365--2375}, + doi = {10.1016/j.jacc.2019.08.019}, + bdsk-url-1 = {https://doi.org/10.1016/j.jacc.2019.08.019} } - -@inproceedings{antonakakis2017understanding, - title={Understanding the mirai botnet}, - author={Antonakakis, Manos and April, Tim and Bailey, Michael and Bernhard, Matt and Bursztein, Elie and Cochran, Jaime and Durumeric, Zakir and Halderman, J Alex and Invernizzi, Luca and Kallitsis, Michalis and others}, - booktitle={26th USENIX security symposium (USENIX Security 17)}, - pages={1093--1110}, - year={2017} +@inproceedings{agrawal2003side, + author = {Agrawal, Dakshi and Archambeault, Bruce and Rao, Josyula R and Rohatgi, Pankaj}, + year = 2003, + booktitle = {Cryptographic Hardware and Embedded Systems-CHES 2002: 4th International Workshop Redwood Shores, CA, USA, August 13--15, 2002 Revised Papers 4}, + pages = {29--45}, + organization = {Springer} } - -@article{goodfellow2020generative, - title={Generative adversarial networks}, - author={Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, - journal={Communications of the ACM}, - volume={63}, - number={11}, - pages={139--144}, - year={2020}, - publisher={ACM New York, NY, USA} +@misc{al2016theano, + author = {The Theano Development Team and Rami Al-Rfou and Guillaume Alain and Amjad Almahairi and Christof Angermueller and Dzmitry Bahdanau and Nicolas Ballas and Fr{\'e}d{\'e}ric Bastien and Justin Bayer and Anatoly Belikov and Alexander Belopolsky and Yoshua Bengio and Arnaud Bergeron and James Bergstra and Valentin Bisson and Josh Bleecher Snyder and Nicolas Bouchard and Nicolas Boulanger-Lewandowski and Xavier Bouthillier and Alexandre de Br{\'e}bisson and Olivier Breuleux and Pierre-Luc Carrier and Kyunghyun Cho and Jan Chorowski and Paul Christiano and Tim Cooijmans and Marc-Alexandre C{\^o}t{\'e} and Myriam C{\^o}t{\'e} and Aaron Courville and Yann N. 
Dauphin and Olivier Delalleau and Julien Demouth and Guillaume Desjardins and Sander Dieleman and Laurent Dinh and M{\'e}lanie Ducoffe and Vincent Dumoulin and Samira Ebrahimi Kahou and Dumitru Erhan and Ziye Fan and Orhan Firat and Mathieu Germain and Xavier Glorot and Ian Goodfellow and Matt Graham and Caglar Gulcehre and Philippe Hamel and Iban Harlouchet and Jean-Philippe Heng and Bal{\'a}zs Hidasi and Sina Honari and Arjun Jain and S{\'e}bastien Jean and Kai Jia and Mikhail Korobov and Vivek Kulkarni and Alex Lamb and Pascal Lamblin and Eric Larsen and C{\'e}sar Laurent and Sean Lee and Simon Lefrancois and Simon Lemieux and Nicholas L{\'e}onard and Zhouhan Lin and Jesse A. Livezey and Cory Lorenz and Jeremiah Lowin and Qianli Ma and Pierre-Antoine Manzagol and Olivier Mastropietro and Robert T. McGibbon and Roland Memisevic and Bart van Merri{\"e}nboer and Vincent Michalski and Mehdi Mirza and Alberto Orlandi and Christopher Pal and Razvan Pascanu and Mohammad Pezeshki and Colin Raffel and Daniel Renshaw and Matthew Rocklin and Adriana Romero and Markus Roth and Peter Sadowski and John Salvatier and Fran{\c c}ois Savard and Jan Schl{\"u}ter and John Schulman and Gabriel Schwartz and Iulian Vlad Serban and Dmitriy Serdyuk and Samira Shabanian and {\'E}tienne Simon and Sigurd Spieckermann and S. Ramana Subramanyam and Jakub Sygnowski and J{\'e}r{\'e}mie Tanguay and Gijs van Tulder and Joseph Turian and Sebastian Urban and Pascal Vincent and Francesco Visin and Harm de Vries and David Warde-Farley and Dustin J. Webb and Matthew Willson and Kelvin Xu and Lijun Xue and Li Yao and Saizheng Zhang and Ying Zhang}, + year = 2016, + archiveprefix = {arXiv}, + eprint = {1605.02688}, + primaryclass = {cs.SC} } - - -@conference{Rombach22cvpr, -title = {High-Resolution Image Synthesis with Latent Diffusion Models}, -author = {Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer}, -url = {https://github.com/CompVis/latent-diffusionhttps://arxiv.org/abs/2112.10752}, -year = {2022}, -booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, +@article{Aledhari_Razzak_Parizi_Saeed_2020, + author = {Aledhari, Mohammed and Razzak, Rehma and Parizi, Reza M. 
and Saeed, Fahad}, + year = 2020, + journal = {IEEE Access}, + volume = 8, + pages = {140699--140725}, + doi = {10.1109/access.2020.3013541}, + bdsk-url-1 = {https://doi.org/10.1109/access.2020.3013541} } - - -@inproceedings{ramesh2021zero, - title={Zero-shot text-to-image generation}, - author={Ramesh, Aditya and Pavlov, Mikhail and Goh, Gabriel and Gray, Scott and Voss, Chelsea and Radford, Alec and Chen, Mark and Sutskever, Ilya}, - booktitle={International Conference on Machine Learning}, - pages={8821--8831}, - year={2021}, - organization={PMLR} +@article{aljundi_gradient_nodate, + author = {Aljundi, Rahaf and Lin, Min and Goujaud, Baptiste and Bengio, Yoshua}, + language = {en} } - -@article{shan2023prompt, - title={Prompt-Specific Poisoning Attacks on Text-to-Image Generative Models}, - author={Shan, Shawn and Ding, Wenxin and Passananti, Josephine and Zheng, Haitao and Zhao, Ben Y}, - journal={arXiv preprint arXiv:2310.13828}, - year={2023} +@inproceedings{altayeb2022classifying, + author = {Altayeb, Moez and Zennaro, Marco and Rovai, Marcelo}, + year = 2022, + booktitle = {Proceedings of the 2022 ACM Conference on Information Technology for Social Good}, + pages = {132--137} } - -@article{soufleri2023synthetic, - author = {Efstathia Soufleri and Gobinda Saha and Kaushik Roy}, - date-added = {2023-11-22 19:26:18 -0500}, - date-modified = {2023-11-22 19:26:57 -0500}, - journal = {arXiv preprint arXiv:2210.03205}, - title = {Synthetic Dataset Generation for Privacy-Preserving Machine Learning}, - year = {2023}} - -@article{eldan2023whos, - author = {Ronen Eldan and Mark Russinovich}, - date-added = {2023-11-22 19:24:35 -0500}, - date-modified = {2023-11-22 19:25:20 -0500}, - journal = {arXiv preprint arXiv:2310.02238}, - title = {Who's Harry Potter? Approximate Unlearning in LLMs}, - year = {2023}} - -@article{khan2021knowledgeadaptation, - author = {Mohammad Emtiyaz Khan and Siddharth Swaroop}, - date-added = {2023-11-22 19:22:50 -0500}, - date-modified = {2023-11-22 19:23:40 -0500}, - journal = {arXiv preprint arXiv:2106.08769}, - title = {Knowledge-Adaptation Priors}, - year = {2021}} - -@article{tarun2023deep, - author = {Ayush K Tarun and Vikram S Chundawat and Murari Mandal and Mohan Kankanhalli}, - date-added = {2023-11-22 19:20:59 -0500}, - date-modified = {2023-11-22 19:21:59 -0500}, - journal = {arXiv preprint arXiv:2210.08196}, - title = {Deep Regression Unlearning}, - year = {2023}} - -@article{Li2020Federated, - author = {Li, Tian and Sahu, Anit Kumar and Talwalkar, Ameet and Smith, Virginia}, - date-added = {2023-11-22 19:15:13 -0500}, - date-modified = {2023-11-22 19:17:19 -0500}, - journal = {IEEE Signal Processing Magazine}, - number = {3}, - pages = {50-60}, - title = {Federated Learning: Challenges, Methods, and Future Directions}, - volume = {37}, - year = {2020}} - -@article{MAL-083, - author = {Peter Kairouz and H. Brendan McMahan and Brendan Avent and Aur{\'e}lien Bellet and Mehdi Bennis and Arjun Nitin Bhagoji and Kallista Bonawitz and Zachary Charles and Graham Cormode and Rachel Cummings and Rafael G. L. D'Oliveira and Hubert Eichner and Salim El Rouayheb and David Evans and Josh Gardner and Zachary Garrett and Adri{\`a} Gasc{\'o}n and Badih Ghazi and Phillip B. 
Gibbons and Marco Gruteser and Zaid Harchaoui and Chaoyang He and Lie He and Zhouyuan Huo and Ben Hutchinson and Justin Hsu and Martin Jaggi and Tara Javidi and Gauri Joshi and Mikhail Khodak and Jakub Konecn{\'y} and Aleksandra Korolova and Farinaz Koushanfar and Sanmi Koyejo and Tancr{\`e}de Lepoint and Yang Liu and Prateek Mittal and Mehryar Mohri and Richard Nock and Ayfer {\"O}zg{\"u}r and Rasmus Pagh and Hang Qi and Daniel Ramage and Ramesh Raskar and Mariana Raykova and Dawn Song and Weikang Song and Sebastian U. Stich and Ziteng Sun and Ananda Theertha Suresh and Florian Tram{\`e}r and Praneeth Vepakomma and Jianyu Wang and Li Xiong and Zheng Xu and Qiang Yang and Felix X. Yu and Han Yu and Sen Zhao}, - date-added = {2023-11-22 19:14:08 -0500}, - date-modified = {2023-11-22 19:14:08 -0500}, - doi = {10.1561/2200000083}, - issn = {1935-8237}, - journal = {Foundations and Trends{\textregistered} in Machine Learning}, - number = {1--2}, - pages = {1-210}, - title = {Advances and Open Problems in Federated Learning}, - url = {http://dx.doi.org/10.1561/2200000083}, - volume = {14}, - year = {2021}, - Bdsk-Url-1 = {http://dx.doi.org/10.1561/2200000083}} - -@inproceedings{abadi2016deep, - address = {New York, NY, USA}, - author = {Abadi, Martin and Chu, Andy and Goodfellow, Ian and McMahan, H. Brendan and Mironov, Ilya and Talwar, Kunal and Zhang, Li}, - booktitle = {Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security}, - date-added = {2023-11-22 18:06:03 -0500}, - date-modified = {2023-11-22 18:08:42 -0500}, - keywords = {deep learning, differential privacy}, - pages = {308--318}, - publisher = {Association for Computing Machinery}, - series = {CCS '16}, - title = {Deep Learning with Differential Privacy}, - year = {2016}} - -@inproceedings{Dwork2006Theory, - address = {Berlin, Heidelberg}, - author = {Dwork, Cynthia and McSherry, Frank and Nissim, Kobbi and Smith, Adam}, - booktitle = {Theory of Cryptography}, - date-added = {2023-11-22 18:04:12 -0500}, - date-modified = {2023-11-22 18:05:20 -0500}, - editor = {Halevi, Shai and Rabin, Tal}, - pages = {265-284}, - publisher = {Springer Berlin Heidelberg}, - title = {Calibrating Noise to Sensitivity in Private Data Analysis}, - year = {2006}} - -@article{Gupta2023ChatGPT, - author = {Gupta, Maanak and Akiri, Charankumar and Aryal, Kshitiz and Parker, Eli and Praharaj, Lopamudra}, - date-added = {2023-11-22 18:01:41 -0500}, - date-modified = {2023-11-22 18:02:55 -0500}, - journal = {IEEE Access}, - pages = {80218-80245}, - title = {From ChatGPT to ThreatGPT: Impact of Generative AI in Cybersecurity and Privacy}, - volume = {11}, - year = {2023}} - -@inproceedings{Biega2020Oper, - address = {New York, NY, USA}, - author = {Biega, Asia J. 
and Potash, Peter and Daum\'{e}, Hal and Diaz, Fernando and Finck, Mich\`{e}le}, - booktitle = {Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval}, - date-added = {2023-11-22 17:57:23 -0500}, - date-modified = {2023-11-22 17:59:54 -0500}, - keywords = {data minimization, privacy, gdpr, recommender systems, purpose limitation, personalization}, - pages = {399--408}, - publisher = {Association for Computing Machinery}, - series = {SIGIR '20}, - title = {Operationalizing the Legal Principle of Data Minimization for Personalization}, - year = {2020}} - -@article{cavoukian2009privacy, - author = {Cavoukian, Ann}, - date-added = {2023-11-22 17:55:45 -0500}, - date-modified = {2023-11-22 17:56:58 -0500}, - journal = {Office of the Information and Privacy Commissioner}, - title = {Privacy by design}, - year = {2009}} - -@article{Gao2020Physical, - author = {Gao, Yansong and Al-Sarawi, Said F. and Abbott, Derek}, - date-added = {2023-11-22 17:52:20 -0500}, - date-modified = {2023-11-22 17:54:56 -0500}, - journal = {Nature Electronics}, - month = {February}, - number = {2}, - pages = {81-91}, - title = {Physical unclonable functions}, - volume = {3}, - year = {2020}} - -@inproceedings{Rashmi2018Secure, - author = {R.V. Rashmi and A. Karthikeyan}, - booktitle = {2018 Second International Conference on Electronics, Communication and Aerospace Technology (ICECA)}, - date-added = {2023-11-22 17:50:16 -0500}, - date-modified = {2023-11-22 17:51:39 -0500}, - pages = {291-298}, - title = {Secure boot of Embedded Applications - A Review}, - year = {2018}} - -@article{miller2015remote, - author = {Miller, Charlie and Valasek, Chris}, - date-added = {2023-11-22 17:11:27 -0500}, - date-modified = {2023-11-22 17:12:18 -0500}, - journal = {Black Hat USA}, - number = {S 91}, - pages = {1-91}, - title = {Remote exploitation of an unaltered passenger vehicle}, - volume = {2015}, - year = {2015}} - -@book{dhanjani2015abusing, - author = {Dhanjani, Nitesh}, - date-added = {2023-11-22 17:09:41 -0500}, - date-modified = {2023-11-22 17:10:22 -0500}, - publisher = {O'Reilly Media, Inc.}, - title = {Abusing the internet of things: blackouts, freakouts, and stakeouts}, - year = {2015}} - -@inproceedings{zhao2018fpga, - author = {Zhao, Mark and Suh, G Edward}, - booktitle = {2018 IEEE Symposium on Security and Privacy (SP)}, - date-added = {2023-11-22 17:08:21 -0500}, - date-modified = {2023-11-22 17:09:07 -0500}, - organization = {IEEE}, - pages = {229-244}, - title = {FPGA-based remote power side-channel attacks}, - year = {2018}} - -@inproceedings{gnad2017voltage, - author = {Gnad, Dennis RE and Oboril, Fabian and Tahoori, Mehdi B}, - booktitle = {2017 27th International Conference on Field Programmable Logic and Applications (FPL)}, - date-added = {2023-11-22 17:07:13 -0500}, - date-modified = {2023-11-22 17:07:59 -0500}, - organization = {IEEE}, - pages = {1-7}, - title = {Voltage drop-based fault attacks on FPGAs using valid bitstreams}, - year = {2017}} - -@inproceedings{Asonov2004Keyboard, - author = {Asonov, D. and Agrawal, R.}, - booktitle = {IEEE Symposium on Security and Privacy, 2004. Proceedings. 
2004}, - date-added = {2023-11-22 17:05:39 -0500}, - date-modified = {2023-11-22 17:06:45 -0500}, - organization = {IEEE}, - pages = {3-11}, - title = {Keyboard acoustic emanations}, - year = {2004}} - -@article{Burnet1989Spycatcher, - author = {David Burnet and Richard Thomas}, - date-added = {2023-11-22 17:03:00 -0500}, - date-modified = {2023-11-22 17:04:44 -0500}, - journal = {Journal of Law and Society}, - number = {2}, - pages = {210-224}, - title = {Spycatcher: The Commodification of Truth}, - volume = {16}, - year = {1989}} - -@article{Kocher2011Intro, - author = {Kocher, Paul and Jaffe, Joshua and Jun, Benjamin and Rohatgi, Pankaj}, - date-added = {2023-11-22 16:58:42 -0500}, - date-modified = {2023-11-22 17:00:36 -0500}, - journal = {Journal of Cryptographic Engineering}, - month = {April}, - number = {1}, - pages = {5-27}, - title = {Introduction to differential power analysis}, - volume = {1}, - year = {2011}} - -@inproceedings{gandolfi2001electromagnetic, - author = {Gandolfi, Karine and Mourtel, Christophe and Olivier, Francis}, - booktitle = {Cryptographic Hardware and Embedded Systems---CHES 2001: Third International Workshop Paris, France, May 14--16, 2001 Proceedings 3}, - date-added = {2023-11-22 16:56:42 -0500}, - date-modified = {2023-11-22 16:57:40 -0500}, - organization = {Springer}, - pages = {251-261}, - title = {Electromagnetic analysis: Concrete results}, - year = {2001}} - -@inproceedings{kocher1999differential, - author = {Kocher, Paul and Jaffe, Joshua and Jun, Benjamin}, - booktitle = {Advances in Cryptology---CRYPTO'99: 19th Annual International Cryptology Conference Santa Barbara, California, USA, August 15--19, 1999 Proceedings 19}, - date-added = {2023-11-22 16:55:28 -0500}, - date-modified = {2023-11-22 16:56:18 -0500}, - organization = {Springer}, - pages = {388-397}, - title = {Differential power analysis}, - year = {1999}} - -@inproceedings{hsiao2023mavfi, - author = {Hsiao, Yu-Shun and Wan, Zishen and Jia, Tianyu and Ghosal, Radhika and Mahmoud, Abdulrahman and Raychowdhury, Arijit and Brooks, David and Wei, Gu-Yeon and Reddi, Vijay Janapa}, - booktitle = {2023 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)}, - date-added = {2023-11-22 16:54:11 -0500}, - date-modified = {2023-11-22 16:55:12 -0500}, - organization = {IEEE}, - pages = {1-6}, - title = {Mavfi: An end-to-end fault analysis framework with anomaly detection and recovery for micro aerial vehicles}, - year = {2023}} - -@inproceedings{Breier2018Practical, - address = {New York, NY, USA}, - author = {Breier, Jakub and Hou, Xiaolu and Jap, Dirmanto and Ma, Lei and Bhasin, Shivam and Liu, Yang}, - booktitle = {Proceedings of the 2018 ACM SIGSAC Conference on Computer and Communications Security}, - date-added = {2023-11-22 16:51:23 -0500}, - date-modified = {2023-11-22 16:53:46 -0500}, - keywords = {fault attacks, deep learning security, adversarial attacks}, - pages = {2204--2206}, - publisher = {Association for Computing Machinery}, - series = {CCS '18}, - title = {Practical Fault Attack on Deep Neural Networks}} - -@inproceedings{govindavajhala2003using, - author = {Govindavajhala, Sudhakar and Appel, Andrew W}, - booktitle = {2003 Symposium on Security and Privacy, 2003.}, - date-added = {2023-11-22 16:46:13 -0500}, - date-modified = {2023-11-22 16:47:03 -0500}, - organization = {IEEE}, - pages = {154-156}, - title = {Using memory errors to attack a virtual machine}, - year = {2003}} - @inproceedings{amiel2006fault, - author = {Amiel, Frederic and Clavier, Christophe 
and Tunstall, Michael}, - booktitle = {International Workshop on Fault Diagnosis and Tolerance in Cryptography}, - date-added = {2023-11-22 16:45:05 -0500}, + author = {Amiel, Frederic and Clavier, Christophe and Tunstall, Michael}, + year = 2006, + booktitle = {International Workshop on Fault Diagnosis and Tolerance in Cryptography}, + pages = {223--236}, + date-added = {2023-11-22 16:45:05 -0500}, date-modified = {2023-11-22 16:45:55 -0500}, - organization = {Springer}, - pages = {223-236}, - title = {Fault analysis of DPA-resistant algorithms}, - year = {2006}} - -@inproceedings{hutter2009contact, - author = {Hutter, Michael and Schmidt, Jorn-Marc and Plos, Thomas}, - booktitle = {2009 European Conference on Circuit Theory and Design}, - date-added = {2023-11-22 16:43:29 -0500}, - date-modified = {2023-11-22 16:44:30 -0500}, - organization = {IEEE}, - pages = {409-412}, - title = {Contact-based fault injections and power analysis on RFID tags}, - year = {2009}} - -@inproceedings{barenghi2010low, - author = {Barenghi, Alessandro and Bertoni, Guido M and Breveglieri, Luca and Pellicioli, Mauro and Pelosi, Gerardo}, - booktitle = {2010 IEEE International Symposium on Hardware-Oriented Security and Trust (HOST)}, - date-added = {2023-11-22 16:42:05 -0500}, - date-modified = {2023-11-22 16:43:09 -0500}, - organization = {IEEE}, - pages = {7-12}, - title = {Low voltage fault attacks to AES}, - year = {2010}} - -@book{joye2012fault, - author = {Joye, Marc and Tunstall, Michael}, - date-added = {2023-11-22 16:35:24 -0500}, - date-modified = {2023-11-22 16:36:20 -0500}, - publisher = {Springer Publishing Company, Incorporated}, - title = {Fault Analysis in Cryptography}, - year = {2012}} - -@inproceedings{Kocher2018spectre, - author = {Paul Kocher and Jann Horn and Anders Fogh and and Daniel Genkin and Daniel Gruss and Werner Haas and Mike Hamburg and Moritz Lipp and Stefan Mangard and Thomas Prescher and Michael Schwarz and Yuval Yarom}, - booktitle = {40th IEEE Symposium on Security and Privacy (S\&P'19)}, - date-added = {2023-11-22 16:33:35 -0500}, - date-modified = {2023-11-22 16:34:01 -0500}, - title = {Spectre Attacks: Exploiting Speculative Execution}, - year = {2019}} - -@inproceedings{Lipp2018meltdown, - author = {Moritz Lipp and Michael Schwarz and Daniel Gruss and Thomas Prescher and Werner Haas and Anders Fogh and Jann Horn and Stefan Mangard and Paul Kocher and Daniel Genkin and Yuval Yarom and Mike Hamburg}, - booktitle = {27th {USENIX} Security Symposium ({USENIX} Security 18)}, - date-added = {2023-11-22 16:32:26 -0500}, - date-modified = {2023-11-22 16:33:08 -0500}, - title = {Meltdown: Reading Kernel Memory from User Space}, - year = {2018}} - -@article{eykholt2018robust, - author = {Kevin Eykholt and Ivan Evtimov and Earlence Fernandes and Bo Li and Amir Rahmati and Chaowei Xiao and Atul Prakash and Tadayoshi Kohno and Dawn Song}, - date-added = {2023-11-22 16:30:51 -0500}, - date-modified = {2023-11-22 16:31:55 -0500}, - journal = {arXiv preprint arXiv:1707.08945}, - title = {Robust Physical-World Attacks on Deep Learning Models}, - year = {2018}} - -@inproceedings{Abdelkader_2020, - author = {Abdelkader, Ahmed and Curry, Michael J. 
and Fowl, Liam and Goldstein, Tom and Schwarzschild, Avi and Shu, Manli and Studer, Christoph and Zhu, Chen}, - booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, - date-added = {2023-11-22 16:28:31 -0500}, - date-modified = {2023-11-22 16:29:33 -0500}, - title = {Headless Horseman: Adversarial Attacks on Transfer Learning Models}, - year = {2020}} - -@article{parrish2023adversarial, - author = {Alicia Parrish and Hannah Rose Kirk and Jessica Quaye and Charvi Rastogi and Max Bartolo and Oana Inel and Juan Ciro and Rafael Mosquera and Addison Howard and Will Cukierski and D. Sculley and Vijay Janapa Reddi and Lora Aroyo}, - date-added = {2023-11-22 16:24:50 -0500}, - date-modified = {2023-11-22 16:26:30 -0500}, - journal = {arXiv preprint arXiv:2305.14384}, - title = {Adversarial Nibbler: A Data-Centric Challenge for Improving the Safety of Text-to-Image Models}, - year = {2023}} - -@article{hosseini2017deceiving, - author = {Hosseini, Hossein and Kannan, Sreeram and Zhang, Baosen and Poovendran, Radha}, - date-added = {2023-11-22 16:22:18 -0500}, - date-modified = {2023-11-22 16:23:43 -0500}, - journal = {arXiv preprint arXiv:1702.08138}, - title = {Deceiving google's perspective api built for detecting toxic comments}, - year = {2017}} - -@article{biggio2012poisoning, - author = {Biggio, Battista and Nelson, Blaine and Laskov, Pavel}, - date-added = {2023-11-22 16:21:35 -0500}, - date-modified = {2023-11-22 16:22:06 -0500}, - journal = {arXiv preprint arXiv:1206.6389}, - title = {Poisoning attacks against support vector machines}, - year = {2012}} - -@article{oliynyk2023know, - author = {Oliynyk, Daryna and Mayer, Rudolf and Rauber, Andreas}, - date-added = {2023-11-22 16:18:21 -0500}, - date-modified = {2023-11-22 16:20:44 -0500}, - journal = {ACM Comput. 
Surv.}, - keywords = {model stealing, Machine learning, model extraction}, - month = {July}, - number = {14s}, - title = {I Know What You Trained Last Summer: A Survey on Stealing Machine Learning Models and Defences}, - volume = {55}, - year = {2023}} - -@article{narayanan2006break, - author = {Narayanan, Arvind and Shmatikov, Vitaly}, - date-added = {2023-11-22 16:16:19 -0500}, - date-modified = {2023-11-22 16:16:59 -0500}, - journal = {arXiv preprint cs/0610105}, - title = {How to break anonymity of the netflix prize dataset}, - year = {2006}} - -@article{ateniese2015hacking, - author = {Ateniese, Giuseppe and Mancini, Luigi V and Spognardi, Angelo and Villani, Antonio and Vitali, Domenico and Felici, Giovanni}, - date-added = {2023-11-22 16:14:42 -0500}, - date-modified = {2023-11-22 16:15:42 -0500}, - journal = {International Journal of Security and Networks}, - number = {3}, - pages = {137-150}, - title = {Hacking smart machines with smarter ones: How to extract meaningful data from machine learning classifiers}, - volume = {10}, - year = {2015}} - -@article{miller2019lessons, - author = {Miller, Charlie}, - date-added = {2023-11-22 16:12:04 -0500}, - date-modified = {2023-11-22 16:13:31 -0500}, - journal = {IEEE Design & Test}, - number = {6}, - pages = {7-9}, - title = {Lessons learned from hacking a car}, - volume = {36}, - year = {2019}} - -@article{farwell2011stuxnet, - author = {Farwell, James P and Rohozinski, Rafal}, - date-added = {2023-11-22 14:03:31 -0500}, - date-modified = {2023-11-22 14:05:19 -0500}, - journal = {Survival}, - number = {1}, - pages = {23-40}, - title = {Stuxnet and the future of cyber war}, - volume = {53}, - year = {2011}} - -@inproceedings{krishnan2023archgym, - author = {Krishnan, Srivatsan and Yazdanbakhsh, Amir and Prakash, Shvetank and Jabbour, Jason and Uchendu, Ikechukwu and Ghosh, Susobhan and Boroujerdian, Behzad and Richins, Daniel and Tripathy, Devashree and Faust, Aleksandra and Janapa Reddi, Vijay}, - booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture}, - pages = {1--16}, - title = {ArchGym: An Open-Source Gymnasium for Machine Learning Assisted Architecture Design}, - year = {2023}} - -@misc{kuzmin2022fp8, - archiveprefix = {arXiv}, - author = {Andrey Kuzmin and Mart Van Baalen and Yuwei Ren and Markus Nagel and Jorn Peters and Tijmen Blankevoort}, - eprint = {2208.09225}, - primaryclass = {cs.LG}, - title = {FP8 Quantization: The Power of the Exponent}, - year = {2022}} - -@inproceedings{abadi2016tensorflow, - author = {Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others}, - booktitle = {12th USENIX symposium on operating systems design and implementation (OSDI 16)}, - pages = {265--283}, - title = {$\{$TensorFlow$\}$: a system for $\{$Large-Scale$\}$ machine learning}, - year = 2016} - -@article{shastri2021photonics, - author = {Shastri, Bhavin J and Tait, Alexander N and Ferreira de Lima, Thomas and Pernice, Wolfram HP and Bhaskaran, Harish and Wright, C David and Prucnal, Paul R}, - journal = {Nature Photonics}, - number = {2}, - pages = {102--114}, - publisher = {Nature Publishing Group UK London}, - title = {Photonics for artificial intelligence and neuromorphic computing}, - volume = {15}, - year = {2021}} - -@inproceedings{jouppi2017datacenter, - author = {Jouppi, Norman P and Young, Cliff and Patil, Nishant and Patterson, David and Agrawal, 
Gaurav and Bajwa, Raminder and Bates, Sarah and Bhatia, Suresh and Boden, Nan and Borchers, Al and others}, - booktitle = {Proceedings of the 44th annual international symposium on computer architecture}, - pages = {1--12}, - title = {In-datacenter performance analysis of a tensor processing unit}, - year = {2017}} - -@inproceedings{ignatov2018ai, - author = {Ignatov, Andrey and Timofte, Radu and Chou, William and Wang, Ke and Wu, Max and Hartley, Tim and Van Gool, Luc}, - booktitle = {Proceedings of the European Conference on Computer Vision (ECCV) Workshops}, - pages = {0--0}, - title = {Ai benchmark: Running deep neural networks on android smartphones}, - year = {2018}} - -@inproceedings{adolf2016fathom, - author = {Adolf, Robert and Rama, Saketh and Reagen, Brandon and Wei, Gu-Yeon and Brooks, David}, - booktitle = {2016 IEEE International Symposium on Workload Characterization (IISWC)}, - organization = {IEEE}, - pages = {1--10}, - title = {Fathom: Reference workloads for modern deep learning methods}, - year = 2016} - -@misc{al2016theano, - archiveprefix = {arXiv}, - author = {The Theano Development Team and Rami Al-Rfou and Guillaume Alain and Amjad Almahairi and Christof Angermueller and Dzmitry Bahdanau and Nicolas Ballas and Fr{\'e}d{\'e}ric Bastien and Justin Bayer and Anatoly Belikov and Alexander Belopolsky and Yoshua Bengio and Arnaud Bergeron and James Bergstra and Valentin Bisson and Josh Bleecher Snyder and Nicolas Bouchard and Nicolas Boulanger-Lewandowski and Xavier Bouthillier and Alexandre de Br{\'e}bisson and Olivier Breuleux and Pierre-Luc Carrier and Kyunghyun Cho and Jan Chorowski and Paul Christiano and Tim Cooijmans and Marc-Alexandre C{\^o}t{\'e} and Myriam C{\^o}t{\'e} and Aaron Courville and Yann N. Dauphin and Olivier Delalleau and Julien Demouth and Guillaume Desjardins and Sander Dieleman and Laurent Dinh and M{\'e}lanie Ducoffe and Vincent Dumoulin and Samira Ebrahimi Kahou and Dumitru Erhan and Ziye Fan and Orhan Firat and Mathieu Germain and Xavier Glorot and Ian Goodfellow and Matt Graham and Caglar Gulcehre and Philippe Hamel and Iban Harlouchet and Jean-Philippe Heng and Bal{\'a}zs Hidasi and Sina Honari and Arjun Jain and S{\'e}bastien Jean and Kai Jia and Mikhail Korobov and Vivek Kulkarni and Alex Lamb and Pascal Lamblin and Eric Larsen and C{\'e}sar Laurent and Sean Lee and Simon Lefrancois and Simon Lemieux and Nicholas L{\'e}onard and Zhouhan Lin and Jesse A. Livezey and Cory Lorenz and Jeremiah Lowin and Qianli Ma and Pierre-Antoine Manzagol and Olivier Mastropietro and Robert T. McGibbon and Roland Memisevic and Bart van Merri{\"e}nboer and Vincent Michalski and Mehdi Mirza and Alberto Orlandi and Christopher Pal and Razvan Pascanu and Mohammad Pezeshki and Colin Raffel and Daniel Renshaw and Matthew Rocklin and Adriana Romero and Markus Roth and Peter Sadowski and John Salvatier and Fran{\c c}ois Savard and Jan Schl{\"u}ter and John Schulman and Gabriel Schwartz and Iulian Vlad Serban and Dmitriy Serdyuk and Samira Shabanian and {\'E}tienne Simon and Sigurd Spieckermann and S. Ramana Subramanyam and Jakub Sygnowski and J{\'e}r{\'e}mie Tanguay and Gijs van Tulder and Joseph Turian and Sebastian Urban and Pascal Vincent and Francesco Visin and Harm de Vries and David Warde-Farley and Dustin J. 
Webb and Matthew Willson and Kelvin Xu and Lijun Xue and Li Yao and Saizheng Zhang and Ying Zhang}, - eprint = {1605.02688}, - primaryclass = {cs.SC}, - title = {Theano: A Python framework for fast computation of mathematical expressions}, - year = 2016} - -@article{Aledhari_Razzak_Parizi_Saeed_2020, - author = {Aledhari, Mohammed and Razzak, Rehma and Parizi, Reza M. and Saeed, Fahad}, - doi = {10.1109/access.2020.3013541}, - journal = {IEEE Access}, - pages = {140699--140725}, - title = {Federated learning: A survey on enabling technologies, Protocols, and applications}, - volume = 8, - year = 2020, - Bdsk-Url-1 = {https://doi.org/10.1109/access.2020.3013541}} - -@article{aljundi_gradient_nodate, - author = {Aljundi, Rahaf and Lin, Min and Goujaud, Baptiste and Bengio, Yoshua}, - file = {Aljundi et al. - Gradient based sample selection for online continu.pdf:/Users/alex/Zotero/storage/GPHM4KY7/Aljundi et al. - Gradient based sample selection for online continu.pdf:application/pdf}, - language = {en}, - title = {Gradient based sample selection for online continual learning}} - -@inproceedings{altayeb2022classifying, - author = {Altayeb, Moez and Zennaro, Marco and Rovai, Marcelo}, - booktitle = {Proceedings of the 2022 ACM Conference on Information Technology for Social Good}, - pages = {132--137}, - title = {Classifying mosquito wingbeat sound using TinyML}, - year = 2022} - + organization = {Springer} +} @misc{amodei_ai_2018, - author = {Amodei, Dario and Hernandez, Danny}, - journal = {OpenAI Blog}, - month = may, - title = {{AI} and {Compute}}, - url = {https://openai.com/research/ai-and-compute}, - year = 2018, - Bdsk-Url-1 = {https://openai.com/research/ai-and-compute}} - + author = {Amodei, Dario and Hernandez, Danny}, + year = 2018, + month = may, + journal = {OpenAI Blog}, + url = {https://openai.com/research/ai-and-compute}, + bdsk-url-1 = {https://openai.com/research/ai-and-compute} +} @inproceedings{antol2015vqa, - author = {Antol, Stanislaw and Agrawal, Aishwarya and Lu, Jiasen and Mitchell, Margaret and Batra, Dhruv and Zitnick, C Lawrence and Parikh, Devi}, - booktitle = {Proceedings of the IEEE international conference on computer vision}, - pages = {2425--2433}, - title = {Vqa: Visual question answering}, - year = 2015} - + author = {Antol, Stanislaw and Agrawal, Aishwarya and Lu, Jiasen and Mitchell, Margaret and Batra, Dhruv and Zitnick, C Lawrence and Parikh, Devi}, + year = 2015, + booktitle = {Proceedings of the IEEE international conference on computer vision}, + pages = {2425--2433} +} +@inproceedings{antonakakis2017understanding, + author = {Antonakakis, Manos and April, Tim and Bailey, Michael and Bernhard, Matt and Bursztein, Elie and Cochran, Jaime and Durumeric, Zakir and Halderman, J Alex and Invernizzi, Luca and Kallitsis, Michalis and others}, + year = 2017, + booktitle = {26th USENIX security symposium (USENIX Security 17)}, + pages = {1093--1110} +} @article{app112211073, + author = {Kwon, Jisu and Park, Daejin}, + year = 2021, + journal = {Applied Sciences}, + volume = 11, + number = 22, + doi = {10.3390/app112211073}, + issn = {2076-3417}, + url = {https://www.mdpi.com/2076-3417/11/22/11073}, article-number = 11073, - author = {Kwon, Jisu and Park, Daejin}, - doi = {10.3390/app112211073}, - issn = {2076-3417}, - journal = {Applied Sciences}, - number = 22, - title = {Hardware/Software Co-Design for TinyML Voice-Recognition Application on Resource Frugal Edge Devices}, - url = {https://www.mdpi.com/2076-3417/11/22/11073}, - volume = 11, - year = 2021, - 
Bdsk-Url-1 = {https://www.mdpi.com/2076-3417/11/22/11073}, - Bdsk-Url-2 = {https://doi.org/10.3390/app112211073}} - + bdsk-url-1 = {https://www.mdpi.com/2076-3417/11/22/11073}, + bdsk-url-2 = {https://doi.org/10.3390/app112211073} +} @article{Ardila_Branson_Davis_Henretty_Kohler_Meyer_Morais_Saunders_Tyers_Weber_2020, - author = {Ardila, Rosana and Branson, Megan and Davis, Kelly and Henretty, Michael and Kohler, Michael and Meyer, Josh and Morais, Reuben and Saunders, Lindsay and Tyers, Francis M. and Weber, Gregor}, - journal = {Proceedings of the 12th Conference on Language Resources and Evaluation}, - month = {May}, - pages = {4218-4222}, - title = {Common Voice: A Massively-Multilingual Speech Corpus}, - year = 2020} - + author = {Ardila, Rosana and Branson, Megan and Davis, Kelly and Henretty, Michael and Kohler, Michael and Meyer, Josh and Morais, Reuben and Saunders, Lindsay and Tyers, Francis M. and Weber, Gregor}, + year = 2020, + month = {May}, + journal = {Proceedings of the 12th Conference on Language Resources and Evaluation}, + pages = {4218--4222} +} +@inproceedings{Asonov2004Keyboard, + author = {Asonov, D. and Agrawal, R.}, + year = 2004, + booktitle = {IEEE Symposium on Security and Privacy, 2004. Proceedings. 2004}, + pages = {3--11}, + date-added = {2023-11-22 17:05:39 -0500}, + date-modified = {2023-11-22 17:06:45 -0500}, + organization = {IEEE} +} +@article{ateniese2015hacking, + author = {Ateniese, Giuseppe and Mancini, Luigi V and Spognardi, Angelo and Villani, Antonio and Vitali, Domenico and Felici, Giovanni}, + year = 2015, + journal = {International Journal of Security and Networks}, + volume = 10, + number = 3, + pages = {137--150}, + date-added = {2023-11-22 16:14:42 -0500}, + date-modified = {2023-11-22 16:15:42 -0500} +} @misc{awq, - author = {Lin and Tang, Tang and Yang, Dang and Gan, Han}, - doi = {10.48550/arXiv.2306.00978}, - title = {AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration}, - url = {https://arxiv.org/abs/2306.00978}, - urldate = {2023-10-03}, - year = 2023, - Bdsk-Url-1 = {https://arxiv.org/abs/2306.00978}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2306.00978}} - + author = {Lin and Tang, Tang and Yang, Dang and Gan, Han}, + year = 2023, + doi = {10.48550/arXiv.2306.00978}, + url = {https://arxiv.org/abs/2306.00978}, + urldate = {2023-10-03}, + bdsk-url-1 = {https://arxiv.org/abs/2306.00978}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2306.00978} +} +@misc{bailey_enabling_2018, + author = {Bailey, Brian}, + year = 2018, + month = sep, + journal = {Semiconductor Engineering}, + url = {https://semiengineering.com/enabling-cheaper-design/}, + urldate = {2023-11-07}, + language = {en-US}, + bdsk-url-1 = {https://semiengineering.com/enabling-cheaper-design/} +} +@article{bains2020business, + author = {Bains, Sunny}, + year = 2020, + journal = {Nat. 
Electron}, + volume = 3, + number = 7, + pages = {348--351} +} @inproceedings{bamoumen2022tinyml, - author = {Bamoumen, Hatim and Temouden, Anas and Benamar, Nabil and Chtouki, Yousra}, - booktitle = {2022 International Conference on Innovation and Intelligence for Informatics, Computing, and Technologies (3ICT)}, - organization = {IEEE}, - pages = {338--343}, - title = {How TinyML Can be Leveraged to Solve Environmental Problems: A Survey}, - year = 2022} - + author = {Bamoumen, Hatim and Temouden, Anas and Benamar, Nabil and Chtouki, Yousra}, + year = 2022, + booktitle = {2022 International Conference on Innovation and Intelligence for Informatics, Computing, and Technologies (3ICT)}, + pages = {338--343}, + organization = {IEEE} +} @article{banbury2020benchmarking, - author = {Banbury, Colby R and Reddi, Vijay Janapa and Lam, Max and Fu, William and Fazel, Amin and Holleman, Jeremy and Huang, Xinyuan and Hurtado, Robert and Kanter, David and Lokhmotov, Anton and others}, - journal = {arXiv preprint arXiv:2003.04821}, - title = {Benchmarking tinyml systems: Challenges and direction}, - year = 2020} - + author = {Banbury, Colby R and Reddi, Vijay Janapa and Lam, Max and Fu, William and Fazel, Amin and Holleman, Jeremy and Huang, Xinyuan and Hurtado, Robert and Kanter, David and Lokhmotov, Anton and others}, + year = 2020, + journal = {arXiv preprint arXiv:2003.04821} +} @article{bank2023autoencoders, - author = {Bank, Dor and Koenigstein, Noam and Giryes, Raja}, - journal = {Machine Learning for Data Science Handbook: Data Mining and Knowledge Discovery Handbook}, - pages = {353--374}, - publisher = {Springer}, - title = {Autoencoders}, - year = 2023} - + author = {Bank, Dor and Koenigstein, Noam and Giryes, Raja}, + year = 2023, + journal = {Machine Learning for Data Science Handbook: Data Mining and Knowledge Discovery Handbook}, + publisher = {Springer}, + pages = {353--374} +} +@inproceedings{barenghi2010low, + author = {Barenghi, Alessandro and Bertoni, Guido M and Breveglieri, Luca and Pellicioli, Mauro and Pelosi, Gerardo}, + year = 2010, + booktitle = {2010 IEEE International Symposium on Hardware-Oriented Security and Trust (HOST)}, + pages = {7--12}, + date-added = {2023-11-22 16:42:05 -0500}, + date-modified = {2023-11-22 16:43:09 -0500}, + organization = {IEEE} +} @book{barroso2019datacenter, - author = {Barroso, Luiz Andr{\'e} and H{\"o}lzle, Urs and Ranganathan, Parthasarathy}, - publisher = {Springer Nature}, - title = {The datacenter as a computer: Designing warehouse-scale machines}, - year = 2019} - + author = {Barroso, Luiz Andr{\'e} and H{\"o}lzle, Urs and Ranganathan, Parthasarathy}, + year = 2019, + publisher = {Springer Nature} +} +@misc{bayes_hyperparam, + author = {Jasper Snoek and Hugo Larochelle and Ryan P. Adams}, + year = 2012, + eprint = {1206.2944}, + archiveprefix = {arXiv}, + primaryclass = {stat.ML} +} @article{Bender_Friedman_2018, - author = {Bender, Emily M. and Friedman, Batya}, - doi = {10.1162/tacl_a_00041}, - journal = {Transactions of the Association for Computational Linguistics}, - pages = {587-604}, - title = {Data statements for natural language processing: Toward mitigating system bias and enabling better science}, - volume = 6, - year = 2018, - Bdsk-Url-1 = {https://doi.org/10.1162/tacl_a_00041}} - + author = {Bender, Emily M. 
and Friedman, Batya}, + year = 2018, + journal = {Transactions of the Association for Computational Linguistics}, + volume = 6, + pages = {587--604}, + doi = {10.1162/tacl_a_00041}, + bdsk-url-1 = {https://doi.org/10.1162/tacl_a_00041} +} @article{beyer2020we, - author = {Beyer, Lucas and H{\'e}naff, Olivier J and Kolesnikov, Alexander and Zhai, Xiaohua and Oord, A{\"a}ron van den}, - journal = {arXiv preprint arXiv:2006.07159}, - title = {Are we done with imagenet?}, - year = 2020} - + author = {Beyer, Lucas and H{\'e}naff, Olivier J and Kolesnikov, Alexander and Zhai, Xiaohua and Oord, A{\"a}ron van den}, + year = 2020, + journal = {arXiv preprint arXiv:2006.07159} +} +@inproceedings{Biega2020Oper, + author = {Biega, Asia J. and Potash, Peter and Daum\'{e}, Hal and Diaz, Fernando and Finck, Mich\`{e}le}, + year = 2020, + booktitle = {Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + series = {SIGIR '20}, + pages = {399--408}, + date-added = {2023-11-22 17:57:23 -0500}, + date-modified = {2023-11-22 17:59:54 -0500} +} +@article{biggio2012poisoning, + author = {Biggio, Battista and Nelson, Blaine and Laskov, Pavel}, + year = 2012, + journal = {arXiv preprint arXiv:1206.6389}, + date-added = {2023-11-22 16:21:35 -0500}, + date-modified = {2023-11-22 16:22:06 -0500} +} @article{biggio2014pattern, - author = {Biggio, Battista and Fumera, Giorgio and Roli, Fabio}, - journal = {International Journal of Pattern Recognition and Artificial Intelligence}, - number = {07}, - pages = 1460002, - publisher = {World Scientific}, - title = {Pattern recognition systems under attack: Design issues and research challenges}, - volume = 28, - year = 2014} - + author = {Biggio, Battista and Fumera, Giorgio and Roli, Fabio}, + year = 2014, + journal = {International Journal of Pattern Recognition and Artificial Intelligence}, + publisher = {World Scientific}, + volume = 28, + number = {07}, + pages = 1460002 +} +@article{biggs2021natively, + author = {Biggs, John and Myers, James and Kufel, Jedrzej and Ozer, Emre and Craske, Simon and Sou, Antony and Ramsdale, Catherine and Williamson, Ken and Price, Richard and White, Scott}, + year = 2021, + journal = {Nature}, + publisher = {Nature Publishing Group UK London}, + volume = 595, + number = 7868, + pages = {532--536} +} +@article{binkert2011gem5, + author = {Binkert, Nathan and Beckmann, Bradford and Black, Gabriel and Reinhardt, Steven K and Saidi, Ali and Basu, Arkaprava and Hestness, Joel and Hower, Derek R and Krishna, Tushar and Sardashti, Somayeh and others}, + year = 2011, + journal = {ACM SIGARCH computer architecture news}, + publisher = {ACM New York, NY, USA}, + volume = 39, + number = 2, + pages = {1--7} +} @misc{blalock_what_2020, - abstract = {Neural network pruning---the task of reducing the size of a network by removing parameters---has been the subject of a great deal of work in recent years. We provide a meta-analysis of the literature, including an overview of approaches to pruning and consistent findings in the literature. After aggregating results across 81 papers and pruning hundreds of models in controlled conditions, our clearest finding is that the community suffers from a lack of standardized benchmarks and metrics. 
This deficiency is substantial enough that it is hard to compare pruning techniques to one another or determine how much progress the field has made over the past three decades. To address this situation, we identify issues with current practices, suggest concrete remedies, and introduce ShrinkBench, an open-source framework to facilitate standardized evaluations of pruning methods. We use ShrinkBench to compare various pruning techniques and show that its comprehensive evaluation can prevent common pitfalls when comparing pruning methods.}, - author = {Blalock, Davis and Ortiz, Jose Javier Gonzalez and Frankle, Jonathan and Guttag, John}, - doi = {10.48550/arXiv.2003.03033}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/MA4QGZ6E/Blalock et al. - 2020 - What is the State of Neural Network Pruning.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/8DFKG4GL/2003.html:text/html}, - keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, - month = mar, - note = {arXiv:2003.03033 [cs, stat]}, - publisher = {arXiv}, - title = {What is the {State} of {Neural} {Network} {Pruning}?}, - url = {http://arxiv.org/abs/2003.03033}, - urldate = {2023-10-20}, - year = 2020, - Bdsk-Url-1 = {http://arxiv.org/abs/2003.03033}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2003.03033}} - + author = {Blalock, Davis and Ortiz, Jose Javier Gonzalez and Frankle, Jonathan and Guttag, John}, + year = 2020, + month = mar, + publisher = {arXiv}, + doi = {10.48550/arXiv.2003.03033}, + url = {http://arxiv.org/abs/2003.03033}, + urldate = {2023-10-20}, + note = {arXiv:2003.03033 [cs, stat]}, + bdsk-url-1 = {http://arxiv.org/abs/2003.03033}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2003.03033} +} +@article{breier2018deeplaser, + author = {Breier, Jakub and Hou, Xiaolu and Jap, Dirmanto and Ma, Lei and Bhasin, Shivam and Liu, Yang}, + year = 2018, + journal = {arXiv preprint arXiv:1806.05859} +} +@inproceedings{Breier2018Practical, + author = {Breier, Jakub and Hou, Xiaolu and Jap, Dirmanto and Ma, Lei and Bhasin, Shivam and Liu, Yang}, + booktitle = {Proceedings of the 2018 ACM SIGSAC Conference on Computer and Communications Security}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + series = {CCS '18}, + pages = {2204--2206}, + date-added = {2023-11-22 16:51:23 -0500}, + date-modified = {2023-11-22 16:53:46 -0500} +} +@inproceedings{brown_language_2020, + author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario}, + year = 2020, + booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, + publisher = {Curran Associates, Inc.}, + volume = 33, + pages = {1877--1901}, + url = {https://proceedings.neurips.cc/paper_files/paper/2020/hash/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html}, + urldate = {2023-11-07}, + bdsk-url-1 = {https://proceedings.neurips.cc/paper_files/paper/2020/hash/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html} +} @article{brown2020language, - author = 
{Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others}, - journal = {Advances in neural information processing systems}, - pages = {1877--1901}, - title = {Language models are few-shot learners}, - volume = 33, - year = 2020} - + author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others}, + year = 2020, + journal = {Advances in neural information processing systems}, + volume = 33, + pages = {1877--1901} +} +@article{Burnet1989Spycatcher, + author = {David Burnet and Richard Thomas}, + year = 1989, + journal = {Journal of Law and Society}, + volume = 16, + number = 2, + pages = {210--224}, + date-added = {2023-11-22 17:03:00 -0500}, + date-modified = {2023-11-22 17:04:44 -0500} +} +@article{burr2016recent, + author = {Burr, Geoffrey W and Brightsky, Matthew J and Sebastian, Abu and Cheng, Huai-Yu and Wu, Jau-Yi and Kim, Sangbum and Sosa, Norma E and Papandreou, Nikolaos and Lung, Hsiang-Lan and Pozidis, Haralampos and others}, + year = 2016, + journal = {IEEE Journal on Emerging and Selected Topics in Circuits and Systems}, + publisher = {IEEE}, + volume = 6, + number = 2, + pages = {146--162} +} @inproceedings{cai_online_2021, - address = {Montreal, QC, Canada}, - author = {Cai, Zhipeng and Sener, Ozan and Koltun, Vladlen}, - booktitle = {2021 {IEEE}/{CVF} {International} {Conference} on {Computer} {Vision} ({ICCV})}, - doi = {10.1109/ICCV48922.2021.00817}, - file = {Cai et al. - 2021 - Online Continual Learning with Natural Distributio.pdf:/Users/alex/Zotero/storage/R7ZMIM4K/Cai et al. - 2021 - Online Continual Learning with Natural Distributio.pdf:application/pdf}, - isbn = {978-1-66542-812-5}, - language = {en}, - month = oct, - pages = {8261--8270}, - publisher = {IEEE}, - shorttitle = {Online {Continual} {Learning} with {Natural} {Distribution} {Shifts}}, - title = {Online {Continual} {Learning} with {Natural} {Distribution} {Shifts}: {An} {Empirical} {Study} with {Visual} {Data}}, - url = {https://ieeexplore.ieee.org/document/9710740/}, - urldate = {2023-10-26}, - year = 2021, - Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/9710740/}, - Bdsk-Url-2 = {https://doi.org/10.1109/ICCV48922.2021.00817}} - + shorttitle = {Online {Continual} {Learning} with {Natural} {Distribution} {Shifts}}, + author = {Cai, Zhipeng and Sener, Ozan and Koltun, Vladlen}, + year = 2021, + month = oct, + booktitle = {2021 {IEEE}/{CVF} {International} {Conference} on {Computer} {Vision} ({ICCV})}, + publisher = {IEEE}, + address = {Montreal, QC, Canada}, + pages = {8261--8270}, + doi = {10.1109/ICCV48922.2021.00817}, + isbn = {978-1-66542-812-5}, + url = {https://ieeexplore.ieee.org/document/9710740/}, + urldate = {2023-10-26}, + language = {en}, + bdsk-url-1 = {https://ieeexplore.ieee.org/document/9710740/}, + bdsk-url-2 = {https://doi.org/10.1109/ICCV48922.2021.00817} +} @article{cai_tinytl_nodate, - author = {Cai, Han and Gan, Chuang and Zhu, Ligeng and Han, Song}, - file = {Cai et al. - TinyTL Reduce Memory, Not Parameters for Efficient.pdf:/Users/alex/Zotero/storage/J9C8PTCX/Cai et al. 
- TinyTL Reduce Memory, Not Parameters for Efficient.pdf:application/pdf}, - language = {en}, - title = {{TinyTL}: {Reduce} {Memory}, {Not} {Parameters} for {Efficient} {On}-{Device} {Learning}}} - + author = {Cai, Han and Gan, Chuang and Zhu, Ligeng and Han, Song}, + language = {en} +} @article{cai2018proxylessnas, - author = {Cai, Han and Zhu, Ligeng and Han, Song}, - journal = {arXiv preprint arXiv:1812.00332}, - title = {Proxylessnas: Direct neural architecture search on target task and hardware}, - year = 2018} - + author = {Cai, Han and Zhu, Ligeng and Han, Song}, + year = 2018, + journal = {arXiv preprint arXiv:1812.00332} +} @article{cai2020tinytl, - author = {Cai, Han and Gan, Chuang and Zhu, Ligeng and Han, Song}, - journal = {Advances in Neural Information Processing Systems}, - pages = {11285--11297}, - title = {Tinytl: Reduce memory, not parameters for efficient on-device learning}, - volume = 33, - year = 2020} - + author = {Cai, Han and Gan, Chuang and Zhu, Ligeng and Han, Song}, + year = 2020, + journal = {Advances in Neural Information Processing Systems}, + volume = 33, + pages = {11285--11297} +} +@article{cavoukian2009privacy, + author = {Cavoukian, Ann}, + year = 2009, + journal = {Office of the Information and Privacy Commissioner}, + date-added = {2023-11-22 17:55:45 -0500}, + date-modified = {2023-11-22 17:56:58 -0500} +} @article{Chapelle_Scholkopf_Zien, - author = {Chapelle, O. and Scholkopf, B. and Zien, Eds., A.}, - doi = {10.1109/tnn.2009.2015974}, - journal = {IEEE Transactions on Neural Networks}, - number = 3, - pages = {542--542}, - title = {Semi-supervised learning (Chapelle, O. et al., eds.; 2006) [book reviews]}, - volume = 20, - year = 2009, - Bdsk-Url-1 = {https://doi.org/10.1109/tnn.2009.2015974}} - + author = {Chapelle, O. and Scholkopf, B. and Zien, Eds., A.}, + year = 2009, + journal = {IEEE Transactions on Neural Networks}, + volume = 20, + number = 3, + pages = {542--542}, + doi = {10.1109/tnn.2009.2015974}, + bdsk-url-1 = {https://doi.org/10.1109/tnn.2009.2015974} +} @misc{chen__inpainting_2022, - abstract = {Some simple examples for showing how to use tensor decomposition to reconstruct fluid dynamics}, - author = {Chen (陈新宇), Xinyu}, - journal = {Medium}, - language = {en}, - month = mar, - title = {Inpainting {Fluid} {Dynamics} with {Tensor} {Decomposition} ({NumPy})}, - url = {https://medium.com/@xinyu.chen/inpainting-fluid-dynamics-with-tensor-decomposition-numpy-d84065fead4d}, - urldate = {2023-10-20}, - year = 2022, - Bdsk-Url-1 = {https://medium.com/@xinyu.chen/inpainting-fluid-dynamics-with-tensor-decomposition-numpy-d84065fead4d}} - + author = {Chen (陈新宇), Xinyu}, + year = 2022, + month = mar, + journal = {Medium}, + url = {https://medium.com/@xinyu.chen/inpainting-fluid-dynamics-with-tensor-decomposition-numpy-d84065fead4d}, + urldate = {2023-10-20}, + language = {en}, + bdsk-url-1 = {https://medium.com/@xinyu.chen/inpainting-fluid-dynamics-with-tensor-decomposition-numpy-d84065fead4d} +} @misc{chen_tvm_2018, - annote = {Comment: Significantly improved version, add automated optimization}, - author = {Chen, Tianqi and Moreau, Thierry and Jiang, Ziheng and Zheng, Lianmin and Yan, Eddie and Cowan, Meghan and Shen, Haichen and Wang, Leyuan and Hu, Yuwei and Ceze, Luis and Guestrin, Carlos and Krishnamurthy, Arvind}, - file = {Chen et al. - 2018 - TVM An Automated End-to-End Optimizing Compiler f.pdf:/Users/alex/Zotero/storage/QR8MHJ38/Chen et al. 
- 2018 - TVM An Automated End-to-End Optimizing Compiler f.pdf:application/pdf}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Computer Science - Programming Languages}, - language = {en}, - month = oct, - note = {arXiv:1802.04799 [cs]}, - publisher = {arXiv}, - shorttitle = {{TVM}}, - title = {{TVM}: {An} {Automated} {End}-to-{End} {Optimizing} {Compiler} for {Deep} {Learning}}, - url = {http://arxiv.org/abs/1802.04799}, - urldate = {2023-10-26}, - year = 2018, - Bdsk-Url-1 = {http://arxiv.org/abs/1802.04799}} - + shorttitle = {{TVM}}, + author = {Chen, Tianqi and Moreau, Thierry and Jiang, Ziheng and Zheng, Lianmin and Yan, Eddie and Cowan, Meghan and Shen, Haichen and Wang, Leyuan and Hu, Yuwei and Ceze, Luis and Guestrin, Carlos and Krishnamurthy, Arvind}, + year = 2018, + month = oct, + publisher = {arXiv}, + url = {http://arxiv.org/abs/1802.04799}, + urldate = {2023-10-26}, + note = {arXiv:1802.04799 [cs]}, + annote = {Comment: Significantly improved version, add automated optimization}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/1802.04799} +} @article{chen2016training, - author = {Chen, Tianqi and Xu, Bing and Zhang, Chiyuan and Guestrin, Carlos}, - journal = {arXiv preprint arXiv:1604.06174}, - title = {Training deep nets with sublinear memory cost}, - year = 2016} - + author = {Chen, Tianqi and Xu, Bing and Zhang, Chiyuan and Guestrin, Carlos}, + year = 2016, + journal = {arXiv preprint arXiv:1604.06174} +} @inproceedings{chen2018tvm, - author = {Chen, Tianqi and Moreau, Thierry and Jiang, Ziheng and Zheng, Lianmin and Yan, Eddie and Shen, Haichen and Cowan, Meghan and Wang, Leyuan and Hu, Yuwei and Ceze, Luis and others}, - booktitle = {13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)}, - pages = {578--594}, - title = {$\{$TVM$\}$: An automated $\{$End-to-End$\}$ optimizing compiler for deep learning}, - year = 2018} - + author = {Chen, Tianqi and Moreau, Thierry and Jiang, Ziheng and Zheng, Lianmin and Yan, Eddie and Shen, Haichen and Cowan, Meghan and Wang, Leyuan and Hu, Yuwei and Ceze, Luis and others}, + year = 2018, + booktitle = {13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)}, + pages = {578--594} +} +@article{Chen2023, + author = {Chen, Emma and Prakash, Shvetank and Janapa Reddi, Vijay and Kim, David and Rajpurkar, Pranav}, + year = 2023, + month = {Nov}, + day = {06}, + journal = {Nature Biomedical Engineering}, + doi = {10.1038/s41551-023-01115-0}, + issn = {2157-846X}, + url = {https://doi.org/10.1038/s41551-023-01115-0}, + bdsk-url-1 = {https://doi.org/10.1038/s41551-023-01115-0} +} @article{chen2023learning, - author = {Chen, Zhiyong and Xu, Shugong}, - journal = {EURASIP Journal on Audio, Speech, and Music Processing}, - number = 1, - pages = 33, - publisher = {Springer}, - title = {Learning domain-heterogeneous speaker recognition systems with personalized continual federated learning}, - volume = 2023, - year = 2023} - + author = {Chen, Zhiyong and Xu, Shugong}, + year = 2023, + journal = {EURASIP Journal on Audio, Speech, and Music Processing}, + publisher = {Springer}, + volume = 2023, + number = 1, + pages = 33 +} +@article{cheng2017survey, + author = {Cheng, Yu and Wang, Duo and Zhou, Pan and Zhang, Tao}, + year = 2017, + journal = {arXiv preprint arXiv:1710.09282} +} +@article{chi2016prime, + author = {Chi, Ping and Li, Shuangchen and Xu, Cong and Zhang, Tao and Zhao, Jishen and Liu, Yongpan and Wang, Yu and Xie, Yuan}, + year = 2016, 
+ journal = {ACM SIGARCH Computer Architecture News}, + publisher = {ACM New York, NY, USA}, + volume = 44, + number = 3, + pages = {27--39} +} @misc{chollet2015, - author = {Fran{\c c}ois Chollet}, - commit = {5bcac37}, - howpublished = {\url{https://github.com/fchollet/keras}}, - journal = {GitHub repository}, - publisher = {GitHub}, - title = {keras}, - year = 2015} - + author = {Fran{\c c}ois Chollet}, + year = 2015, + journal = {GitHub repository}, + publisher = {GitHub}, + commit = {5bcac37}, + howpublished = {\url{https://github.com/fchollet/keras}} +} @article{chollet2018keras, - author = {Chollet, Fran{\c{c}}ois}, - journal = {March 9th}, - title = {Introduction to keras}, - year = 2018} - - + author = {Chollet, Fran{\c{c}}ois}, + year = 2018, + journal = {March 9th} +} @inproceedings{chu2021discovering, + author = {Chu, Grace and Arikan, Okan and Bender, Gabriel and Wang, Weijun and Brighton, Achille and Kindermans, Pieter-Jan and Liu, Hanxiao and Akin, Berkin and Gupta, Suyog and Howard, Andrew}, + year = 2021, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages = {3022--3031}, archiveprefix = {arXiv}, - author = {Chu, Grace and Arikan, Okan and Bender, Gabriel and Wang, Weijun and Brighton, Achille and Kindermans, Pieter-Jan and Liu, Hanxiao and Akin, Berkin and Gupta, Suyog and Howard, Andrew}, - booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, - eprint = {2008.08178}, - pages = {3022--3031}, - primaryclass = {cs.CV}, - title = {Discovering multi-hardware mobile models via architecture search}, - year = 2021} - + eprint = {2008.08178}, + primaryclass = {cs.CV} +} +@article{chua1971memristor, + author = {Chua, Leon}, + year = 1971, + journal = {IEEE Transactions on circuit theory}, + publisher = {IEEE}, + volume = 18, + number = 5, + pages = {507--519} +} @article{coleman2017dawnbench, - author = {Coleman, Cody and Narayanan, Deepak and Kang, Daniel and Zhao, Tian and Zhang, Jian and Nardi, Luigi and Bailis, Peter and Olukotun, Kunle and R{\'e}, Chris and Zaharia, Matei}, - journal = {Training}, - number = 101, - pages = 102, - title = {Dawnbench: An end-to-end deep learning benchmark and competition}, - volume = 100, - year = 2017} - + author = {Coleman, Cody and Narayanan, Deepak and Kang, Daniel and Zhao, Tian and Zhang, Jian and Nardi, Luigi and Bailis, Peter and Olukotun, Kunle and R{\'e}, Chris and Zaharia, Matei}, + year = 2017, + journal = {Training}, + volume = 100, + number = 101, + pages = 102 +} @inproceedings{coleman2022similarity, - author = {Coleman, Cody and Chou, Edward and Katz-Samuels, Julian and Culatana, Sean and Bailis, Peter and Berg, Alexander C and Nowak, Robert and Sumbaly, Roshan and Zaharia, Matei and Yalniz, I Zeki}, - booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence}, - number = 6, - pages = {6402--6410}, - title = {Similarity search for efficient active learning and search of rare concepts}, - volume = 36, - year = 2022} - + author = {Coleman, Cody and Chou, Edward and Katz-Samuels, Julian and Culatana, Sean and Bailis, Peter and Berg, Alexander C and Nowak, Robert and Sumbaly, Roshan and Zaharia, Matei and Yalniz, I Zeki}, + year = 2022, + booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence}, + volume = 36, + number = 6, + pages = {6402--6410} +} @misc{cottier_trends_2023, - author = {Cottier, Ben}, - journal = {Epoch AI Report}, - month = jan, - title = {Trends in the {Dollar} {Training} {Cost} 
of {Machine} {Learning} {Systems}}, - url = {https://epochai.org/blog/trends-in-the-dollar-training-cost-of-machine-learning-systems}, - year = 2023, - Bdsk-Url-1 = {https://epochai.org/blog/trends-in-the-dollar-training-cost-of-machine-learning-systems}} - + author = {Cottier, Ben}, + year = 2023, + month = jan, + journal = {Epoch AI Report}, + url = {https://epochai.org/blog/trends-in-the-dollar-training-cost-of-machine-learning-systems}, + bdsk-url-1 = {https://epochai.org/blog/trends-in-the-dollar-training-cost-of-machine-learning-systems} +} +@article{dahl2023benchmarking, + author = {Dahl, George E and Schneider, Frank and Nado, Zachary and Agarwal, Naman and Sastry, Chandramouli Shama and Hennig, Philipp and Medapati, Sourabh and Eschenhagen, Runa and Kasimbeg, Priya and Suo, Daniel and others}, + year = 2023, + journal = {arXiv preprint arXiv:2306.07179} +} +@article{dally_evolution_2021, + author = {Dally, William J. and Keckler, Stephen W. and Kirk, David B.}, + year = 2021, + month = nov, + journal = {IEEE Micro}, + volume = 41, + number = 6, + pages = {42--51}, + doi = {10.1109/MM.2021.3113475}, + issn = {1937-4143}, + url = {https://ieeexplore.ieee.org/document/9623445}, + urldate = {2023-11-07}, + note = {Conference Name: IEEE Micro}, + bdsk-url-1 = {https://ieeexplore.ieee.org/document/9623445}, + bdsk-url-2 = {https://doi.org/10.1109/MM.2021.3113475} +} @misc{david_tensorflow_2021, - author = {David, Robert and Duke, Jared and Jain, Advait and Reddi, Vijay Janapa and Jeffries, Nat and Li, Jian and Kreeger, Nick and Nappier, Ian and Natraj, Meghna and Regev, Shlomi and Rhodes, Rocky and Wang, Tiezhen and Warden, Pete}, - file = {David et al. - 2021 - TensorFlow Lite Micro Embedded Machine Learning o.pdf:/Users/alex/Zotero/storage/YCFVNEVH/David et al. 
- 2021 - TensorFlow Lite Micro Embedded Machine Learning o.pdf:application/pdf}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning}, - language = {en}, - month = mar, - note = {arXiv:2010.08678 [cs]}, - publisher = {arXiv}, - shorttitle = {{TensorFlow} {Lite} {Micro}}, - title = {{TensorFlow} {Lite} {Micro}: {Embedded} {Machine} {Learning} on {TinyML} {Systems}}, - url = {http://arxiv.org/abs/2010.08678}, - urldate = {2023-10-26}, - year = 2021, - Bdsk-Url-1 = {http://arxiv.org/abs/2010.08678}} - + shorttitle = {{TensorFlow} {Lite} {Micro}}, + author = {David, Robert and Duke, Jared and Jain, Advait and Reddi, Vijay Janapa and Jeffries, Nat and Li, Jian and Kreeger, Nick and Nappier, Ian and Natraj, Meghna and Regev, Shlomi and Rhodes, Rocky and Wang, Tiezhen and Warden, Pete}, + year = 2021, + month = mar, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2010.08678}, + urldate = {2023-10-26}, + note = {arXiv:2010.08678 [cs]}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2010.08678} +} @article{david2021tensorflow, - author = {David, Robert and Duke, Jared and Jain, Advait and Janapa Reddi, Vijay and Jeffries, Nat and Li, Jian and Kreeger, Nick and Nappier, Ian and Natraj, Meghna and Wang, Tiezhen and others}, - journal = {Proceedings of Machine Learning and Systems}, - pages = {800--811}, - title = {Tensorflow lite micro: Embedded machine learning for tinyml systems}, - volume = 3, - year = 2021} - + author = {David, Robert and Duke, Jared and Jain, Advait and Janapa Reddi, Vijay and Jeffries, Nat and Li, Jian and Kreeger, Nick and Nappier, Ian and Natraj, Meghna and Wang, Tiezhen and others}, + year = 2021, + journal = {Proceedings of Machine Learning and Systems}, + volume = 3, + pages = {800--811} +} +@article{davies2018loihi, + author = {Davies, Mike and Srinivasa, Narayan and Lin, Tsung-Han and Chinya, Gautham and Cao, Yongqiang and Choday, Sri Harsha and Dimou, Georgios and Joshi, Prasad and Imam, Nabil and Jain, Shweta and others}, + year = 2018, + journal = {Ieee Micro}, + publisher = {IEEE}, + volume = 38, + number = 1, + pages = {82--99} +} +@article{davies2021advancing, + author = {Davies, Mike and Wild, Andreas and Orchard, Garrick and Sandamirskaya, Yulia and Guerra, Gabriel A Fonseca and Joshi, Prasad and Plank, Philipp and Risbud, Sumedh R}, + year = 2021, + journal = {Proceedings of the IEEE}, + publisher = {IEEE}, + volume = 109, + number = 5, + pages = {911--934} +} +@inproceedings{DBLP:journals/corr/KingmaB14, + author = {Diederik P. Kingma and Jimmy Ba}, + year = 2015, + booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings}, + url = {http://arxiv.org/abs/1412.6980}, + editor = {Yoshua Bengio and Yann LeCun}, + timestamp = {Thu, 25 Jul 2019 14:25:37 +0200}, + biburl = {https://dblp.org/rec/journals/corr/KingmaB14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +@misc{dean_jeff_numbers_nodate, + author = {Dean. 
Jeff}, + url = {https://brenocon.com/dean_perf.html}, + urldate = {2023-11-07}, + bdsk-url-1 = {https://brenocon.com/dean_perf.html} +} @article{dean2012large, - author = {Dean, Jeffrey and Corrado, Greg and Monga, Rajat and Chen, Kai and Devin, Matthieu and Mao, Mark and Ranzato, Marc'aurelio and Senior, Andrew and Tucker, Paul and Yang, Ke and others}, - journal = {Advances in neural information processing systems}, - title = {Large scale distributed deep networks}, - volume = 25, - year = 2012} - + author = {Dean, Jeffrey and Corrado, Greg and Monga, Rajat and Chen, Kai and Devin, Matthieu and Mao, Mark and Ranzato, Marc'aurelio and Senior, Andrew and Tucker, Paul and Yang, Ke and others}, + year = 2012, + journal = {Advances in neural information processing systems}, + volume = 25 +} @misc{deci, - title = {The Ultimate Guide to Deep Learning Model Quantization and Quantization-Aware Training}, - url = {https://deci.ai/quantization-and-quantization-aware-training/}, - Bdsk-Url-1 = {https://deci.ai/quantization-and-quantization-aware-training/}} - + url = {https://deci.ai/quantization-and-quantization-aware-training/}, + bdsk-url-1 = {https://deci.ai/quantization-and-quantization-aware-training/} +} @misc{deepcompress, - abstract = {Neural networks are both computationally intensive and memory intensive, making them difficult to deploy on embedded systems with limited hardware resources. To address this limitation, we introduce "deep compression", a three stage pipeline: pruning, trained quantization and Huffman coding, that work together to reduce the storage requirement of neural networks by 35x to 49x without affecting their accuracy. Our method first prunes the network by learning only the important connections. Next, we quantize the weights to enforce weight sharing, finally, we apply Huffman coding. After the first two steps we retrain the network to fine tune the remaining connections and the quantized centroids. Pruning, reduces the number of connections by 9x to 13x; Quantization then reduces the number of bits that represent each connection from 32 to 5. On the ImageNet dataset, our method reduced the storage required by AlexNet by 35x, from 240MB to 6.9MB, without loss of accuracy. Our method reduced the size of VGG-16 by 49x from 552MB to 11.3MB, again with no loss of accuracy. This allows fitting the model into on-chip SRAM cache rather than off-chip DRAM memory. Our compression method also facilitates the use of complex neural networks in mobile applications where application size and download bandwidth are constrained. 
Benchmarked on CPU, GPU and mobile GPU, compressed network has 3x to 4x layerwise speedup and 3x to 7x better energy efficiency.}, - author = {Han and Mao and Dally}, - doi = {10.48550/arXiv.1510.00149}, - title = {Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding}, - url = {https://arxiv.org/abs/1510.00149}, - urldate = {2016-02-15}, - year = 2016, - Bdsk-Url-1 = {https://arxiv.org/abs/1510.00149}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1510.00149}} - + author = {Han and Mao and Dally}, + year = 2016, + doi = {10.48550/arXiv.1510.00149}, + url = {https://arxiv.org/abs/1510.00149}, + urldate = {2016-02-15}, + bdsk-url-1 = {https://arxiv.org/abs/1510.00149}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1510.00149} +} +@article{demler_ceva_2020, + author = {Demler, Mike}, + year = 2020, + language = {en} +} @inproceedings{deng2009imagenet, - added-at = {2018-09-20T15:22:39.000+0200}, - author = {Deng, Jia and Socher, R. and Fei-Fei, Li and Dong, Wei and Li, Kai and Li, Li-Jia}, - biburl = {https://www.bibsonomy.org/bibtex/252793859f5bcbbd3f7f9e5d083160acf/analyst}, - booktitle = {2009 IEEE Conference on Computer Vision and Pattern Recognition(CVPR)}, - description = {ImageNet: A large-scale hierarchical image database}, - doi = {10.1109/CVPR.2009.5206848}, - interhash = {fbfae3e4fe1a81c477ba00efd0d4d977}, - intrahash = {52793859f5bcbbd3f7f9e5d083160acf}, - keywords = {2009 computer-vision cvpr dataset ieee paper}, - month = {06}, - pages = {248--255}, - timestamp = {2018-09-20T15:22:39.000+0200}, - title = {ImageNet: A large-scale hierarchical image database}, - url = {https://ieeexplore.ieee.org/abstract/document/5206848/}, - volume = 00, - year = 2009, - Bdsk-Url-1 = {https://ieeexplore.ieee.org/abstract/document/5206848/}, - Bdsk-Url-2 = {https://doi.org/10.1109/CVPR.2009.5206848}} - + author = {Deng, Jia and Socher, R. and Fei-Fei, Li and Dong, Wei and Li, Kai and Li, Li-Jia}, + year = 2009, + month = {06}, + booktitle = {2009 IEEE Conference on Computer Vision and Pattern Recognition(CVPR)}, + volume = 00, + pages = {248--255}, + doi = {10.1109/CVPR.2009.5206848}, + url = {https://ieeexplore.ieee.org/abstract/document/5206848/}, + added-at = {2018-09-20T15:22:39.000+0200}, + biburl = {https://www.bibsonomy.org/bibtex/252793859f5bcbbd3f7f9e5d083160acf/analyst}, + description = {ImageNet: A large-scale hierarchical image database}, + interhash = {fbfae3e4fe1a81c477ba00efd0d4d977}, + intrahash = {52793859f5bcbbd3f7f9e5d083160acf}, + timestamp = {2018-09-20T15:22:39.000+0200}, + bdsk-url-1 = {https://ieeexplore.ieee.org/abstract/document/5206848/}, + bdsk-url-2 = {https://doi.org/10.1109/CVPR.2009.5206848} +} @article{desai2016five, - author = {Desai, Tanvi and Ritchie, Felix and Welpton, Richard and others}, - journal = {Economics Working Paper Series}, - pages = 28, - title = {Five Safes: designing data access for research}, - volume = 1601, - year = 2016} - + author = {Desai, Tanvi and Ritchie, Felix and Welpton, Richard and others}, + year = 2016, + journal = {Economics Working Paper Series}, + volume = 1601, + pages = 28 +} @article{desai2020five, - author = {Desai, Tanvi and Ritchie, Felix and Welpton, Richard}, - journal = {URL https://www2. uwe. ac. uk/faculties/bbs/Documents/1601. pdf}, - title = {Five Safes: designing data access for research; 2016}, - year = 2020} - + author = {Desai, Tanvi and Ritchie, Felix and Welpton, Richard}, + year = 2020, + journal = {URL https://www2. uwe. ac. 
uk/faculties/bbs/Documents/1601. pdf} +} @article{devlin2018bert, - author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, - journal = {arXiv preprint arXiv:1810.04805}, - title = {Bert: Pre-training of deep bidirectional transformers for language understanding}, - year = 2018} - + author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, + year = 2018, + journal = {arXiv preprint arXiv:1810.04805} +} +@book{dhanjani2015abusing, + author = {Dhanjani, Nitesh}, + year = 2015, + publisher = {O'Reilly Media, Inc.}, + date-added = {2023-11-22 17:09:41 -0500}, + date-modified = {2023-11-22 17:10:22 -0500} +} @article{dhar2021survey, - author = {Dhar, Sauptik and Guo, Junyao and Liu, Jiayi and Tripathi, Samarth and Kurup, Unmesh and Shah, Mohak}, - journal = {ACM Transactions on Internet of Things}, - number = 3, - pages = {1--49}, - publisher = {ACM New York, NY, USA}, - title = {A survey of on-device machine learning: An algorithms and learning theory perspective}, - volume = 2, - year = 2021} - + author = {Dhar, Sauptik and Guo, Junyao and Liu, Jiayi and Tripathi, Samarth and Kurup, Unmesh and Shah, Mohak}, + year = 2021, + journal = {ACM Transactions on Internet of Things}, + publisher = {ACM New York, NY, USA}, + volume = 2, + number = 3, + pages = {1--49} +} @misc{dong2022splitnets, + author = {Xin Dong and Barbara De Salvo and Meng Li and Chiao Liu and Zhongnan Qu and H. T. Kung and Ziyun Li}, + year = 2022, archiveprefix = {arXiv}, - author = {Xin Dong and Barbara De Salvo and Meng Li and Chiao Liu and Zhongnan Qu and H. T. Kung and Ziyun Li}, - eprint = {2204.04705}, - primaryclass = {cs.LG}, - title = {SplitNets: Designing Neural Architectures for Efficient Distributed Computing on Head-Mounted Systems}, - year = 2022} - + eprint = {2204.04705}, + primaryclass = {cs.LG} +} +@article{Dongarra2009-na, + author = {Dongarra, Jack J}, + year = 2009, + journal = {IBM Journal of Research and Development}, + volume = 53, + pages = {3--4} +} +@article{dropout, + author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, + year = 2014, + journal = {Journal of Machine Learning Research}, + url = {http://jmlr.org/papers/v15/srivastava14a.html} +} +@article{duarte2022fastml, + author = {Duarte, Javier and Tran, Nhan and Hawks, Ben and Herwig, Christian and Muhizi, Jules and Prakash, Shvetank and Reddi, Vijay Janapa}, + year = 2022, + journal = {arXiv preprint arXiv:2207.07958} +} @article{duisterhof2019learning, - author = {Duisterhof, Bardienus P and Krishnan, Srivatsan and Cruz, Jonathan J and Banbury, Colby R and Fu, William and Faust, Aleksandra and de Croon, Guido CHE and Reddi, Vijay Janapa}, - journal = {arXiv preprint arXiv:1909.11236}, - title = {Learning to seek: Autonomous source seeking with deep reinforcement learning onboard a nano drone microcontroller}, - year = 2019} - + author = {Duisterhof, Bardienus P and Krishnan, Srivatsan and Cruz, Jonathan J and Banbury, Colby R and Fu, William and Faust, Aleksandra and de Croon, Guido CHE and Reddi, Vijay Janapa}, + year = 2019, + journal = {arXiv preprint arXiv:1909.11236} +} @inproceedings{duisterhof2021sniffy, - author = {Duisterhof, Bardienus P and Li, Shushuai and Burgu{\'e}s, Javier and Reddi, Vijay Janapa and de Croon, Guido CHE}, - booktitle = {2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, - organization = {IEEE}, - pages = {9099--9106}, - title = {Sniffy bug: A fully autonomous swarm 
of gas-seeking nano quadcopters in cluttered environments}, - year = 2021} - + author = {Duisterhof, Bardienus P and Li, Shushuai and Burgu{\'e}s, Javier and Reddi, Vijay Janapa and de Croon, Guido CHE}, + year = 2021, + booktitle = {2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, + pages = {9099--9106}, + organization = {IEEE} +} +@inproceedings{Dwork2006Theory, + author = {Dwork, Cynthia and McSherry, Frank and Nissim, Kobbi and Smith, Adam}, + year = 2006, + booktitle = {Theory of Cryptography}, + publisher = {Springer Berlin Heidelberg}, + address = {Berlin, Heidelberg}, + pages = {265--284}, + date-added = {2023-11-22 18:04:12 -0500}, + date-modified = {2023-11-22 18:05:20 -0500}, + editor = {Halevi, Shai and Rabin, Tal} +} @article{dwork2014algorithmic, - author = {Dwork, Cynthia and Roth, Aaron and others}, - journal = {Foundations and Trends{\textregistered} in Theoretical Computer Science}, - number = {3--4}, - pages = {211--407}, - publisher = {Now Publishers, Inc.}, - title = {The algorithmic foundations of differential privacy}, - volume = 9, - year = 2014} - + author = {Dwork, Cynthia and Roth, Aaron and others}, + year = 2014, + journal = {Foundations and Trends{\textregistered} in Theoretical Computer Science}, + publisher = {Now Publishers, Inc.}, + volume = 9, + number = {3--4}, + pages = {211--407} +} +@article{el-rayis_reconfigurable_nodate, + author = {El-Rayis, Ahmed Osman}, + language = {en} +} +@article{eldan2023whos, + author = {Ronen Eldan and Mark Russinovich}, + year = 2023, + journal = {arXiv preprint arXiv:2310.02238}, + date-added = {2023-11-22 19:24:35 -0500}, + date-modified = {2023-11-22 19:25:20 -0500} +} @article{electronics12102287, + author = {Moshawrab, Mohammad and Adda, Mehdi and Bouzouane, Abdenour and Ibrahim, Hussein and Raad, Ali}, + year = 2023, + journal = {Electronics}, + volume = 12, + number = 10, + doi = {10.3390/electronics12102287}, + issn = {2079-9292}, + url = {https://www.mdpi.com/2079-9292/12/10/2287}, article-number = 2287, - author = {Moshawrab, Mohammad and Adda, Mehdi and Bouzouane, Abdenour and Ibrahim, Hussein and Raad, Ali}, - doi = {10.3390/electronics12102287}, - issn = {2079-9292}, - journal = {Electronics}, - number = 10, - title = {Reviewing Federated Learning Aggregation Algorithms; Strategies, Contributions, Limitations and Future Perspectives}, - url = {https://www.mdpi.com/2079-9292/12/10/2287}, - volume = 12, - year = 2023, - Bdsk-Url-1 = {https://www.mdpi.com/2079-9292/12/10/2287}, - Bdsk-Url-2 = {https://doi.org/10.3390/electronics12102287}} - + bdsk-url-1 = {https://www.mdpi.com/2079-9292/12/10/2287}, + bdsk-url-2 = {https://doi.org/10.3390/electronics12102287} +} @misc{energyproblem, - author = {ISSCC}, - title = {Computing's energy problem (and what we can do about it)}, - url = {https://ieeexplore.ieee.org/document/6757323}, - urldate = {2014-03-06}, - year = 2014, - Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/6757323}} - + author = {ISSCC}, + year = 2014, + url = {https://ieeexplore.ieee.org/document/6757323}, + urldate = {2014-03-06}, + bdsk-url-1 = {https://ieeexplore.ieee.org/document/6757323} +} @article{esteva2017dermatologist, - author = {Esteva, Andre and Kuprel, Brett and Novoa, Roberto A and Ko, Justin and Swetter, Susan M and Blau, Helen M and Thrun, Sebastian}, - journal = {nature}, - number = 7639, - pages = {115--118}, - publisher = {Nature Publishing Group}, - title = {Dermatologist-level classification of skin cancer with deep neural networks}, - volume = 
542, - year = 2017} - + author = {Esteva, Andre and Kuprel, Brett and Novoa, Roberto A and Ko, Justin and Swetter, Susan M and Blau, Helen M and Thrun, Sebastian}, + year = 2017, + journal = {nature}, + publisher = {Nature Publishing Group}, + volume = 542, + number = 7639, + pages = {115--118} +} +@article{eykholt2018robust, + author = {Kevin Eykholt and Ivan Evtimov and Earlence Fernandes and Bo Li and Amir Rahmati and Chaowei Xiao and Atul Prakash and Tadayoshi Kohno and Dawn Song}, + year = 2018, + journal = {arXiv preprint arXiv:1707.08945}, + date-added = {2023-11-22 16:30:51 -0500}, + date-modified = {2023-11-22 16:31:55 -0500} +} @misc{fahim2021hls4ml, + author = {Farah Fahim and Benjamin Hawks and Christian Herwig and James Hirschauer and Sergo Jindariani and Nhan Tran and Luca P. Carloni and Giuseppe Di Guglielmo and Philip Harris and Jeffrey Krupa and Dylan Rankin and Manuel Blanco Valentin and Josiah Hester and Yingyi Luo and John Mamish and Seda Orgrenci-Memik and Thea Aarrestad and Hamza Javed and Vladimir Loncar and Maurizio Pierini and Adrian Alan Pol and Sioni Summers and Javier Duarte and Scott Hauck and Shih-Chieh Hsu and Jennifer Ngadiuba and Mia Liu and Duc Hoang and Edward Kreinar and Zhenbin Wu}, + year = 2021, archiveprefix = {arXiv}, - author = {Farah Fahim and Benjamin Hawks and Christian Herwig and James Hirschauer and Sergo Jindariani and Nhan Tran and Luca P. Carloni and Giuseppe Di Guglielmo and Philip Harris and Jeffrey Krupa and Dylan Rankin and Manuel Blanco Valentin and Josiah Hester and Yingyi Luo and John Mamish and Seda Orgrenci-Memik and Thea Aarrestad and Hamza Javed and Vladimir Loncar and Maurizio Pierini and Adrian Alan Pol and Sioni Summers and Javier Duarte and Scott Hauck and Shih-Chieh Hsu and Jennifer Ngadiuba and Mia Liu and Duc Hoang and Edward Kreinar and Zhenbin Wu}, - eprint = {2103.05579}, - primaryclass = {cs.LG}, - title = {hls4ml: An Open-Source Codesign Workflow to Empower Scientific Low-Power Machine Learning Devices}, - year = 2021} - + eprint = {2103.05579}, + primaryclass = {cs.LG} +} +@article{farah2005neuroethics, + author = {Farah, Martha J}, + year = 2005, + journal = {Trends in cognitive sciences}, + publisher = {Elsevier}, + volume = 9, + number = 1, + pages = {34--40} +} +@article{farwell2011stuxnet, + author = {Farwell, James P and Rohozinski, Rafal}, + year = 2011, + journal = {Survival}, + volume = 53, + number = 1, + pages = {23--40}, + date-added = {2023-11-22 14:03:31 -0500}, + date-modified = {2023-11-22 14:05:19 -0500} +} +@inproceedings{fowers2018configurable, + author = {Fowers, Jeremy and Ovtcharov, Kalin and Papamichael, Michael and Massengill, Todd and Liu, Ming and Lo, Daniel and Alkalay, Shlomi and Haselman, Michael and Adams, Logan and Ghandi, Mahdi and others}, + year = 2018, + booktitle = {2018 ACM/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)}, + pages = {1--14}, + organization = {IEEE} +} @misc{frankle_lottery_2019, - abstract = {Neural network pruning techniques can reduce the parameter counts of trained networks by over 90\%, decreasing storage requirements and improving computational performance of inference without compromising accuracy. However, contemporary experience is that the sparse architectures produced by pruning are difficult to train from the start, which would similarly improve training performance. We find that a standard pruning technique naturally uncovers subnetworks whose initializations made them capable of training effectively. 
Based on these results, we articulate the "lottery ticket hypothesis:" dense, randomly-initialized, feed-forward networks contain subnetworks ("winning tickets") that - when trained in isolation - reach test accuracy comparable to the original network in a similar number of iterations. The winning tickets we find have won the initialization lottery: their connections have initial weights that make training particularly effective. We present an algorithm to identify winning tickets and a series of experiments that support the lottery ticket hypothesis and the importance of these fortuitous initializations. We consistently find winning tickets that are less than 10-20\% of the size of several fully-connected and convolutional feed-forward architectures for MNIST and CIFAR10. Above this size, the winning tickets that we find learn faster than the original network and reach higher test accuracy.}, - author = {Frankle, Jonathan and Carbin, Michael}, - doi = {10.48550/arXiv.1803.03635}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/6STHYGW5/Frankle and Carbin - 2019 - The Lottery Ticket Hypothesis Finding Sparse, Tra.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/QGNSCTQB/1803.html:text/html}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing}, - month = mar, - note = {arXiv:1803.03635 [cs]}, - publisher = {arXiv}, - shorttitle = {The {Lottery} {Ticket} {Hypothesis}}, - title = {The {Lottery} {Ticket} {Hypothesis}: {Finding} {Sparse}, {Trainable} {Neural} {Networks}}, - url = {http://arxiv.org/abs/1803.03635}, - urldate = {2023-10-20}, - year = 2019, - Bdsk-Url-1 = {http://arxiv.org/abs/1803.03635}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1803.03635}} - + shorttitle = {The {Lottery} {Ticket} {Hypothesis}}, + author = {Frankle, Jonathan and Carbin, Michael}, + year = 2019, + month = mar, + publisher = {arXiv}, + doi = {10.48550/arXiv.1803.03635}, + url = {http://arxiv.org/abs/1803.03635}, + urldate = {2023-10-20}, + note = {arXiv:1803.03635 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/1803.03635}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1803.03635} +} +@article{furber2016large, + author = {Furber, Steve}, + year = 2016, + journal = {Journal of neural engineering}, + publisher = {IOP Publishing}, + volume = 13, + number = 5, + pages = {051001} +} +@article{gaitathome, + author = {Yingcheng Liu and Guo Zhang and Christopher G. Tarolli and Rumen Hristov and Stella Jensen-Roberts and Emma M. Waddell and Taylor L. Myers and Meghan E. Pawlik and Julia M. Soto and Renee M. Wilson and Yuzhe Yang and Timothy Nordahl and Karlo J. Lizarraga and Jamie L. Adams and Ruth B. Schneider and Karl Kieburtz and Terry Ellis and E. 
Ray Dorsey and Dina Katabi}, + year = 2022, + journal = {Science Translational Medicine}, + volume = 14, + number = 663, + pages = {eadc9669}, + doi = {10.1126/scitranslmed.adc9669}, + url = {https://www.science.org/doi/abs/10.1126/scitranslmed.adc9669}, + eprint = {https://www.science.org/doi/pdf/10.1126/scitranslmed.adc9669}, + bdsk-url-1 = {https://www.science.org/doi/abs/10.1126/scitranslmed.adc9669}, + bdsk-url-2 = {https://doi.org/10.1126/scitranslmed.adc9669} +} +@article{gale2019state, + author = {Gale, Trevor and Elsen, Erich and Hooker, Sara}, + year = 2019, + journal = {arXiv preprint arXiv:1902.09574} +} +@inproceedings{gandolfi2001electromagnetic, + author = {Gandolfi, Karine and Mourtel, Christophe and Olivier, Francis}, + year = 2001, + booktitle = {Cryptographic Hardware and Embedded Systems---CHES 2001: Third International Workshop Paris, France, May 14--16, 2001 Proceedings 3}, + pages = {251--261}, + date-added = {2023-11-22 16:56:42 -0500}, + date-modified = {2023-11-22 16:57:40 -0500}, + organization = {Springer} +} +@inproceedings{gannot1994verilog, + author = {Gannot, G. and Ligthart, M.}, + year = 1994, + booktitle = {International Verilog HDL Conference}, + pages = {86--92}, + doi = {10.1109/IVC.1994.323743}, + bdsk-url-1 = {https://doi.org/10.1109/IVC.1994.323743} +} +@article{Gao2020Physical, + author = {Gao, Yansong and Al-Sarawi, Said F. and Abbott, Derek}, + year = 2020, + month = {February}, + journal = {Nature Electronics}, + volume = 3, + number = 2, + pages = {81--91}, + date-added = {2023-11-22 17:52:20 -0500}, + date-modified = {2023-11-22 17:54:56 -0500} +} +@article{gates2009flexible, + author = {Gates, Byron D}, + year = 2009, + journal = {Science}, + publisher = {American Association for the Advancement of Science}, + volume = 323, + number = 5921, + pages = {1566--1567} +} @article{gaviria2022dollar, - author = {Gaviria Rojas, William and Diamos, Sudnya and Kini, Keertan and Kanter, David and Janapa Reddi, Vijay and Coleman, Cody}, - journal = {Advances in Neural Information Processing Systems}, - pages = {12979--12990}, - title = {The Dollar Street Dataset: Images Representing the Geographic and Socioeconomic Diversity of the World}, - volume = 35, - year = 2022} - + author = {Gaviria Rojas, William and Diamos, Sudnya and Kini, Keertan and Kanter, David and Janapa Reddi, Vijay and Coleman, Cody}, + year = 2022, + journal = {Advances in Neural Information Processing Systems}, + volume = 35, + pages = {12979--12990} +} @article{Gebru_Morgenstern_Vecchione_Vaughan_Wallach_III_Crawford_2021, - author = {Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and III, Hal Daum{\'e} and Crawford, Kate}, - doi = {10.1145/3458723}, - journal = {Communications of the ACM}, - number = 12, - pages = {86--92}, - title = {Datasheets for datasets}, - volume = 64, - year = 2021, - Bdsk-Url-1 = {https://doi.org/10.1145/3458723}} - + author = {Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and III, Hal Daum{\'e} and Crawford, Kate}, + year = 2021, + journal = {Communications of the ACM}, + volume = 64, + number = 12, + pages = {86--92}, + doi = {10.1145/3458723}, + bdsk-url-1 = {https://doi.org/10.1145/3458723} +} +@article{glucosemonitor, + author = {Li, Jingzhen and Tobore, Igbe and Liu, Yuhang and Kandwal, Abhishek and Wang, Lei and Nie, Zedong}, + year = 2021, + journal = {IEEE Journal of Biomedical and Health Informatics}, + volume = 25, + number = 
9, + pages = {3340--3350}, + doi = {10.1109/JBHI.2021.3072628}, + bdsk-url-1 = {https://doi.org/10.1109/JBHI.2021.3072628} +} +@inproceedings{gnad2017voltage, + author = {Gnad, Dennis RE and Oboril, Fabian and Tahoori, Mehdi B}, + year = 2017, + booktitle = {2017 27th International Conference on Field Programmable Logic and Applications (FPL)}, + pages = {1--7}, + date-added = {2023-11-22 17:07:13 -0500}, + date-modified = {2023-11-22 17:07:59 -0500}, + organization = {IEEE} +} @article{goodfellow2020generative, - author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, - journal = {Communications of the ACM}, - number = 11, - pages = {139--144}, - publisher = {ACM New York, NY, USA}, - title = {Generative adversarial networks}, - volume = 63, - year = 2020} - + author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, + year = 2020, + journal = {Communications of the ACM}, + publisher = {ACM New York, NY, USA}, + volume = 63, + number = 11, + pages = {139--144} +} +@article{goodyear2017social, + author = {Goodyear, Victoria A}, + year = 2017, + journal = {Qualitative research in sport, exercise and health}, + publisher = {Taylor \& Francis}, + volume = 9, + number = 3, + pages = {285--302} +} @misc{Google, - author = {Google}, - title = {Information quality & content moderation}, - url = {https://blog.google/documents/83/}, - Bdsk-Url-1 = {https://blog.google/documents/83/}} - + author = {Google}, + url = {https://blog.google/documents/83/}, + bdsk-url-1 = {https://blog.google/documents/83/} +} @misc{gordon_morphnet_2018, - abstract = {We present MorphNet, an approach to automate the design of neural network structures. MorphNet iteratively shrinks and expands a network, shrinking via a resource-weighted sparsifying regularizer on activations and expanding via a uniform multiplicative factor on all layers. In contrast to previous approaches, our method is scalable to large networks, adaptable to specific resource constraints (e.g. the number of floating-point operations per inference), and capable of increasing the network's performance. When applied to standard network architectures on a wide variety of datasets, our approach discovers novel structures in each domain, obtaining higher performance while respecting the resource constraint.}, - author = {Gordon, Ariel and Eban, Elad and Nachum, Ofir and Chen, Bo and Wu, Hao and Yang, Tien-Ju and Choi, Edward}, - doi = {10.48550/arXiv.1711.06798}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/GV7N4CZC/Gordon et al. 
- 2018 - MorphNet Fast & Simple Resource-Constrained Struc.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/K6FUV82F/1711.html:text/html}, - keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, - month = apr, - note = {arXiv:1711.06798 [cs, stat]}, - publisher = {arXiv}, - shorttitle = {{MorphNet}}, - title = {{MorphNet}: {Fast} \& {Simple} {Resource}-{Constrained} {Structure} {Learning} of {Deep} {Networks}}, - url = {http://arxiv.org/abs/1711.06798}, - urldate = {2023-10-20}, - year = 2018, - Bdsk-Url-1 = {http://arxiv.org/abs/1711.06798}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1711.06798}} - + shorttitle = {{MorphNet}}, + author = {Gordon, Ariel and Eban, Elad and Nachum, Ofir and Chen, Bo and Wu, Hao and Yang, Tien-Ju and Choi, Edward}, + year = 2018, + month = apr, + publisher = {arXiv}, + doi = {10.48550/arXiv.1711.06798}, + url = {http://arxiv.org/abs/1711.06798}, + urldate = {2023-10-20}, + note = {arXiv:1711.06798 [cs, stat]}, + bdsk-url-1 = {http://arxiv.org/abs/1711.06798}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1711.06798} +} @inproceedings{gordon2018morphnet, - author = {Gordon, Ariel and Eban, Elad and Nachum, Ofir and Chen, Bo and Wu, Hao and Yang, Tien-Ju and Choi, Edward}, - booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages = {1586--1595}, - title = {Morphnet: Fast \& simple resource-constrained structure learning of deep networks}, - year = 2018} - + author = {Gordon, Ariel and Eban, Elad and Nachum, Ofir and Chen, Bo and Wu, Hao and Yang, Tien-Ju and Choi, Edward}, + year = 2018, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {1586--1595} +} +@inproceedings{govindavajhala2003using, + author = {Govindavajhala, Sudhakar and Appel, Andrew W}, + year = 2003, + booktitle = {2003 Symposium on Security and Privacy, 2003.}, + pages = {154--156}, + date-added = {2023-11-22 16:46:13 -0500}, + date-modified = {2023-11-22 16:47:03 -0500}, + organization = {IEEE} +} @article{gruslys2016memory, - author = {Gruslys, Audrunas and Munos, R{\'e}mi and Danihelka, Ivo and Lanctot, Marc and Graves, Alex}, - journal = {Advances in neural information processing systems}, - title = {Memory-efficient backpropagation through time}, - volume = 29, - year = 2016} - + author = {Gruslys, Audrunas and Munos, R{\'e}mi and Danihelka, Ivo and Lanctot, Marc and Graves, Alex}, + year = 2016, + journal = {Advances in neural information processing systems}, + volume = 29 +} +@article{Gupta2023ChatGPT, + author = {Gupta, Maanak and Akiri, Charankumar and Aryal, Kshitiz and Parker, Eli and Praharaj, Lopamudra}, + year = 2023, + journal = {IEEE Access}, + volume = 11, + pages = {80218--80245}, + date-added = {2023-11-22 18:01:41 -0500}, + date-modified = {2023-11-22 18:02:55 -0500} +} +@article{gwennap_certus-nx_nodate, + author = {Gwennap, Linley}, + language = {en} +} +@article{haensch2018next, + author = {Haensch, Wilfried and Gokmen, Tayfun and Puri, Ruchir}, + year = 2018, + journal = {Proceedings of the IEEE}, + publisher = {IEEE}, + volume = 107, + number = 1, + pages = {108--122} +} @article{han2015deep, - author = {Han, Song and Mao, Huizi and Dally, William J}, - journal = {arXiv preprint arXiv:1510.00149}, - title = {Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding}, - year = 2015} - + author = {Han, Song and Mao, Huizi and Dally, William J}, + year = 2015, + journal = 
{arXiv preprint arXiv:1510.00149} +} @misc{han2016deep, + author = {Song Han and Huizi Mao and William J. Dally}, + year = 2016, archiveprefix = {arXiv}, - author = {Song Han and Huizi Mao and William J. Dally}, - eprint = {1510.00149}, - primaryclass = {cs.CV}, - title = {Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding}, - year = 2016} - + eprint = {1510.00149}, + primaryclass = {cs.CV} +} +@article{hazan2021neuromorphic, + author = {Hazan, Avi and Ezra Tsur, Elishai}, + year = 2021, + journal = {Frontiers in Neuroscience}, + publisher = {Frontiers Media SA}, + volume = 15, + pages = 627221 +} @misc{he_structured_2023, - abstract = {The remarkable performance of deep Convolutional neural networks (CNNs) is generally attributed to their deeper and wider architectures, which can come with significant computational costs. Pruning neural networks has thus gained interest since it effectively lowers storage and computational costs. In contrast to weight pruning, which results in unstructured models, structured pruning provides the benefit of realistic acceleration by producing models that are friendly to hardware implementation. The special requirements of structured pruning have led to the discovery of numerous new challenges and the development of innovative solutions. This article surveys the recent progress towards structured pruning of deep CNNs. We summarize and compare the state-of-the-art structured pruning techniques with respect to filter ranking methods, regularization methods, dynamic execution, neural architecture search, the lottery ticket hypothesis, and the applications of pruning. While discussing structured pruning algorithms, we briefly introduce the unstructured pruning counterpart to emphasize their differences. Furthermore, we provide insights into potential research opportunities in the field of structured pruning. 
A curated list of neural network pruning papers can be found at https://github.com/he-y/Awesome-Pruning}, - author = {He, Yang and Xiao, Lingao}, - doi = {10.48550/arXiv.2303.00566}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/K5RGQQA9/He and Xiao - 2023 - Structured Pruning for Deep Convolutional Neural N.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/U7PVPU4C/2303.html:text/html}, - keywords = {Computer Science - Computer Vision and Pattern Recognition}, - month = mar, - note = {arXiv:2303.00566 [cs]}, - publisher = {arXiv}, - shorttitle = {Structured {Pruning} for {Deep} {Convolutional} {Neural} {Networks}}, - title = {Structured {Pruning} for {Deep} {Convolutional} {Neural} {Networks}: {A} survey}, - url = {http://arxiv.org/abs/2303.00566}, - urldate = {2023-10-20}, - year = 2023, - Bdsk-Url-1 = {http://arxiv.org/abs/2303.00566}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2303.00566}} - + shorttitle = {Structured {Pruning} for {Deep} {Convolutional} {Neural} {Networks}}, + author = {He, Yang and Xiao, Lingao}, + year = 2023, + month = mar, + publisher = {arXiv}, + doi = {10.48550/arXiv.2303.00566}, + url = {http://arxiv.org/abs/2303.00566}, + urldate = {2023-10-20}, + note = {arXiv:2303.00566 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/2303.00566}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2303.00566} +} @inproceedings{he2016deep, - author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, - booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages = {770--778}, - title = {Deep residual learning for image recognition}, - year = 2016} - + author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + year = 2016, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {770--778} +} @inproceedings{hendrycks2021natural, - author = {Hendrycks, Dan and Zhao, Kevin and Basart, Steven and Steinhardt, Jacob and Song, Dawn}, - booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, - pages = {15262--15271}, - title = {Natural adversarial examples}, - year = 2021} - + author = {Hendrycks, Dan and Zhao, Kevin and Basart, Steven and Steinhardt, Jacob and Song, Dawn}, + year = 2021, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages = {15262--15271} +} +@article{Hennessy2019-je, + author = {Hennessy, John L and Patterson, David A}, + year = 2019, + month = jan, + journal = {Commun. ACM}, + publisher = {Association for Computing Machinery (ACM)}, + volume = 62, + number = 2, + pages = {48--60}, + copyright = {http://www.acm.org/publications/policies/copyright\_policy\#Background}, + language = {en} +} @misc{hinton_distilling_2015, - abstract = {A very simple way to improve the performance of almost any machine learning algorithm is to train many different models on the same data and then to average their predictions. Unfortunately, making predictions using a whole ensemble of models is cumbersome and may be too computationally expensive to allow deployment to a large number of users, especially if the individual models are large neural nets. Caruana and his collaborators have shown that it is possible to compress the knowledge in an ensemble into a single model which is much easier to deploy and we develop this approach further using a different compression technique. 
We achieve some surprising results on MNIST and we show that we can significantly improve the acoustic model of a heavily used commercial system by distilling the knowledge in an ensemble of models into a single model. We also introduce a new type of ensemble composed of one or more full models and many specialist models which learn to distinguish fine-grained classes that the full models confuse. Unlike a mixture of experts, these specialist models can be trained rapidly and in parallel.}, - author = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff}, - doi = {10.48550/arXiv.1503.02531}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/VREDW45A/Hinton et al. - 2015 - Distilling the Knowledge in a Neural Network.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/8MNJG4RP/1503.html:text/html}, - keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning}, - month = mar, - note = {arXiv:1503.02531 [cs, stat]}, - publisher = {arXiv}, - title = {Distilling the {Knowledge} in a {Neural} {Network}}, - url = {http://arxiv.org/abs/1503.02531}, - urldate = {2023-10-20}, - year = 2015, - Bdsk-Url-1 = {http://arxiv.org/abs/1503.02531}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1503.02531}} - + author = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff}, + year = 2015, + month = mar, + publisher = {arXiv}, + doi = {10.48550/arXiv.1503.02531}, + url = {http://arxiv.org/abs/1503.02531}, + urldate = {2023-10-20}, + note = {arXiv:1503.02531 [cs, stat]}, + bdsk-url-1 = {http://arxiv.org/abs/1503.02531}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1503.02531} +} @misc{hinton2015distilling, + author = {Geoffrey Hinton and Oriol Vinyals and Jeff Dean}, + year = 2015, archiveprefix = {arXiv}, - author = {Geoffrey Hinton and Oriol Vinyals and Jeff Dean}, - eprint = {1503.02531}, - primaryclass = {stat.ML}, - title = {Distilling the Knowledge in a Neural Network}, - year = 2015} - + eprint = {1503.02531}, + primaryclass = {stat.ML} +} @article{Holland_Hosny_Newman_Joseph_Chmielinski_2020, - author = {Holland, Sarah and Hosny, Ahmed and Newman, Sarah and Joseph, Joshua and Chmielinski, Kasia}, - doi = {10.5040/9781509932771.ch-001}, - journal = {Data Protection and Privacy}, - title = {The Dataset Nutrition label}, - year = 2020, - Bdsk-Url-1 = {https://doi.org/10.5040/9781509932771.ch-001}} - + author = {Holland, Sarah and Hosny, Ahmed and Newman, Sarah and Joseph, Joshua and Chmielinski, Kasia}, + year = 2020, + journal = {Data Protection and Privacy}, + doi = {10.5040/9781509932771.ch-001}, + bdsk-url-1 = {https://doi.org/10.5040/9781509932771.ch-001} +} @inproceedings{hong2023publishing, - author = {Hong, Sanghyun and Carlini, Nicholas and Kurakin, Alexey}, - booktitle = {2023 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML)}, - organization = {IEEE}, - pages = {271--290}, - title = {Publishing Efficient On-device Models Increases Adversarial Vulnerability}, - year = 2023} - + author = {Hong, Sanghyun and Carlini, Nicholas and Kurakin, Alexey}, + year = 2023, + booktitle = {2023 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML)}, + pages = {271--290}, + organization = {IEEE} +} +@article{hosseini2017deceiving, + author = {Hosseini, Hossein and Kannan, Sreeram and Zhang, Baosen and Poovendran, Radha}, + year = 2017, + journal = {arXiv preprint arXiv:1702.08138}, + date-added = {2023-11-22 16:22:18 -0500}, + date-modified = {2023-11-22 16:23:43 -0500} +} 
@misc{howard_mobilenets_2017, - abstract = {We present a class of efficient models called MobileNets for mobile and embedded vision applications. MobileNets are based on a streamlined architecture that uses depth-wise separable convolutions to build light weight deep neural networks. We introduce two simple global hyper-parameters that efficiently trade off between latency and accuracy. These hyper-parameters allow the model builder to choose the right sized model for their application based on the constraints of the problem. We present extensive experiments on resource and accuracy tradeoffs and show strong performance compared to other popular models on ImageNet classification. We then demonstrate the effectiveness of MobileNets across a wide range of applications and use cases including object detection, finegrain classification, face attributes and large scale geo-localization.}, - author = {Howard, Andrew G. and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig}, - doi = {10.48550/arXiv.1704.04861}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/IJ9P9ID9/Howard et al. - 2017 - MobileNets Efficient Convolutional Neural Network.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/D9TS95GJ/1704.html:text/html}, - keywords = {Computer Science - Computer Vision and Pattern Recognition}, - month = apr, - note = {arXiv:1704.04861 [cs]}, - publisher = {arXiv}, - shorttitle = {{MobileNets}}, - title = {{MobileNets}: {Efficient} {Convolutional} {Neural} {Networks} for {Mobile} {Vision} {Applications}}, - url = {http://arxiv.org/abs/1704.04861}, - urldate = {2023-10-20}, - year = 2017, - Bdsk-Url-1 = {http://arxiv.org/abs/1704.04861}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1704.04861}} - + shorttitle = {{MobileNets}}, + author = {Howard, Andrew G. and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig}, + year = 2017, + month = apr, + publisher = {arXiv}, + doi = {10.48550/arXiv.1704.04861}, + url = {http://arxiv.org/abs/1704.04861}, + urldate = {2023-10-20}, + note = {arXiv:1704.04861 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/1704.04861}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1704.04861} +} @misc{howard2017mobilenets, + author = {Andrew G. Howard and Menglong Zhu and Bo Chen and Dmitry Kalenichenko and Weijun Wang and Tobias Weyand and Marco Andreetto and Hartwig Adam}, + year = 2017, + journal = {arXiv preprint arXiv:1704.04861}, archiveprefix = {arXiv}, - author = {Andrew G. 
Howard and Menglong Zhu and Bo Chen and Dmitry Kalenichenko and Weijun Wang and Tobias Weyand and Marco Andreetto and Hartwig Adam}, - eprint = {1704.04861}, - journal = {arXiv preprint arXiv:1704.04861}, - primaryclass = {cs.CV}, - title = {MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications}, - year = 2017} - + eprint = {1704.04861}, + primaryclass = {cs.CV} +} +@inproceedings{hsiao2023mavfi, + author = {Hsiao, Yu-Shun and Wan, Zishen and Jia, Tianyu and Ghosal, Radhika and Mahmoud, Abdulrahman and Raychowdhury, Arijit and Brooks, David and Wei, Gu-Yeon and Reddi, Vijay Janapa}, + year = 2023, + booktitle = {2023 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)}, + pages = {1--6}, + date-added = {2023-11-22 16:54:11 -0500}, + date-modified = {2023-11-22 16:55:12 -0500}, + organization = {IEEE} +} +@article{huang2010pseudo, + author = {Huang, Tsung-Ching and Fukuda, Kenjiro and Lo, Chun-Ming and Yeh, Yung-Hui and Sekitani, Tsuyoshi and Someya, Takao and Cheng, Kwang-Ting}, + year = 2010, + journal = {IEEE Transactions on Electron Devices}, + publisher = {IEEE}, + volume = 58, + number = 1, + pages = {141--150} +} +@inproceedings{hutter2009contact, + author = {Hutter, Michael and Schmidt, Jorn-Marc and Plos, Thomas}, + year = 2009, + booktitle = {2009 European Conference on Circuit Theory and Design}, + pages = {409--412}, + date-added = {2023-11-22 16:43:29 -0500}, + date-modified = {2023-11-22 16:44:30 -0500}, + organization = {IEEE} +} @misc{iandola_squeezenet_2016, - abstract = {Recent research on deep neural networks has focused primarily on improving accuracy. For a given accuracy level, it is typically possible to identify multiple DNN architectures that achieve that accuracy level. With equivalent accuracy, smaller DNN architectures offer at least three advantages: (1) Smaller DNNs require less communication across servers during distributed training. (2) Smaller DNNs require less bandwidth to export a new model from the cloud to an autonomous car. (3) Smaller DNNs are more feasible to deploy on FPGAs and other hardware with limited memory. To provide all of these advantages, we propose a small DNN architecture called SqueezeNet. SqueezeNet achieves AlexNet-level accuracy on ImageNet with 50x fewer parameters. Additionally, with model compression techniques we are able to compress SqueezeNet to less than 0.5MB (510x smaller than AlexNet). The SqueezeNet architecture is available for download here: https://github.com/DeepScale/SqueezeNet}, - author = {Iandola, Forrest N. and Han, Song and Moskewicz, Matthew W. and Ashraf, Khalid and Dally, William J. and Keutzer, Kurt}, - doi = {10.48550/arXiv.1602.07360}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/X3ZX9UTZ/Iandola et al. - 2016 - SqueezeNet AlexNet-level accuracy with 50x fewer .pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/DHI96QVT/1602.html:text/html}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition}, - month = nov, - note = {arXiv:1602.07360 [cs]}, - publisher = {arXiv}, - shorttitle = {{SqueezeNet}}, - title = {{SqueezeNet}: {AlexNet}-level accuracy with 50x fewer parameters and {\textless}0.{5MB} model size}, - url = {http://arxiv.org/abs/1602.07360}, - urldate = {2023-10-20}, - year = 2016, - Bdsk-Url-1 = {http://arxiv.org/abs/1602.07360}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1602.07360}} - + shorttitle = {{SqueezeNet}}, + author = {Iandola, Forrest N. 
and Han, Song and Moskewicz, Matthew W. and Ashraf, Khalid and Dally, William J. and Keutzer, Kurt}, + year = 2016, + month = nov, + publisher = {arXiv}, + doi = {10.48550/arXiv.1602.07360}, + url = {http://arxiv.org/abs/1602.07360}, + urldate = {2023-10-20}, + note = {arXiv:1602.07360 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/1602.07360}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1602.07360} +} @article{iandola2016squeezenet, - author = {Iandola, Forrest N and Han, Song and Moskewicz, Matthew W and Ashraf, Khalid and Dally, William J and Keutzer, Kurt}, - journal = {arXiv preprint arXiv:1602.07360}, - title = {SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and< 0.5 MB model size}, - year = 2016} - + author = {Iandola, Forrest N and Han, Song and Moskewicz, Matthew W and Ashraf, Khalid and Dally, William J and Keutzer, Kurt}, + year = 2016, + journal = {arXiv preprint arXiv:1602.07360} +} +@article{Ignatov2018-kh, + author = {Ignatov, Andrey and Timofte, Radu and Chou, William and Wang, Ke and Wu, Max and Hartley, Tim and Van Gool, Luc}, + year = 2018, + publisher = {arXiv} +} @inproceedings{ignatov2018ai, - author = {Ignatov, Andrey and Timofte, Radu and Chou, William and Wang, Ke and Wu, Max and Hartley, Tim and Van Gool, Luc}, - booktitle = {Proceedings of the European Conference on Computer Vision (ECCV) Workshops}, - pages = {0--0}, - title = {Ai benchmark: Running deep neural networks on android smartphones}, - year = 2018} - + author = {Ignatov, Andrey and Timofte, Radu and Chou, William and Wang, Ke and Wu, Max and Hartley, Tim and Van Gool, Luc}, + year = 2018, + booktitle = {Proceedings of the European Conference on Computer Vision (ECCV) Workshops}, + pages = {0--0} +} @inproceedings{ijcai2021p592, - author = {Benmeziane, Hadjer and El Maghraoui, Kaoutar and Ouarnoughi, Hamza and Niar, Smail and Wistuba, Martin and Wang, Naigang}, - booktitle = {Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, {IJCAI-21}}, - doi = {10.24963/ijcai.2021/592}, - editor = {Zhi-Hua Zhou}, - month = 8, - note = {Survey Track}, - pages = {4322--4329}, - publisher = {International Joint Conferences on Artificial Intelligence Organization}, - title = {Hardware-Aware Neural Architecture Search: Survey and Taxonomy}, - url = {https://doi.org/10.24963/ijcai.2021/592}, - year = 2021, - Bdsk-Url-1 = {https://doi.org/10.24963/ijcai.2021/592}} - + author = {Benmeziane, Hadjer and El Maghraoui, Kaoutar and Ouarnoughi, Hamza and Niar, Smail and Wistuba, Martin and Wang, Naigang}, + year = 2021, + month = 8, + booktitle = {Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, {IJCAI-21}}, + publisher = {International Joint Conferences on Artificial Intelligence Organization}, + pages = {4322--4329}, + doi = {10.24963/ijcai.2021/592}, + url = {https://doi.org/10.24963/ijcai.2021/592}, + note = {Survey Track}, + editor = {Zhi-Hua Zhou}, + bdsk-url-1 = {https://doi.org/10.24963/ijcai.2021/592} +} +@inproceedings{imani2016resistive, + author = {Imani, Mohsen and Rahimi, Abbas and Rosing, Tajana S}, + year = 2016, + booktitle = {2016 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)}, + pages = {1327--1332}, + organization = {IEEE} +} @misc{intquantfordeepinf, - author = {Wu and Judd, Zhang and Isaev, Micikevicius}, - doi = {10.48550/arXiv.2004.09602}, - title = {Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation)}, - url = {https://arxiv.org/abs/2004.09602}, - urldate = 
{2020-04-20}, - year = 2020, - Bdsk-Url-1 = {https://arxiv.org/abs/2004.09602}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2004.09602}} - + author = {Wu and Judd, Zhang and Isaev, Micikevicius}, + year = 2020, + doi = {10.48550/arXiv.2004.09602}, + url = {https://arxiv.org/abs/2004.09602}, + urldate = {2020-04-20}, + bdsk-url-1 = {https://arxiv.org/abs/2004.09602}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2004.09602} +} +@inproceedings{jacob2018quantization, + author = {Jacob, Benoit and Kligys, Skirmantas and Chen, Bo and Zhu, Menglong and Tang, Matthew and Howard, Andrew and Adam, Hartwig and Kalenichenko, Dmitry}, + year = 2018, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {2704--2713} +} +@article{janapa2023edge, + author = {Janapa Reddi, Vijay and Elium, Alexander and Hymel, Shawn and Tischler, David and Situnayake, Daniel and Ward, Carl and Moreau, Louis and Plunkett, Jenny and Kelcey, Matthew and Baaijens, Mathijs and others}, + year = 2023, + journal = {Proceedings of Machine Learning and Systems}, + volume = 5 +} +@misc{jia_dissecting_2018, + author = {Jia, Zhe and Maggioni, Marco and Staiger, Benjamin and Scarpazza, Daniele P.}, + year = 2018, + month = apr, + publisher = {arXiv}, + url = {http://arxiv.org/abs/1804.06826}, + urldate = {2023-11-07}, + note = {arXiv:1804.06826 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/1804.06826} +} @inproceedings{jia2014caffe, - author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor}, - booktitle = {Proceedings of the 22nd ACM international conference on Multimedia}, - pages = {675--678}, - title = {Caffe: Convolutional architecture for fast feature embedding}, - year = 2014} - + author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor}, + year = 2014, + booktitle = {Proceedings of the 22nd ACM international conference on Multimedia}, + pages = {675--678} +} +@article{jia2019beyond, + author = {Jia, Zhihao and Zaharia, Matei and Aiken, Alex}, + year = 2019, + journal = {Proceedings of Machine Learning and Systems}, + volume = 1, + pages = {1--13} +} @article{jia2023life, - author = {Jia, Zhenge and Li, Dawei and Xu, Xiaowei and Li, Na and Hong, Feng and Ping, Lichuan and Shi, Yiyu}, - journal = {Nature Machine Intelligence}, - number = 5, - pages = {554--555}, - publisher = {Nature Publishing Group UK London}, - title = {Life-threatening ventricular arrhythmia detection challenge in implantable cardioverter--defibrillators}, - volume = 5, - year = 2023} - + author = {Jia, Zhenge and Li, Dawei and Xu, Xiaowei and Li, Na and Hong, Feng and Ping, Lichuan and Shi, Yiyu}, + year = 2023, + journal = {Nature Machine Intelligence}, + publisher = {Nature Publishing Group UK London}, + volume = 5, + number = 5, + pages = {554--555} +} @misc{jiang2019accuracy, + author = {Weiwen Jiang and Xinyi Zhang and Edwin H. -M. Sha and Lei Yang and Qingfeng Zhuge and Yiyu Shi and Jingtong Hu}, + year = 2019, archiveprefix = {arXiv}, - author = {Weiwen Jiang and Xinyi Zhang and Edwin H. -M. Sha and Lei Yang and Qingfeng Zhuge and Yiyu Shi and Jingtong Hu}, - eprint = {1901.11211}, - primaryclass = {cs.DC}, - title = {Accuracy vs. 
Efficiency: Achieving Both through FPGA-Implementation Aware Neural Architecture Search}, - year = 2019} - + eprint = {1901.11211}, + primaryclass = {cs.DC} +} @article{Johnson-Roberson_Barto_Mehta_Sridhar_Rosaen_Vasudevan_2017, - author = {Johnson-Roberson, Matthew and Barto, Charles and Mehta, Rounak and Sridhar, Sharath Nittur and Rosaen, Karl and Vasudevan, Ram}, - doi = {10.1109/icra.2017.7989092}, - journal = {2017 IEEE International Conference on Robotics and Automation (ICRA)}, - title = {Driving in the matrix: Can virtual worlds replace human-generated annotations for real world tasks?}, - year = 2017, - Bdsk-Url-1 = {https://doi.org/10.1109/icra.2017.7989092}} - + author = {Johnson-Roberson, Matthew and Barto, Charles and Mehta, Rounak and Sridhar, Sharath Nittur and Rosaen, Karl and Vasudevan, Ram}, + year = 2017, + journal = {2017 IEEE International Conference on Robotics and Automation (ICRA)}, + doi = {10.1109/icra.2017.7989092}, + bdsk-url-1 = {https://doi.org/10.1109/icra.2017.7989092} +} @article{jordan_machine_2015, - author = {Jordan, M. I. and Mitchell, T. M.}, - doi = {10.1126/science.aaa8415}, - file = {Jordan and Mitchell - 2015 - Machine learning Trends, perspectives, and prospe.pdf:/Users/alex/Zotero/storage/RGU3CQ4Q/Jordan and Mitchell - 2015 - Machine learning Trends, perspectives, and prospe.pdf:application/pdf}, - issn = {0036-8075, 1095-9203}, - journal = {Science}, - language = {en}, - month = jul, - number = 6245, - pages = {255--260}, - shorttitle = {Machine learning}, - title = {Machine learning: {Trends}, perspectives, and prospects}, - url = {https://www.science.org/doi/10.1126/science.aaa8415}, - urldate = {2023-10-25}, - volume = 349, - year = 2015, - Bdsk-Url-1 = {https://www.science.org/doi/10.1126/science.aaa8415}, - Bdsk-Url-2 = {https://doi.org/10.1126/science.aaa8415}} - + shorttitle = {Machine learning}, + author = {Jordan, M. I. and Mitchell, T. 
M.}, + year = 2015, + month = jul, + journal = {Science}, + volume = 349, + number = 6245, + pages = {255--260}, + doi = {10.1126/science.aaa8415}, + issn = {0036-8075, 1095-9203}, + url = {https://www.science.org/doi/10.1126/science.aaa8415}, + urldate = {2023-10-25}, + language = {en}, + bdsk-url-1 = {https://www.science.org/doi/10.1126/science.aaa8415}, + bdsk-url-2 = {https://doi.org/10.1126/science.aaa8415} +} @inproceedings{jouppi2017datacenter, - author = {Jouppi, Norman P and Young, Cliff and Patil, Nishant and Patterson, David and Agrawal, Gaurav and Bajwa, Raminder and Bates, Sarah and Bhatia, Suresh and Boden, Nan and Borchers, Al and others}, - booktitle = {Proceedings of the 44th annual international symposium on computer architecture}, - pages = {1--12}, - title = {In-datacenter performance analysis of a tensor processing unit}, - year = 2017} - + author = {Jouppi, Norman P and Young, Cliff and Patil, Nishant and Patterson, David and Agrawal, Gaurav and Bajwa, Raminder and Bates, Sarah and Bhatia, Suresh and Boden, Nan and Borchers, Al and others}, + year = 2017, + booktitle = {Proceedings of the 44th annual international symposium on computer architecture}, + pages = {1--12} +} +@inproceedings{Jouppi2023TPUv4, + author = {Jouppi, Norm and Kurian, George and Li, Sheng and Ma, Peter and Nagarajan, Rahul and Nai, Lifeng and Patil, Nishant and Subramanian, Suvinay and Swing, Andy and Towles, Brian and Young, Clifford and Zhou, Xiang and Zhou, Zongwei and Patterson, David A}, + year = 2023, + booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture}, + location = {Orlando, FL, USA}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + series = {ISCA '23}, + doi = {10.1145/3579371.3589350}, + isbn = 9798400700958, + url = {https://doi.org/10.1145/3579371.3589350}, + articleno = 82, + numpages = 14, + bdsk-url-1 = {https://doi.org/10.1145/3579371.3589350} +} +@book{joye2012fault, + author = {Joye, Marc and Tunstall, Michael}, + year = 2012, + publisher = {Springer Publishing Company, Incorporated}, + date-added = {2023-11-22 16:35:24 -0500}, + date-modified = {2023-11-22 16:36:20 -0500} +} +@misc{kaiming, + author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun}, + year = 2015, + booktitle = {Proceedings of the IEEE international conference on computer vision}, + eprint = {1502.01852}, + archiveprefix = {arXiv} +} @article{kairouz2015secure, - author = {Kairouz, Peter and Oh, Sewoong and Viswanath, Pramod}, - journal = {Advances in neural information processing systems}, - title = {Secure multi-party differential privacy}, - volume = 28, - year = 2015} - + author = {Kairouz, Peter and Oh, Sewoong and Viswanath, Pramod}, + year = 2015, + journal = {Advances in neural information processing systems}, + volume = 28 +} @article{karargyris2023federated, - author = {Karargyris, Alexandros and Umeton, Renato and Sheller, Micah J and Aristizabal, Alejandro and George, Johnu and Wuest, Anna and Pati, Sarthak and Kassem, Hasan and Zenk, Maximilian and Baid, Ujjwal and others}, - journal = {Nature Machine Intelligence}, - number = 7, - pages = {799--810}, - publisher = {Nature Publishing Group UK London}, - title = {Federated benchmarking of medical artificial intelligence with MedPerf}, - volume = 5, - year = 2023} - + author = {Karargyris, Alexandros and Umeton, Renato and Sheller, Micah J and Aristizabal, Alejandro and George, Johnu and Wuest, Anna and Pati, Sarthak and Kassem, Hasan and Zenk, Maximilian 
and Baid, Ujjwal and others}, + year = 2023, + journal = {Nature Machine Intelligence}, + publisher = {Nature Publishing Group UK London}, + volume = 5, + number = 7, + pages = {799--810} +} +@article{khan2021knowledgeadaptation, + author = {Mohammad Emtiyaz Khan and Siddharth Swaroop}, + year = 2021, + journal = {arXiv preprint arXiv:2106.08769}, + date-added = {2023-11-22 19:22:50 -0500}, + date-modified = {2023-11-22 19:23:40 -0500} +} @article{kiela2021dynabench, - author = {Kiela, Douwe and Bartolo, Max and Nie, Yixin and Kaushik, Divyansh and Geiger, Atticus and Wu, Zhengxuan and Vidgen, Bertie and Prasad, Grusha and Singh, Amanpreet and Ringshia, Pratik and others}, - journal = {arXiv preprint arXiv:2104.14337}, - title = {Dynabench: Rethinking benchmarking in NLP}, - year = 2021} - -@inproceedings{koh2021wilds, - author = {Koh, Pang Wei and Sagawa, Shiori and Marklund, Henrik and Xie, Sang Michael and Zhang, Marvin and Balsubramani, Akshay and Hu, Weihua and Yasunaga, Michihiro and Phillips, Richard Lanas and Gao, Irena and others}, - booktitle = {International Conference on Machine Learning}, - organization = {PMLR}, - pages = {5637--5664}, - title = {Wilds: A benchmark of in-the-wild distribution shifts}, - year = 2021} - -@article{kolda_tensor_2009, - abstract = {This survey provides an overview of higher-order tensor decompositions, their applications, and available software. A tensor is a multidimensional or N -way array. Decompositions of higher-order tensors (i.e., N -way arrays with N ≥ 3) have applications in psychometrics, chemometrics, signal processing, numerical linear algebra, computer vision, numerical analysis, data mining, neuroscience, graph analysis, and elsewhere. Two particular tensor decompositions can be considered to be higher-order extensions of the matrix singular value decomposition: CANDECOMP/PARAFAC (CP) decomposes a tensor as a sum of rank-one tensors, and the Tucker decomposition is a higher-order form of principal component analysis. There are many other tensor decompositions, including INDSCAL, PARAFAC2, CANDELINC, DEDICOM, and PARATUCK2 as well as nonnegative variants of all of the above. The N-way Toolbox, Tensor Toolbox, and Multilinear Engine are examples of software packages for working with tensors.}, - author = {Kolda, Tamara G. 
and Bader, Brett W.}, - doi = {10.1137/07070111X}, - file = {Kolda and Bader - 2009 - Tensor Decompositions and Applications.pdf:/Users/jeffreyma/Zotero/storage/Q7ZG2267/Kolda and Bader - 2009 - Tensor Decompositions and Applications.pdf:application/pdf}, - issn = {0036-1445, 1095-7200}, - journal = {SIAM Review}, - language = {en}, - month = aug, - number = 3, - pages = {455--500}, - title = {Tensor {Decompositions} and {Applications}}, - url = {http://epubs.siam.org/doi/10.1137/07070111X}, - urldate = {2023-10-20}, - volume = 51, - year = 2009, - Bdsk-Url-1 = {http://epubs.siam.org/doi/10.1137/07070111X}, - Bdsk-Url-2 = {https://doi.org/10.1137/07070111X}} - + author = {Kiela, Douwe and Bartolo, Max and Nie, Yixin and Kaushik, Divyansh and Geiger, Atticus and Wu, Zhengxuan and Vidgen, Bertie and Prasad, Grusha and Singh, Amanpreet and Ringshia, Pratik and others}, + year = 2021, + journal = {arXiv preprint arXiv:2104.14337} +} +@inproceedings{kocher1996timing, + author = {Kocher, Paul C}, + year = 1996, + booktitle = {Advances in Cryptology—CRYPTO’96: 16th Annual International Cryptology Conference Santa Barbara, California, USA August 18--22, 1996 Proceedings 16}, + pages = {104--113}, + organization = {Springer} +} +@inproceedings{kocher1999differential, + author = {Kocher, Paul and Jaffe, Joshua and Jun, Benjamin}, + year = 1999, + booktitle = {Advances in Cryptology---CRYPTO'99: 19th Annual International Cryptology Conference Santa Barbara, California, USA, August 15--19, 1999 Proceedings 19}, + pages = {388--397}, + date-added = {2023-11-22 16:55:28 -0500}, + date-modified = {2023-11-22 16:56:18 -0500}, + organization = {Springer} +} +@article{Kocher2011Intro, + author = {Kocher, Paul and Jaffe, Joshua and Jun, Benjamin and Rohatgi, Pankaj}, + year = 2011, + month = {April}, + journal = {Journal of Cryptographic Engineering}, + volume = 1, + number = 1, + pages = {5--27}, + date-added = {2023-11-22 16:58:42 -0500}, + date-modified = {2023-11-22 17:00:36 -0500} +} +@inproceedings{Kocher2018spectre, + author = {Paul Kocher and Jann Horn and Anders Fogh and and Daniel Genkin and Daniel Gruss and Werner Haas and Mike Hamburg and Moritz Lipp and Stefan Mangard and Thomas Prescher and Michael Schwarz and Yuval Yarom}, + year = 2019, + booktitle = {40th IEEE Symposium on Security and Privacy (S\&P'19)}, + date-added = {2023-11-22 16:33:35 -0500}, + date-modified = {2023-11-22 16:34:01 -0500} +} +@inproceedings{koh2021wilds, + author = {Koh, Pang Wei and Sagawa, Shiori and Marklund, Henrik and Xie, Sang Michael and Zhang, Marvin and Balsubramani, Akshay and Hu, Weihua and Yasunaga, Michihiro and Phillips, Richard Lanas and Gao, Irena and others}, + year = 2021, + booktitle = {International Conference on Machine Learning}, + pages = {5637--5664}, + organization = {PMLR} +} +@article{kolda_tensor_2009, + author = {Kolda, Tamara G. 
and Bader, Brett W.}, + year = 2009, + month = aug, + journal = {SIAM Review}, + volume = 51, + number = 3, + pages = {455--500}, + doi = {10.1137/07070111X}, + issn = {0036-1445, 1095-7200}, + url = {http://epubs.siam.org/doi/10.1137/07070111X}, + urldate = {2023-10-20}, + language = {en}, + bdsk-url-1 = {http://epubs.siam.org/doi/10.1137/07070111X}, + bdsk-url-2 = {https://doi.org/10.1137/07070111X} +} @article{koshti2011cumulative, - author = {Koshti, VV}, - journal = {International journal of physics and mathematical sciences}, - number = 1, - pages = {28--32}, - title = {Cumulative sum control chart}, - volume = 1, - year = 2011} - + author = {Koshti, VV}, + year = 2011, + journal = {International journal of physics and mathematical sciences}, + volume = 1, + number = 1, + pages = {28--32} +} @misc{krishna2023raman, + author = {Adithya Krishna and Srikanth Rohit Nudurupati and Chandana D G and Pritesh Dwivedi and Andr{\'e} van Schaik and Mahesh Mehendale and Chetan Singh Thakur}, + year = 2023, archiveprefix = {arXiv}, - author = {Adithya Krishna and Srikanth Rohit Nudurupati and Chandana D G and Pritesh Dwivedi and Andr{\'e} van Schaik and Mahesh Mehendale and Chetan Singh Thakur}, - eprint = {2306.06493}, - primaryclass = {cs.NE}, - title = {RAMAN: A Re-configurable and Sparse tinyML Accelerator for Inference on Edge}, - year = 2023} - + eprint = {2306.06493}, + primaryclass = {cs.NE} +} @article{krishnamoorthi2018quantizing, - author = {Krishnamoorthi, Raghuraman}, - journal = {arXiv preprint arXiv:1806.08342}, - title = {Quantizing deep convolutional networks for efficient inference: A whitepaper}, - year = 2018} - + author = {Krishnamoorthi, Raghuraman}, + year = 2018, + journal = {arXiv preprint arXiv:1806.08342} +} @article{Krishnan_Rajpurkar_Topol_2022, - author = {Krishnan, Rayan and Rajpurkar, Pranav and Topol, Eric J.}, - doi = {10.1038/s41551-022-00914-1}, - journal = {Nature Biomedical Engineering}, - number = 12, - pages = {1346--1352}, - title = {Self-supervised learning in medicine and Healthcare}, - volume = 6, - year = 2022, - Bdsk-Url-1 = {https://doi.org/10.1038/s41551-022-00914-1}} - + author = {Krishnan, Rayan and Rajpurkar, Pranav and Topol, Eric J.}, + year = 2022, + journal = {Nature Biomedical Engineering}, + volume = 6, + number = 12, + pages = {1346--1352}, + doi = {10.1038/s41551-022-00914-1}, + bdsk-url-1 = {https://doi.org/10.1038/s41551-022-00914-1} +} +@inproceedings{krishnan2023archgym, + author = {Krishnan, Srivatsan and Yazdanbakhsh, Amir and Prakash, Shvetank and Jabbour, Jason and Uchendu, Ikechukwu and Ghosh, Susobhan and Boroujerdian, Behzad and Richins, Daniel and Tripathy, Devashree and Faust, Aleksandra and Janapa Reddi, Vijay}, + year = 2023, + booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture}, + pages = {1--16} +} @article{krizhevsky2012imagenet, - author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E}, - journal = {Advances in neural information processing systems}, - title = {Imagenet classification with deep convolutional neural networks}, - volume = 25, - year = 2012} - + author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E}, + year = 2012, + journal = {Advances in neural information processing systems}, + volume = 25 +} @inproceedings{kung1979systolic, - author = {Kung, Hsiang Tsung and Leiserson, Charles E}, - booktitle = {Sparse Matrix Proceedings 1978}, - organization = {Society for industrial and applied mathematics Philadelphia, PA, USA}, - pages = 
{256--282}, - title = {Systolic arrays (for VLSI)}, - volume = 1, - year = 1979} - + author = {Kung, Hsiang Tsung and Leiserson, Charles E}, + year = 1979, + booktitle = {Sparse Matrix Proceedings 1978}, + volume = 1, + pages = {256--282}, + organization = {Society for industrial and applied mathematics Philadelphia, PA, USA} +} @misc{kung2018packing, + author = {H. T. Kung and Bradley McDanel and Sai Qian Zhang}, + year = 2018, archiveprefix = {arXiv}, - author = {H. T. Kung and Bradley McDanel and Sai Qian Zhang}, - eprint = {1811.04770}, - primaryclass = {cs.LG}, - title = {Packing Sparse Convolutional Neural Networks for Efficient Systolic Array Implementations: Column Combining Under Joint Optimization}, - year = 2018} - + eprint = {1811.04770}, + primaryclass = {cs.LG} +} @incollection{kurkova_survey_2018, - address = {Cham}, - author = {Tan, Chuanqi and Sun, Fuchun and Kong, Tao and Zhang, Wenchang and Yang, Chao and Liu, Chunfang}, - booktitle = {Artificial {Neural} {Networks} and {Machine} {Learning} -- {ICANN} 2018}, - doi = {10.1007/978-3-030-01424-7_27}, - editor = {K{\r u}rkov{\'a}, V{\v e}ra and Manolopoulos, Yannis and Hammer, Barbara and Iliadis, Lazaros and Maglogiannis, Ilias}, - file = {Tan et al. - 2018 - A Survey on Deep Transfer Learning.pdf:/Users/alex/Zotero/storage/5NZ36SGB/Tan et al. - 2018 - A Survey on Deep Transfer Learning.pdf:application/pdf}, - isbn = {978-3-030-01423-0 978-3-030-01424-7}, - language = {en}, - note = {Series Title: Lecture Notes in Computer Science}, - pages = {270--279}, - publisher = {Springer International Publishing}, - title = {A {Survey} on {Deep} {Transfer} {Learning}}, - url = {http://link.springer.com/10.1007/978-3-030-01424-7_27}, - urldate = {2023-10-26}, - volume = 11141, - year = 2018, - Bdsk-Url-1 = {http://link.springer.com/10.1007/978-3-030-01424-7_27}, - Bdsk-Url-2 = {https://doi.org/10.1007/978-3-030-01424-7_27}} - + author = {Tan, Chuanqi and Sun, Fuchun and Kong, Tao and Zhang, Wenchang and Yang, Chao and Liu, Chunfang}, + year = 2018, + booktitle = {Artificial {Neural} {Networks} and {Machine} {Learning} -- {ICANN} 2018}, + publisher = {Springer International Publishing}, + address = {Cham}, + volume = 11141, + pages = {270--279}, + doi = {10.1007/978-3-030-01424-7_27}, + isbn = {978-3-030-01423-0 978-3-030-01424-7}, + url = {http://link.springer.com/10.1007/978-3-030-01424-7_27}, + urldate = {2023-10-26}, + note = {Series Title: Lecture Notes in Computer Science}, + editor = {K{\r u}rkov{\'a}, V{\v e}ra and Manolopoulos, Yannis and Hammer, Barbara and Iliadis, Lazaros and Maglogiannis, Ilias}, + language = {en}, + bdsk-url-1 = {http://link.springer.com/10.1007/978-3-030-01424-7_27}, + bdsk-url-2 = {https://doi.org/10.1007/978-3-030-01424-7_27} +} @misc{kuzmin2022fp8, + author = {Andrey Kuzmin and Mart Van Baalen and Yuwei Ren and Markus Nagel and Jorn Peters and Tijmen Blankevoort}, + year = 2022, archiveprefix = {arXiv}, - author = {Andrey Kuzmin and Mart Van Baalen and Yuwei Ren and Markus Nagel and Jorn Peters and Tijmen Blankevoort}, - eprint = {2208.09225}, - primaryclass = {cs.LG}, - title = {FP8 Quantization: The Power of the Exponent}, - year = 2022} - + eprint = {2208.09225}, + primaryclass = {cs.LG} +} @misc{kwon_tinytrain_2023, - author = {Kwon, Young D. and Li, Rui and Venieris, Stylianos I. and Chauhan, Jagmohan and Lane, Nicholas D. and Mascolo, Cecilia}, - file = {Kwon et al. - 2023 - TinyTrain Deep Neural Network Training at the Ext.pdf:/Users/alex/Zotero/storage/L2ST472U/Kwon et al. 
- 2023 - TinyTrain Deep Neural Network Training at the Ext.pdf:application/pdf}, - keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning}, - language = {en}, - month = jul, - note = {arXiv:2307.09988 [cs]}, - publisher = {arXiv}, - shorttitle = {{TinyTrain}}, - title = {{TinyTrain}: {Deep} {Neural} {Network} {Training} at the {Extreme} {Edge}}, - url = {http://arxiv.org/abs/2307.09988}, - urldate = {2023-10-26}, - year = 2023, - Bdsk-Url-1 = {http://arxiv.org/abs/2307.09988}} - + shorttitle = {{TinyTrain}}, + author = {Kwon, Young D. and Li, Rui and Venieris, Stylianos I. and Chauhan, Jagmohan and Lane, Nicholas D. and Mascolo, Cecilia}, + year = 2023, + month = jul, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2307.09988}, + urldate = {2023-10-26}, + note = {arXiv:2307.09988 [cs]}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2307.09988} +} +@article{kwon2022flexible, + author = {Kwon, Sun Hwa and Dong, Lin}, + year = 2022, + journal = {Nano Energy}, + publisher = {Elsevier}, + pages = 107632 +} @article{kwon2023tinytrain, - author = {Kwon, Young D and Li, Rui and Venieris, Stylianos I and Chauhan, Jagmohan and Lane, Nicholas D and Mascolo, Cecilia}, - journal = {arXiv preprint arXiv:2307.09988}, - title = {TinyTrain: Deep Neural Network Training at the Extreme Edge}, - year = 2023} - + author = {Kwon, Young D and Li, Rui and Venieris, Stylianos I and Chauhan, Jagmohan and Lane, Nicholas D and Mascolo, Cecilia}, + year = 2023, + journal = {arXiv preprint arXiv:2307.09988} +} @misc{Labelbox, - journal = {Labelbox}, - url = {https://labelbox.com/}, - Bdsk-Url-1 = {https://labelbox.com/}} - + journal = {Labelbox}, + url = {https://labelbox.com/}, + bdsk-url-1 = {https://labelbox.com/} +} @article{lai2018cmsis, - author = {Lai, Liangzhen and Suda, Naveen and Chandra, Vikas}, - journal = {arXiv preprint arXiv:1801.06601}, - title = {Cmsis-nn: Efficient neural network kernels for arm cortex-m cpus}, - year = 2018} - + author = {Lai, Liangzhen and Suda, Naveen and Chandra, Vikas}, + year = 2018, + journal = {arXiv preprint arXiv:1801.06601} +} @misc{lai2018cmsisnn, + author = {Liangzhen Lai and Naveen Suda and Vikas Chandra}, + year = 2018, archiveprefix = {arXiv}, - author = {Liangzhen Lai and Naveen Suda and Vikas Chandra}, - eprint = {1801.06601}, - primaryclass = {cs.NE}, - title = {CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs}, - year = 2018} - + eprint = {1801.06601}, + primaryclass = {cs.NE} +} @inproceedings{lecun_optimal_1989, - abstract = {We have used information-theoretic ideas to derive a class of prac(cid:173) tical and nearly optimal schemes for adapting the size of a neural network. By removing unimportant weights from a network, sev(cid:173) eral improvements can be expected: better generalization, fewer training examples required, and improved speed of learning and/or classification. The basic idea is to use second-derivative informa(cid:173) tion to make a tradeoff between network complexity and training set error. Experiments confirm the usefulness of the methods on a real-world application.}, - author = {LeCun, Yann and Denker, John and Solla, Sara}, - booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, - file = {Full Text PDF:/Users/jeffreyma/Zotero/storage/BYHQQSST/LeCun et al. 
- 1989 - Optimal Brain Damage.pdf:application/pdf}, - publisher = {Morgan-Kaufmann}, - title = {Optimal {Brain} {Damage}}, - url = {https://proceedings.neurips.cc/paper/1989/hash/6c9882bbac1c7093bd25041881277658-Abstract.html}, - urldate = {2023-10-20}, - volume = 2, - year = 1989, - Bdsk-Url-1 = {https://proceedings.neurips.cc/paper/1989/hash/6c9882bbac1c7093bd25041881277658-Abstract.html}} - + author = {LeCun, Yann and Denker, John and Solla, Sara}, + year = 1989, + booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, + publisher = {Morgan-Kaufmann}, + volume = 2, + url = {https://proceedings.neurips.cc/paper/1989/hash/6c9882bbac1c7093bd25041881277658-Abstract.html}, + urldate = {2023-10-20}, + bdsk-url-1 = {https://proceedings.neurips.cc/paper/1989/hash/6c9882bbac1c7093bd25041881277658-Abstract.html} +} @article{lecun1989optimal, - author = {LeCun, Yann and Denker, John and Solla, Sara}, - journal = {Advances in neural information processing systems}, - title = {Optimal brain damage}, - volume = 2, - year = 1989} - + author = {LeCun, Yann and Denker, John and Solla, Sara}, + year = 1989, + journal = {Advances in neural information processing systems}, + volume = 2 +} @article{li2014communication, - author = {Li, Mu and Andersen, David G and Smola, Alexander J and Yu, Kai}, - journal = {Advances in Neural Information Processing Systems}, - title = {Communication efficient distributed machine learning with the parameter server}, - volume = 27, - year = 2014} - + author = {Li, Mu and Andersen, David G and Smola, Alexander J and Yu, Kai}, + year = 2014, + journal = {Advances in Neural Information Processing Systems}, + volume = 27 +} @article{li2016lightrnn, - author = {Li, Xiang and Qin, Tao and Yang, Jian and Liu, Tie-Yan}, - journal = {Advances in Neural Information Processing Systems}, - title = {LightRNN: Memory and computation-efficient recurrent neural networks}, - volume = 29, - year = 2016} - + author = {Li, Xiang and Qin, Tao and Yang, Jian and Liu, Tie-Yan}, + year = 2016, + journal = {Advances in Neural Information Processing Systems}, + volume = 29 +} @article{li2017deep, - author = {Li, Yuxi}, - journal = {arXiv preprint arXiv:1701.07274}, - title = {Deep reinforcement learning: An overview}, - year = 2017} - + author = {Li, Yuxi}, + year = 2017, + journal = {arXiv preprint arXiv:1701.07274} +} @article{li2017learning, - author = {Li, Zhizhong and Hoiem, Derek}, - journal = {IEEE transactions on pattern analysis and machine intelligence}, - number = 12, - pages = {2935--2947}, - publisher = {IEEE}, - title = {Learning without forgetting}, - volume = 40, - year = 2017} - + author = {Li, Zhizhong and Hoiem, Derek}, + year = 2017, + journal = {IEEE transactions on pattern analysis and machine intelligence}, + publisher = {IEEE}, + volume = 40, + number = 12, + pages = {2935--2947} +} @article{li2019edge, - author = {Li, En and Zeng, Liekang and Zhou, Zhi and Chen, Xu}, - journal = {IEEE Transactions on Wireless Communications}, - number = 1, - pages = {447--457}, - publisher = {IEEE}, - title = {Edge AI: On-demand accelerating deep neural network inference via edge computing}, - volume = 19, - year = 2019} - + author = {Li, En and Zeng, Liekang and Zhou, Zhi and Chen, Xu}, + year = 2019, + journal = {IEEE Transactions on Wireless Communications}, + publisher = {IEEE}, + volume = 19, + number = 1, + pages = {447--457} +} +@inproceedings{Li2020Additive, + author = {Yuhang Li and Xin Dong and Wei Wang}, + year = 2020, + booktitle = {International Conference on 
Learning Representations}, + url = {https://openreview.net/forum?id=BkgXT24tDS}, + bdsk-url-1 = {https://openreview.net/forum?id=BkgXT24tDS} +} +@article{Li2020Federated, + author = {Li, Tian and Sahu, Anit Kumar and Talwalkar, Ameet and Smith, Virginia}, + year = 2020, + journal = {IEEE Signal Processing Magazine}, + volume = 37, + number = 3, + pages = {50--60}, + date-added = {2023-11-22 19:15:13 -0500}, + date-modified = {2023-11-22 19:17:19 -0500} +} @misc{liao_can_2023, - abstract = {Pruning is a widely used technique for reducing the size of deep neural networks while maintaining their performance. However, such a technique, despite being able to massively compress deep models, is hardly able to remove entire layers from a model (even when structured): is this an addressable task? In this study, we introduce EGP, an innovative Entropy Guided Pruning algorithm aimed at reducing the size of deep neural networks while preserving their performance. The key focus of EGP is to prioritize pruning connections in layers with low entropy, ultimately leading to their complete removal. Through extensive experiments conducted on popular models like ResNet-18 and Swin-T, our findings demonstrate that EGP effectively compresses deep neural networks while maintaining competitive performance levels. Our results not only shed light on the underlying mechanism behind the advantages of unstructured pruning, but also pave the way for further investigations into the intricate relationship between entropy, pruning techniques, and deep learning performance. The EGP algorithm and its insights hold great promise for advancing the field of network compression and optimization. The source code for EGP is released open-source.}, - author = {Liao, Zhu and Qu{\'e}tu, Victor and Nguyen, Van-Tam and Tartaglione, Enzo}, - doi = {10.48550/arXiv.2308.06619}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/V6P3XB5H/Liao et al. - 2023 - Can Unstructured Pruning Reduce the Depth in Deep .pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/WSQ4ZUH4/2308.html:text/html}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning}, - month = aug, - note = {arXiv:2308.06619 [cs]}, - publisher = {arXiv}, - title = {Can {Unstructured} {Pruning} {Reduce} the {Depth} in {Deep} {Neural} {Networks}?}, - url = {http://arxiv.org/abs/2308.06619}, - urldate = {2023-10-20}, - year = 2023, - Bdsk-Url-1 = {http://arxiv.org/abs/2308.06619}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2308.06619}} - + author = {Liao, Zhu and Qu{\'e}tu, Victor and Nguyen, Van-Tam and Tartaglione, Enzo}, + year = 2023, + month = aug, + publisher = {arXiv}, + doi = {10.48550/arXiv.2308.06619}, + url = {http://arxiv.org/abs/2308.06619}, + urldate = {2023-10-20}, + note = {arXiv:2308.06619 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/2308.06619}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2308.06619} +} @misc{lin_-device_2022, - annote = {Comment: NeurIPS 2022}, - author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Gan, Chuang and Han, Song}, - file = {Lin et al. - 2022 - On-Device Training Under 256KB Memory.pdf:/Users/alex/Zotero/storage/GMF6SWGT/Lin et al. 
- 2022 - On-Device Training Under 256KB Memory.pdf:application/pdf}, - keywords = {Computer Science - Computer Vision and Pattern Recognition}, - language = {en}, - month = nov, - note = {arXiv:2206.15472 [cs]}, - publisher = {arXiv}, - title = {On-{Device} {Training} {Under} {256KB} {Memory}}, - url = {http://arxiv.org/abs/2206.15472}, - urldate = {2023-10-26}, - year = 2022, - Bdsk-Url-1 = {http://arxiv.org/abs/2206.15472}} - + author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Gan, Chuang and Han, Song}, + year = 2022, + month = nov, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2206.15472}, + urldate = {2023-10-26}, + note = {arXiv:2206.15472 [cs]}, + annote = {Comment: NeurIPS 2022}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2206.15472} +} @misc{lin_-device_2022-1, - annote = {Comment: NeurIPS 2022}, - author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Gan, Chuang and Han, Song}, - file = {Lin et al. - 2022 - On-Device Training Under 256KB Memory.pdf:/Users/alex/Zotero/storage/DNIY32R2/Lin et al. - 2022 - On-Device Training Under 256KB Memory.pdf:application/pdf}, - keywords = {Computer Science - Computer Vision and Pattern Recognition}, - language = {en}, - month = nov, - note = {arXiv:2206.15472 [cs]}, - publisher = {arXiv}, - title = {On-{Device} {Training} {Under} {256KB} {Memory}}, - url = {http://arxiv.org/abs/2206.15472}, - urldate = {2023-10-25}, - year = 2022, - Bdsk-Url-1 = {http://arxiv.org/abs/2206.15472}} - + author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Gan, Chuang and Han, Song}, + year = 2022, + month = nov, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2206.15472}, + urldate = {2023-10-25}, + note = {arXiv:2206.15472 [cs]}, + annote = {Comment: NeurIPS 2022}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2206.15472} +} @misc{lin_mcunet_2020, - abstract = {Machine learning on tiny IoT devices based on microcontroller units (MCU) is appealing but challenging: the memory of microcontrollers is 2-3 orders of magnitude smaller even than mobile phones. We propose MCUNet, a framework that jointly designs the efficient neural architecture (TinyNAS) and the lightweight inference engine (TinyEngine), enabling ImageNet-scale inference on microcontrollers. TinyNAS adopts a two-stage neural architecture search approach that first optimizes the search space to fit the resource constraints, then specializes the network architecture in the optimized search space. TinyNAS can automatically handle diverse constraints (i.e.device, latency, energy, memory) under low search costs.TinyNAS is co-designed with TinyEngine, a memory-efficient inference library to expand the search space and fit a larger model. TinyEngine adapts the memory scheduling according to the overall network topology rather than layer-wise optimization, reducing the memory usage by 4.8x, and accelerating the inference by 1.7-3.3x compared to TF-Lite Micro and CMSIS-NN. MCUNet is the first to achieves {\textgreater}70\% ImageNet top1 accuracy on an off-the-shelf commercial microcontroller, using 3.5x less SRAM and 5.7x less Flash compared to quantized MobileNetV2 and ResNet-18. On visual\&audio wake words tasks, MCUNet achieves state-of-the-art accuracy and runs 2.4-3.4x faster than MobileNetV2 and ProxylessNAS-based solutions with 3.7-4.1x smaller peak SRAM. Our study suggests that the era of always-on tiny machine learning on IoT devices has arrived. 
Code and models can be found here: https://tinyml.mit.edu.}, - annote = {Comment: NeurIPS 2020 (spotlight)}, - author = {Lin, Ji and Chen, Wei-Ming and Lin, Yujun and Cohn, John and Gan, Chuang and Han, Song}, - doi = {10.48550/arXiv.2007.10319}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/IX2JN4P9/Lin et al. - 2020 - MCUNet Tiny Deep Learning on IoT Devices.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/BAKHZ46Y/2007.html:text/html}, - keywords = {Computer Science - Computer Vision and Pattern Recognition}, - language = {en}, - month = nov, - note = {arXiv:2007.10319 [cs]}, - publisher = {arXiv}, - shorttitle = {{MCUNet}}, - title = {{MCUNet}: {Tiny} {Deep} {Learning} on {IoT} {Devices}}, - url = {http://arxiv.org/abs/2007.10319}, - urldate = {2023-10-20}, - year = 2020, - Bdsk-Url-1 = {http://arxiv.org/abs/2007.10319}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2007.10319}} - + shorttitle = {{MCUNet}}, + author = {Lin, Ji and Chen, Wei-Ming and Lin, Yujun and Cohn, John and Gan, Chuang and Han, Song}, + year = 2020, + month = nov, + publisher = {arXiv}, + doi = {10.48550/arXiv.2007.10319}, + url = {http://arxiv.org/abs/2007.10319}, + urldate = {2023-10-20}, + note = {arXiv:2007.10319 [cs]}, + annote = {Comment: NeurIPS 2020 (spotlight)}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2007.10319}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2007.10319} +} @inproceedings{lin2014microsoft, - author = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, - booktitle = {Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13}, - organization = {Springer}, - pages = {740--755}, - title = {Microsoft coco: Common objects in context}, - year = 2014} - + author = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + year = 2014, + booktitle = {Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13}, + pages = {740--755}, + organization = {Springer} +} @article{lin2020mcunet, + author = {Lin, Ji and Chen, Wei-Ming and Lin, Yujun and Gan, Chuang and Han, Song and others}, + year = 2020, + journal = {Advances in Neural Information Processing Systems}, + volume = 33, + pages = {11711--11722}, archiveprefix = {arXiv}, - author = {Lin, Ji and Chen, Wei-Ming and Lin, Yujun and Gan, Chuang and Han, Song and others}, - eprint = {2007.10319}, - journal = {Advances in Neural Information Processing Systems}, - pages = {11711--11722}, - primaryclass = {cs.CV}, - title = {Mcunet: Tiny deep learning on iot devices}, - volume = 33, - year = 2020} - + eprint = {2007.10319}, + primaryclass = {cs.CV} +} @article{lin2022device, - author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Gan, Chuang and Han, Song}, - journal = {Advances in Neural Information Processing Systems}, - pages = {22941--22954}, - title = {On-device training under 256kb memory}, - volume = 35, - year = 2022} - + author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Gan, Chuang and Han, Song}, + year = 2022, + journal = {Advances in Neural Information Processing Systems}, + volume = 35, + pages = {22941--22954} +} +@inproceedings{lin2022ondevice, + author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen 
and Gan, Chuang and Han, Song}, + year = 2022, + booktitle = {ArXiv} +} +@article{lin2023awq, + author = {Lin, Ji and Tang, Jiaming and Tang, Haotian and Yang, Shang and Dang, Xingyu and Han, Song}, + year = 2023, + journal = {arXiv} +} +@article{lindholm_nvidia_2008, + shorttitle = {{NVIDIA} {Tesla}}, + author = {Lindholm, Erik and Nickolls, John and Oberman, Stuart and Montrym, John}, + year = 2008, + month = mar, + journal = {IEEE Micro}, + volume = 28, + number = 2, + pages = {39--55}, + doi = {10.1109/MM.2008.31}, + issn = {1937-4143}, + url = {https://ieeexplore.ieee.org/document/4523358}, + urldate = {2023-11-07}, + note = {Conference Name: IEEE Micro}, + bdsk-url-1 = {https://ieeexplore.ieee.org/document/4523358}, + bdsk-url-2 = {https://doi.org/10.1109/MM.2008.31} +} +@inproceedings{Lipp2018meltdown, + author = {Moritz Lipp and Michael Schwarz and Daniel Gruss and Thomas Prescher and Werner Haas and Anders Fogh and Jann Horn and Stefan Mangard and Paul Kocher and Daniel Genkin and Yuval Yarom and Mike Hamburg}, + year = 2018, + booktitle = {27th {USENIX} Security Symposium ({USENIX} Security 18)}, + date-added = {2023-11-22 16:32:26 -0500}, + date-modified = {2023-11-22 16:33:08 -0500} +} +@article{loh20083d, + author = {Loh, Gabriel H}, + year = 2008, + journal = {ACM SIGARCH computer architecture news}, + publisher = {ACM New York, NY, USA}, + volume = 36, + number = 3, + pages = {453--464} +} @misc{lu_notes_2016, - abstract = {Low-rank matrix factorization (MF) is an important technique in data science. The key idea of MF is that there exists latent structures in the data, by uncovering which we could obtain a compressed representation of the data. By factorizing an original matrix to low-rank matrices, MF provides a unified method for dimension reduction, clustering, and matrix completion. In this article we review several important variants of MF, including: Basic MF, Non-negative MF, Orthogonal non-negative MF. As can be told from their names, non-negative MF and orthogonal non-negative MF are variants of basic MF with non-negativity and/or orthogonality constraints. Such constraints are useful in specific senarios. In the first part of this article, we introduce, for each of these models, the application scenarios, the distinctive properties, and the optimizing method. By properly adapting MF, we can go beyond the problem of clustering and matrix completion. In the second part of this article, we will extend MF to sparse matrix compeletion, enhance matrix compeletion using various regularization methods, and make use of MF for (semi-)supervised learning by introducing latent space reinforcement and transformation. 
We will see that MF is not only a useful model but also as a flexible framework that is applicable for various prediction problems.}, - author = {Lu, Yuan and Yang, Jie}, - doi = {10.48550/arXiv.1507.00333}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/4QED5ZU9/Lu and Yang - 2016 - Notes on Low-rank Matrix Factorization.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/XIBZBDJQ/1507.html:text/html}, - keywords = {Computer Science - Information Retrieval, Computer Science - Machine Learning, Mathematics - Numerical Analysis}, - month = may, - note = {arXiv:1507.00333 [cs]}, - publisher = {arXiv}, - title = {Notes on {Low}-rank {Matrix} {Factorization}}, - url = {http://arxiv.org/abs/1507.00333}, - urldate = {2023-10-20}, - year = 2016, - Bdsk-Url-1 = {http://arxiv.org/abs/1507.00333}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1507.00333}} - + author = {Lu, Yuan and Yang, Jie}, + year = 2016, + month = may, + publisher = {arXiv}, + doi = {10.48550/arXiv.1507.00333}, + url = {http://arxiv.org/abs/1507.00333}, + urldate = {2023-10-20}, + note = {arXiv:1507.00333 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/1507.00333}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1507.00333} +} +@inproceedings{luebke2008cuda, + author = {Luebke, David}, + year = 2008, + booktitle = {2008 5th IEEE International Symposium on Biomedical Imaging: From Nano to Macro}, + pages = {836--838}, + doi = {10.1109/ISBI.2008.4541126}, + bdsk-url-1 = {https://doi.org/10.1109/ISBI.2008.4541126} +} @article{lundberg2017unified, - author = {Lundberg, Scott M and Lee, Su-In}, - journal = {Advances in neural information processing systems}, - title = {A unified approach to interpreting model predictions}, - volume = 30, - year = 2017} - + author = {Lundberg, Scott M and Lee, Su-In}, + year = 2017, + journal = {Advances in neural information processing systems}, + volume = 30 +} +@article{maass1997networks, + author = {Maass, Wolfgang}, + year = 1997, + journal = {Neural networks}, + publisher = {Elsevier}, + volume = 10, + number = 9, + pages = {1659--1671} +} +@article{MAL-083, + author = {Peter Kairouz and H. Brendan McMahan and Brendan Avent and Aur{\'e}lien Bellet and Mehdi Bennis and Arjun Nitin Bhagoji and Kallista Bonawitz and Zachary Charles and Graham Cormode and Rachel Cummings and Rafael G. L. D'Oliveira and Hubert Eichner and Salim El Rouayheb and David Evans and Josh Gardner and Zachary Garrett and Adri{\`a} Gasc{\'o}n and Badih Ghazi and Phillip B. Gibbons and Marco Gruteser and Zaid Harchaoui and Chaoyang He and Lie He and Zhouyuan Huo and Ben Hutchinson and Justin Hsu and Martin Jaggi and Tara Javidi and Gauri Joshi and Mikhail Khodak and Jakub Konecn{\'y} and Aleksandra Korolova and Farinaz Koushanfar and Sanmi Koyejo and Tancr{\`e}de Lepoint and Yang Liu and Prateek Mittal and Mehryar Mohri and Richard Nock and Ayfer {\"O}zg{\"u}r and Rasmus Pagh and Hang Qi and Daniel Ramage and Ramesh Raskar and Mariana Raykova and Dawn Song and Weikang Song and Sebastian U. Stich and Ziteng Sun and Ananda Theertha Suresh and Florian Tram{\`e}r and Praneeth Vepakomma and Jianyu Wang and Li Xiong and Zheng Xu and Qiang Yang and Felix X. 
Yu and Han Yu and Sen Zhao}, + year = 2021, + journal = {Foundations and Trends{\textregistered} in Machine Learning}, + volume = 14, + number = {1--2}, + pages = {1--210}, + doi = {10.1561/2200000083}, + issn = {1935-8237}, + url = {http://dx.doi.org/10.1561/2200000083}, + date-added = {2023-11-22 19:14:08 -0500}, + date-modified = {2023-11-22 19:14:08 -0500}, + bdsk-url-1 = {http://dx.doi.org/10.1561/2200000083} +} +@article{markovic2020, + author = {Markovi{\'c}, Danijela and Mizrahi, Alice and Querlioz, Damien and Grollier, Julie}, + year = 2020, + journal = {Nature Reviews Physics}, + publisher = {Nature Publishing Group UK London}, + volume = 2, + number = 9, + pages = {499--510} +} @article{mattson2020mlperf, - author = {Mattson, Peter and Cheng, Christine and Diamos, Gregory and Coleman, Cody and Micikevicius, Paulius and Patterson, David and Tang, Hanlin and Wei, Gu-Yeon and Bailis, Peter and Bittorf, Victor and others}, - journal = {Proceedings of Machine Learning and Systems}, - pages = {336--349}, - title = {Mlperf training benchmark}, - volume = 2, - year = 2020} - + author = {Mattson, Peter and Cheng, Christine and Diamos, Gregory and Coleman, Cody and Micikevicius, Paulius and Patterson, David and Tang, Hanlin and Wei, Gu-Yeon and Bailis, Peter and Bittorf, Victor and others}, + year = 2020, + journal = {Proceedings of Machine Learning and Systems}, + volume = 2, + pages = {336--349} +} @inproceedings{mcmahan2017communication, - author = {McMahan, Brendan and Moore, Eider and Ramage, Daniel and Hampson, Seth and y Arcas, Blaise Aguera}, - booktitle = {Artificial intelligence and statistics}, - organization = {PMLR}, - pages = {1273--1282}, - title = {Communication-efficient learning of deep networks from decentralized data}, - year = 2017} - + author = {McMahan, Brendan and Moore, Eider and Ramage, Daniel and Hampson, Seth and y Arcas, Blaise Aguera}, + year = 2017, + booktitle = {Artificial intelligence and statistics}, + pages = {1273--1282}, + organization = {PMLR} +} @inproceedings{mcmahan2023communicationefficient, - author = {McMahan, Brendan and Moore, Eider and Ramage, Daniel and Hampson, Seth and y Arcas, Blaise Aguera}, - booktitle = {Artificial intelligence and statistics}, - organization = {PMLR}, - pages = {1273--1282}, - title = {Communication-efficient learning of deep networks from decentralized data}, - year = 2017} - + author = {McMahan, Brendan and Moore, Eider and Ramage, Daniel and Hampson, Seth and y Arcas, Blaise Aguera}, + year = 2017, + booktitle = {Artificial intelligence and statistics}, + pages = {1273--1282}, + organization = {PMLR} +} +@article{miller2000optical, + author = {Miller, David AB}, + year = 2000, + journal = {IEEE Journal of Selected Topics in Quantum Electronics}, + publisher = {IEEE}, + volume = 6, + number = 6, + pages = {1312--1317} +} +@article{miller2015remote, + author = {Miller, Charlie and Valasek, Chris}, + year = 2015, + journal = {Black Hat USA}, + volume = 2015, + number = {S 91}, + pages = {1--91}, + date-added = {2023-11-22 17:11:27 -0500}, + date-modified = {2023-11-22 17:12:18 -0500} +} +@article{miller2019lessons, + author = {Miller, Charlie}, + year = 2019, + journal = {IEEE Design & Test}, + volume = 36, + number = 6, + pages = {7--9}, + date-added = {2023-11-22 16:12:04 -0500}, + date-modified = {2023-11-22 16:13:31 -0500} +} +@article{mittal2021survey, + author = {Mittal, Sparsh and Verma, Gaurav and Kaushik, Brajesh and Khanday, Farooq A}, + year = 2021, + journal = {Journal of Systems Architecture}, + 
publisher = {Elsevier}, + volume = 119, + pages = 102276 +} +@article{modha2023neural, + author = {Modha, Dharmendra S and Akopyan, Filipp and Andreopoulos, Alexander and Appuswamy, Rathinakumar and Arthur, John V and Cassidy, Andrew S and Datta, Pallab and DeBole, Michael V and Esser, Steven K and Otero, Carlos Ortega and others}, + year = 2023, + journal = {Science}, + publisher = {American Association for the Advancement of Science}, + volume = 382, + number = 6668, + pages = {329--335} +} @article{moshawrab2023reviewing, - author = {Moshawrab, Mohammad and Adda, Mehdi and Bouzouane, Abdenour and Ibrahim, Hussein and Raad, Ali}, - journal = {Electronics}, - number = 10, - pages = 2287, - publisher = {MDPI}, - title = {Reviewing Federated Learning Aggregation Algorithms; Strategies, Contributions, Limitations and Future Perspectives}, - volume = 12, - year = 2023} - + author = {Moshawrab, Mohammad and Adda, Mehdi and Bouzouane, Abdenour and Ibrahim, Hussein and Raad, Ali}, + year = 2023, + journal = {Electronics}, + publisher = {MDPI}, + volume = 12, + number = 10, + pages = 2287 +} +@inproceedings{munshi2009opencl, + author = {Munshi, Aaftab}, + year = 2009, + booktitle = {2009 IEEE Hot Chips 21 Symposium (HCS)}, + pages = {1--314}, + doi = {10.1109/HOTCHIPS.2009.7478342}, + bdsk-url-1 = {https://doi.org/10.1109/HOTCHIPS.2009.7478342} +} +@article{musk2019integrated, + author = {Musk, Elon and others}, + year = 2019, + journal = {Journal of medical Internet research}, + publisher = {JMIR Publications Inc., Toronto, Canada}, + volume = 21, + number = 10, + pages = {e16194} +} +@article{narayanan2006break, + author = {Narayanan, Arvind and Shmatikov, Vitaly}, + year = 2006, + journal = {arXiv preprint cs/0610105}, + date-added = {2023-11-22 16:16:19 -0500}, + date-modified = {2023-11-22 16:16:59 -0500} +} +@misc{nas, + author = {Barret Zoph and Quoc V. 
Le}, + year = 2017, + eprint = {1611.01578}, + archiveprefix = {arXiv}, + primaryclass = {cs.LG} +} @inproceedings{nguyen2023re, - author = {Nguyen, Ngoc-Bao and Chandrasegaran, Keshigeyan and Abdollahzadeh, Milad and Cheung, Ngai-Man}, - booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, - pages = {16384--16393}, - title = {Re-thinking Model Inversion Attacks Against Deep Neural Networks}, - year = 2023} - + author = {Nguyen, Ngoc-Bao and Chandrasegaran, Keshigeyan and Abdollahzadeh, Milad and Cheung, Ngai-Man}, + year = 2023, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages = {16384--16393} +} +@misc{noauthor_amd_nodate, + url = {https://www.amd.com/en/graphics/radeon-rx-graphics}, + urldate = {2023-11-07}, + bdsk-url-1 = {https://www.amd.com/en/graphics/radeon-rx-graphics} +} @misc{noauthor_deep_nodate, - author = {Ivy Gu}, - title = {Deep {Learning} {Model} {Compression} (ii) {\textbar} by {Ivy} {Gu} {\textbar} {Medium}}, - url = {https://ivygdy.medium.com/deep-learning-model-compression-ii-546352ea9453}, - urldate = {2023-10-20}, - year = {2023}, - Bdsk-Url-1 = {https://ivygdy.medium.com/deep-learning-model-compression-ii-546352ea9453}} - + author = {Ivy Gu}, + year = 2023, + url = {https://ivygdy.medium.com/deep-learning-model-compression-ii-546352ea9453}, + urldate = {2023-10-20}, + bdsk-url-1 = {https://ivygdy.medium.com/deep-learning-model-compression-ii-546352ea9453} +} +@misc{noauthor_evolution_2023, + year = 2023, + month = oct, + journal = {audioXpress}, + url = {https://audioxpress.com/article/the-evolution-of-audio-dsps}, + urldate = {2023-11-07}, + language = {en}, + bdsk-url-1 = {https://audioxpress.com/article/the-evolution-of-audio-dsps} +} +@misc{noauthor_fpga_nodate, + url = {https://www.intel.com/content/www/us/en/docs/oneapi-fpga-add-on/optimization-guide/2023-1/fpga-architecture-overview.html}, + urldate = {2023-11-07}, + bdsk-url-1 = {https://www.intel.com/content/www/us/en/docs/oneapi-fpga-add-on/optimization-guide/2023-1/fpga-architecture-overview.html} +} +@misc{noauthor_google_2023, + shorttitle = {Google {Tensor} {G3}}, + year = 2023, + month = oct, + journal = {Google}, + url = {https://blog.google/products/pixel/google-tensor-g3-pixel-8/}, + urldate = {2023-11-07}, + language = {en-us}, + bdsk-url-1 = {https://blog.google/products/pixel/google-tensor-g3-pixel-8/} +} +@misc{noauthor_hexagon_nodate, + journal = {Qualcomm Developer Network}, + url = {https://developer.qualcomm.com/software/hexagon-dsp-sdk/dsp-processor}, + urldate = {2023-11-07}, + language = {en}, + bdsk-url-1 = {https://developer.qualcomm.com/software/hexagon-dsp-sdk/dsp-processor} +} +@misc{noauthor_integrated_2023, + year = 2023, + month = nov, + journal = {Wikipedia}, + url = {https://en.wikipedia.org/w/index.php?title=Integrated_circuit&oldid=1183537457}, + urldate = {2023-11-07}, + copyright = {Creative Commons Attribution-ShareAlike License}, + note = {Page Version ID: 1183537457}, + language = {en}, + bdsk-url-1 = {https://en.wikipedia.org/w/index.php?title=Integrated_circuit&oldid=1183537457} +} +@misc{noauthor_intel_nodate, + journal = {Intel}, + url = {https://www.intel.com/content/www/us/en/products/details/discrete-gpus/arc.html}, + urldate = {2023-11-07}, + language = {en}, + bdsk-url-1 = {https://www.intel.com/content/www/us/en/products/details/discrete-gpus/arc.html} +} @misc{noauthor_introduction_nodate, - author = {Hegde, Sumant}, - title = {An {Introduction} to {Separable} 
{Convolutions} - {Analytics} {Vidhya}}, - url = {https://www.analyticsvidhya.com/blog/2021/11/an-introduction-to-separable-convolutions/}, - urldate = {2023-10-20}, - year = {2023}, - Bdsk-Url-1 = {https://www.analyticsvidhya.com/blog/2021/11/an-introduction-to-separable-convolutions/}} - + author = {Hegde, Sumant}, + year = 2023, + url = {https://www.analyticsvidhya.com/blog/2021/11/an-introduction-to-separable-convolutions/}, + urldate = {2023-10-20}, + bdsk-url-1 = {https://www.analyticsvidhya.com/blog/2021/11/an-introduction-to-separable-convolutions/} +} @misc{noauthor_knowledge_nodate, - author = {IntelLabs}, - title = {Knowledge {Distillation} - {Neural} {Network} {Distiller}}, - url = {https://intellabs.github.io/distiller/knowledge_distillation.html}, - urldate = {2023-10-20}, - year = {2023}, - Bdsk-Url-1 = {https://intellabs.github.io/distiller/knowledge_distillation.html}} - + author = {IntelLabs}, + year = 2023, + url = {https://intellabs.github.io/distiller/knowledge_distillation.html}, + urldate = {2023-10-20}, + bdsk-url-1 = {https://intellabs.github.io/distiller/knowledge_distillation.html} +} +@misc{noauthor_project_nodate, + url = {https://www.microsoft.com/en-us/research/project/project-catapult/}, + urldate = {2023-11-07}, + bdsk-url-1 = {https://www.microsoft.com/en-us/research/project/project-catapult/} +} +@misc{noauthor_what_nodate, + shorttitle = {What is an {FPGA}?}, + journal = {AMD}, + url = {https://www.xilinx.com/products/silicon-devices/fpga/what-is-an-fpga.html}, + urldate = {2023-11-07}, + language = {en}, + bdsk-url-1 = {https://www.xilinx.com/products/silicon-devices/fpga/what-is-an-fpga.html} +} +@misc{noauthor_who_nodate, + url = {https://computerhistory.org/blog/who-invented-the-microprocessor/}, + urldate = {2023-11-07}, + bdsk-url-1 = {https://computerhistory.org/blog/who-invented-the-microprocessor/} +} +@inproceedings{Norman2017TPUv1, + author = {Jouppi, Norman P. and Young, Cliff and Patil, Nishant and Patterson, David and Agrawal, Gaurav and Bajwa, Raminder and Bates, Sarah and Bhatia, Suresh and Boden, Nan and Borchers, Al and Boyle, Rick and Cantin, Pierre-luc and Chao, Clifford and Clark, Chris and Coriell, Jeremy and Daley, Mike and Dau, Matt and Dean, Jeffrey and Gelb, Ben and Ghaemmaghami, Tara Vazir and Gottipati, Rajendra and Gulland, William and Hagmann, Robert and Ho, C. 
Richard and Hogberg, Doug and Hu, John and Hundt, Robert and Hurt, Dan and Ibarz, Julian and Jaffey, Aaron and Jaworski, Alek and Kaplan, Alexander and Khaitan, Harshit and Killebrew, Daniel and Koch, Andy and Kumar, Naveen and Lacy, Steve and Laudon, James and Law, James and Le, Diemthu and Leary, Chris and Liu, Zhuyuan and Lucke, Kyle and Lundin, Alan and MacKean, Gordon and Maggiore, Adriana and Mahony, Maire and Miller, Kieran and Nagarajan, Rahul and Narayanaswami, Ravi and Ni, Ray and Nix, Kathy and Norrie, Thomas and Omernick, Mark and Penukonda, Narayana and Phelps, Andy and Ross, Jonathan and Ross, Matt and Salek, Amir and Samadiani, Emad and Severn, Chris and Sizikov, Gregory and Snelham, Matthew and Souter, Jed and Steinberg, Dan and Swing, Andy and Tan, Mercedes and Thorson, Gregory and Tian, Bo and Toma, Horia and Tuttle, Erick and Vasudevan, Vijay and Walter, Richard and Wang, Walter and Wilcox, Eric and Yoon, Doe Hyun}, + year = 2017, + booktitle = {Proceedings of the 44th Annual International Symposium on Computer Architecture}, + location = {Toronto, ON, Canada}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + series = {ISCA '17}, + pages = {1--12}, + doi = {10.1145/3079856.3080246}, + isbn = 9781450348928, + url = {https://doi.org/10.1145/3079856.3080246}, + numpages = 12, + bdsk-url-1 = {https://doi.org/10.1145/3079856.3080246} +} +@article{Norrie2021TPUv2_3, + author = {Norrie, Thomas and Patil, Nishant and Yoon, Doe Hyun and Kurian, George and Li, Sheng and Laudon, James and Young, Cliff and Jouppi, Norman and Patterson, David}, + year = 2021, + journal = {IEEE Micro}, + volume = 41, + number = 2, + pages = {56--63}, + doi = {10.1109/MM.2021.3058217}, + bdsk-url-1 = {https://doi.org/10.1109/MM.2021.3058217} +} @article{Northcutt_Athalye_Mueller_2021, - author = {Northcutt, Curtis G and Athalye, Anish and Mueller, Jonas}, - doi = {  https://doi.org/10.48550/arXiv.2103.14749 arXiv-issued DOI via DataCite}, - journal = {arXiv}, - month = {Mar}, - title = {Pervasive Label Errors in Test Sets Destabilize Machine Learning Benchmarks}, - year = {2021}, - Bdsk-Url-1 = { %20https://doi.org/10.48550/arXiv.2103.14749%20arXiv-issued%20DOI%20via%20DataCite}} - + author = {Northcutt, Curtis G and Athalye, Anish and Mueller, Jonas}, + year = 2021, + month = {Mar}, + journal = {arXiv}, + doi = {  https://doi.org/10.48550/arXiv.2103.14749 arXiv-issued DOI via DataCite}, + bdsk-url-1 = { %20https://doi.org/10.48550/arXiv.2103.14749%20arXiv-issued%20DOI%20via%20DataCite} +} +@article{oecd22, + author = {OECD}, + year = 2022, + number = 341, + doi = {https://doi.org/https://doi.org/10.1787/7babf571-en}, + url = {https://www.oecd-ilibrary.org/content/paper/7babf571-en} +} +@article{oliynyk2023know, + author = {Oliynyk, Daryna and Mayer, Rudolf and Rauber, Andreas}, + year = 2023, + month = {July}, + journal = {ACM Comput. 
Surv.}, + volume = 55, + number = {14s}, + date-added = {2023-11-22 16:18:21 -0500}, + date-modified = {2023-11-22 16:20:44 -0500} +} @inproceedings{ooko2021tinyml, - author = {Ooko, Samson Otieno and Ogore, Marvin Muyonga and Nsenga, Jimmy and Zennaro, Marco}, - booktitle = {2021 IEEE Globecom Workshops (GC Wkshps)}, - organization = {IEEE}, - pages = {1--6}, - title = {TinyML in Africa: Opportunities and challenges}, - year = {2021}} - + author = {Ooko, Samson Otieno and Ogore, Marvin Muyonga and Nsenga, Jimmy and Zennaro, Marco}, + year = 2021, + booktitle = {2021 IEEE Globecom Workshops (GC Wkshps)}, + pages = {1--6}, + organization = {IEEE} +} +@article{oprea2022poisoning, + author = {Oprea, Alina and Singhal, Anoop and Vassilev, Apostol}, + year = 2022, + journal = {Computer}, + publisher = {IEEE}, + volume = 55, + number = 11, + pages = {94--99} +} @misc{ou_low_2023, - abstract = {Deep neural networks have achieved great success in many data processing applications. However, the high computational complexity and storage cost makes deep learning hard to be used on resource-constrained devices, and it is not environmental-friendly with much power cost. In this paper, we focus on low-rank optimization for efficient deep learning techniques. In the space domain, deep neural networks are compressed by low rank approximation of the network parameters, which directly reduces the storage requirement with a smaller number of network parameters. In the time domain, the network parameters can be trained in a few subspaces, which enables efficient training for fast convergence. The model compression in the spatial domain is summarized into three categories as pre-train, pre-set, and compression-aware methods, respectively. With a series of integrable techniques discussed, such as sparse pruning, quantization, and entropy coding, we can ensemble them in an integration framework with lower computational complexity and storage. Besides of summary of recent technical advances, we have two findings for motivating future works: one is that the effective rank outperforms other sparse measures for network compression. The other is a spatial and temporal balance for tensorized neural networks.}, - author = {Ou, Xinwei and Chen, Zhangxin and Zhu, Ce and Liu, Yipeng}, - file = {arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/SPSZ2HR9/2303.html:text/html;Full Text PDF:/Users/jeffreyma/Zotero/storage/6TUEBTEX/Ou et al. 
- 2023 - Low Rank Optimization for Efficient Deep Learning.pdf:application/pdf}, - keywords = {Computer Science - Machine Learning}, - month = {Mar}, - note = {arXiv:2303.13635 [cs]}, - publisher = {arXiv}, - shorttitle = {Low {Rank} {Optimization} for {Efficient} {Deep} {Learning}}, - title = {Low {Rank} {Optimization} for {Efficient} {Deep} {Learning}: {Making} {A} {Balance} between {Compact} {Architecture} and {Fast} {Training}}, - url = {http://arxiv.org/abs/2303.13635}, - urldate = {2023-10-20}, - year = {2023}, - Bdsk-Url-1 = {http://arxiv.org/abs/2303.13635}} - + shorttitle = {Low {Rank} {Optimization} for {Efficient} {Deep} {Learning}}, + author = {Ou, Xinwei and Chen, Zhangxin and Zhu, Ce and Liu, Yipeng}, + year = 2023, + month = {Mar}, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2303.13635}, + urldate = {2023-10-20}, + note = {arXiv:2303.13635 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/2303.13635} +} @article{pan_survey_2010, - author = {Pan, Sinno Jialin and Yang, Qiang}, - doi = {10.1109/TKDE.2009.191}, - file = {Pan and Yang - 2010 - A Survey on Transfer Learning.pdf:/Users/alex/Zotero/storage/T3H8E5K8/Pan and Yang - 2010 - A Survey on Transfer Learning.pdf:application/pdf}, - issn = {1041-4347}, - journal = {IEEE Transactions on Knowledge and Data Engineering}, - language = {en}, - month = {Oct}, - number = {10}, - pages = {1345--1359}, - title = {A {Survey} on {Transfer} {Learning}}, - url = {http://ieeexplore.ieee.org/document/5288526/}, - urldate = {2023-10-25}, - volume = {22}, - year = {2010}, - Bdsk-Url-1 = {http://ieeexplore.ieee.org/document/5288526/}, - Bdsk-Url-2 = {https://doi.org/10.1109/TKDE.2009.191}} - + author = {Pan, Sinno Jialin and Yang, Qiang}, + year = 2010, + month = {Oct}, + journal = {IEEE Transactions on Knowledge and Data Engineering}, + volume = 22, + number = 10, + pages = {1345--1359}, + doi = {10.1109/TKDE.2009.191}, + issn = {1041-4347}, + url = {http://ieeexplore.ieee.org/document/5288526/}, + urldate = {2023-10-25}, + language = {en}, + bdsk-url-1 = {http://ieeexplore.ieee.org/document/5288526/}, + bdsk-url-2 = {https://doi.org/10.1109/TKDE.2009.191} +} @article{pan2009survey, - author = {Pan, Sinno Jialin and Yang, Qiang}, - journal = {IEEE Transactions on knowledge and data engineering}, - number = {10}, - pages = {1345--1359}, - publisher = {IEEE}, - title = {A survey on transfer learning}, - volume = {22}, - year = {2009}} - + author = {Pan, Sinno Jialin and Yang, Qiang}, + year = 2009, + journal = {IEEE Transactions on knowledge and data engineering}, + publisher = {IEEE}, + volume = 22, + number = 10, + pages = {1345--1359} +} @article{parisi_continual_2019, - author = {Parisi, German I. and Kemker, Ronald and Part, Jose L. and Kanan, Christopher and Wermter, Stefan}, - doi = {10.1016/j.neunet.2019.01.012}, - file = {Parisi et al. - 2019 - Continual lifelong learning with neural networks .pdf:/Users/alex/Zotero/storage/TCGHD5TW/Parisi et al. 
- 2019 - Continual lifelong learning with neural networks .pdf:application/pdf}, - issn = {08936080}, - journal = {Neural Networks}, - language = {en}, - month = {May}, - pages = {54--71}, - shorttitle = {Continual lifelong learning with neural networks}, - title = {Continual lifelong learning with neural networks: {A} review}, - url = {https://linkinghub.elsevier.com/retrieve/pii/S0893608019300231}, - urldate = {2023-10-26}, - volume = {113}, - year = {2019}, - Bdsk-Url-1 = {https://linkinghub.elsevier.com/retrieve/pii/S0893608019300231}, - Bdsk-Url-2 = {https://doi.org/10.1016/j.neunet.2019.01.012}} - + shorttitle = {Continual lifelong learning with neural networks}, + author = {Parisi, German I. and Kemker, Ronald and Part, Jose L. and Kanan, Christopher and Wermter, Stefan}, + year = 2019, + month = {May}, + journal = {Neural Networks}, + volume = 113, + pages = {54--71}, + doi = {10.1016/j.neunet.2019.01.012}, + issn = {08936080}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S0893608019300231}, + urldate = {2023-10-26}, + language = {en}, + bdsk-url-1 = {https://linkinghub.elsevier.com/retrieve/pii/S0893608019300231}, + bdsk-url-2 = {https://doi.org/10.1016/j.neunet.2019.01.012} +} +@article{parrish2023adversarial, + author = {Alicia Parrish and Hannah Rose Kirk and Jessica Quaye and Charvi Rastogi and Max Bartolo and Oana Inel and Juan Ciro and Rafael Mosquera and Addison Howard and Will Cukierski and D. Sculley and Vijay Janapa Reddi and Lora Aroyo}, + year = 2023, + journal = {arXiv preprint arXiv:2305.14384}, + date-added = {2023-11-22 16:24:50 -0500}, + date-modified = {2023-11-22 16:26:30 -0500} +} @article{paszke2019pytorch, - author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others}, - journal = {Advances in neural information processing systems}, - title = {Pytorch: An imperative style, high-performance deep learning library}, - volume = {32}, - year = {2019}} - + author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others}, + year = 2019, + journal = {Advances in neural information processing systems}, + volume = 32 +} +@book{patterson2016computer, + author = {Patterson, David A and Hennessy, John L}, + year = 2016, + publisher = {Morgan kaufmann} +} @misc{Perrigo_2023, - author = {Perrigo, Billy}, - journal = {Time}, - month = {Jan}, - publisher = {Time}, - title = {OpenAI used Kenyan workers on less than $2 per hour: Exclusive}, - url = {https://time.com/6247678/openai-chatgpt-kenya-workers/}, - year = {2023}, - Bdsk-Url-1 = {https://time.com/6247678/openai-chatgpt-kenya-workers/}} - + author = {Perrigo, Billy}, + year = 2023, + month = {Jan}, + journal = {Time}, + publisher = {Time}, + url = {https://time.com/6247678/openai-chatgpt-kenya-workers/}, + bdsk-url-1 = {https://time.com/6247678/openai-chatgpt-kenya-workers/} +} +@article{plasma, + author = {Attia, Zachi and Sugrue, Alan and Asirvatham, Samuel and Ackerman, Michael and Kapa, Suraj and Friedman, Paul and Noseworthy, Peter}, + year = 2018, + month = {08}, + journal = {PLOS ONE}, + volume = 13, + pages = {e0201059}, + doi = {10.1371/journal.pone.0201059}, + bdsk-url-1 = {https://doi.org/10.1371/journal.pone.0201059} +} @inproceedings{Prakash_2023, - author = {Shvetank Prakash and Tim Callahan and Joseph Bushagour and Colby 
Banbury and Alan V. Green and Pete Warden and Tim Ansell and Vijay Janapa Reddi}, - booktitle = {2023 {IEEE} International Symposium on Performance Analysis of Systems and Software ({ISPASS})}, - doi = {10.1109/ispass57527.2023.00024}, - month = {apr}, - publisher = {{IEEE}}, - title = {{CFU} Playground: Full-Stack Open-Source Framework for Tiny Machine Learning ({TinyML}) Acceleration on {FPGAs}}, - url = {https://doi.org/10.1109%2Fispass57527.2023.00024}, - year = {2023}, - Bdsk-Url-1 = {https://doi.org/10.1109%2Fispass57527.2023.00024}, - Bdsk-Url-2 = {https://doi.org/10.1109/ispass57527.2023.00024}} - + author = {Shvetank Prakash and Tim Callahan and Joseph Bushagour and Colby Banbury and Alan V. Green and Pete Warden and Tim Ansell and Vijay Janapa Reddi}, + year = 2023, + month = {apr}, + booktitle = {2023 {IEEE} International Symposium on Performance Analysis of Systems and Software ({ISPASS})}, + publisher = {{IEEE}}, + doi = {10.1109/ispass57527.2023.00024}, + url = {https://doi.org/10.1109%2Fispass57527.2023.00024}, + bdsk-url-1 = {https://doi.org/10.1109%2Fispass57527.2023.00024}, + bdsk-url-2 = {https://doi.org/10.1109/ispass57527.2023.00024} +} @inproceedings{prakash_cfu_2023, - author = {Prakash, Shvetank and Callahan, Tim and Bushagour, Joseph and Banbury, Colby and Green, Alan V. and Warden, Pete and Ansell, Tim and Reddi, Vijay Janapa}, - booktitle = {2023 {IEEE} {International} {Symposium} on {Performance} {Analysis} of {Systems} and {Software} ({ISPASS})}, - doi = {10.1109/ISPASS57527.2023.00024}, - file = {Prakash et al. - 2023 - CFU Playground Full-Stack Open-Source Framework f.pdf:/Users/alex/Zotero/storage/BZNRIDTL/Prakash et al. - 2023 - CFU Playground Full-Stack Open-Source Framework f.pdf:application/pdf}, - keywords = {Computer Science - Machine Learning, Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Hardware Architecture}, - language = {en}, - month = {Apr}, - note = {arXiv:2201.01863 [cs]}, - pages = {157--167}, - shorttitle = {{CFU} {Playground}}, - title = {{CFU} {Playground}: {Full}-{Stack} {Open}-{Source} {Framework} for {Tiny} {Machine} {Learning} ({tinyML}) {Acceleration} on {FPGAs}}, - url = {http://arxiv.org/abs/2201.01863}, - urldate = {2023-10-25}, - year = {2023}, - Bdsk-Url-1 = {http://arxiv.org/abs/2201.01863}, - Bdsk-Url-2 = {https://doi.org/10.1109/ISPASS57527.2023.00024}} - + shorttitle = {{CFU} {Playground}}, + author = {Prakash, Shvetank and Callahan, Tim and Bushagour, Joseph and Banbury, Colby and Green, Alan V. 
and Warden, Pete and Ansell, Tim and Reddi, Vijay Janapa}, + year = 2023, + month = {Apr}, + booktitle = {2023 {IEEE} {International} {Symposium} on {Performance} {Analysis} of {Systems} and {Software} ({ISPASS})}, + pages = {157--167}, + doi = {10.1109/ISPASS57527.2023.00024}, + url = {http://arxiv.org/abs/2201.01863}, + urldate = {2023-10-25}, + note = {arXiv:2201.01863 [cs]}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2201.01863}, + bdsk-url-2 = {https://doi.org/10.1109/ISPASS57527.2023.00024} +} @article{preparednesspublic, - author = {Preparedness, Emergency}, - title = {Public Health Law}} - + author = {Preparedness, Emergency} +} @article{Pushkarna_Zaldivar_Kjartansson_2022, - author = {Pushkarna, Mahima and Zaldivar, Andrew and Kjartansson, Oddur}, - doi = {10.1145/3531146.3533231}, - journal = {2022 ACM Conference on Fairness, Accountability, and Transparency}, - title = {Data cards: Purposeful and transparent dataset documentation for responsible ai}, - year = {2022}, - Bdsk-Url-1 = {https://doi.org/10.1145/3531146.3533231}} - + author = {Pushkarna, Mahima and Zaldivar, Andrew and Kjartansson, Oddur}, + year = 2022, + journal = {2022 ACM Conference on Fairness, Accountability, and Transparency}, + doi = {10.1145/3531146.3533231}, + bdsk-url-1 = {https://doi.org/10.1145/3531146.3533231} +} +@article{putnam_reconfigurable_2014, + author = {Putnam, Andrew and Caulfield, Adrian M. and Chung, Eric S. and Chiou, Derek and Constantinides, Kypros and Demme, John and Esmaeilzadeh, Hadi and Fowers, Jeremy and Gopal, Gopi Prashanth and Gray, Jan and Haselman, Michael and Hauck, Scott and Heil, Stephen and Hormati, Amir and Kim, Joo-Young and Lanka, Sitaram and Larus, James and Peterson, Eric and Pope, Simon and Smith, Aaron and Thong, Jason and Xiao, Phillip Yi and Burger, Doug}, + year = 2014, + month = oct, + journal = {ACM SIGARCH Computer Architecture News}, + volume = 42, + number = 3, + pages = {13--24}, + doi = {10.1145/2678373.2665678}, + issn = {0163-5964}, + url = {https://dl.acm.org/doi/10.1145/2678373.2665678}, + urldate = {2023-11-07}, + language = {en}, + bdsk-url-1 = {https://dl.acm.org/doi/10.1145/2678373.2665678}, + bdsk-url-2 = {https://doi.org/10.1145/2678373.2665678} +} @article{qi_efficient_2021, - abstract = {Nowadays, deep neural networks (DNNs) have been rapidly deployed to realize a number of functionalities like sensing, imaging, classification, recognition, etc. However, the computational-intensive requirement of DNNs makes it difficult to be applicable for resource-limited Internet of Things (IoT) devices. In this paper, we propose a novel pruning-based paradigm that aims to reduce the computational cost of DNNs, by uncovering a more compact structure and learning the effective weights therein, on the basis of not compromising the expressive capability of DNNs. In particular, our algorithm can achieve efficient end-to-end training that transfers a redundant neural network to a compact one with a specifically targeted compression rate directly. We comprehensively evaluate our approach on various representative benchmark datasets and compared with typical advanced convolutional neural network (CNN) architectures. The experimental results verify the superior performance and robust effectiveness of our scheme. 
For example, when pruning VGG on CIFAR-10, our proposed scheme is able to significantly reduce its FLOPs (floating-point operations) and number of parameters with a proportion of 76.2\% and 94.1\%, respectively, while still maintaining a satisfactory accuracy. To sum up, our scheme could facilitate the integration of DNNs into the common machine-learning-based IoT framework and establish distributed training of neural networks in both cloud and edge.}, - author = {Qi, Chen and Shen, Shibo and Li, Rongpeng and Zhifeng, Zhao and Liu, Qing and Liang, Jing and Zhang, Honggang}, - doi = {10.1186/s13634-021-00744-4}, - file = {Full Text PDF:/Users/jeffreyma/Zotero/storage/AGWCC5VS/Qi et al. - 2021 - An efficient pruning scheme of deep neural network.pdf:application/pdf}, - journal = {EURASIP Journal on Advances in Signal Processing}, - month = {Jun}, - title = {An efficient pruning scheme of deep neural networks for {Internet} of {Things} applications}, - volume = 2021, - year = {2021}, - Bdsk-Url-1 = {https://doi.org/10.1186/s13634-021-00744-4}} - + author = {Qi, Chen and Shen, Shibo and Li, Rongpeng and Zhifeng, Zhao and Liu, Qing and Liang, Jing and Zhang, Honggang}, + year = 2021, + month = {Jun}, + journal = {EURASIP Journal on Advances in Signal Processing}, + volume = 2021, + doi = {10.1186/s13634-021-00744-4}, + bdsk-url-1 = {https://doi.org/10.1186/s13634-021-00744-4} +} @misc{quantdeep, - author = {Krishnamoorthi}, - doi = {10.48550/arXiv.1806.08342}, - month = jun, - publisher = {arXiv}, - title = {Quantizing deep convolutional networks for efficient inference: A whitepaper}, - url = {https://arxiv.org/abs/1806.08342}, - urldate = {2018-06-21}, - year = 2018, - Bdsk-Url-1 = {https://arxiv.org/abs/1806.08342}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1806.08342}} - + author = {Krishnamoorthi}, + year = 2018, + month = jun, + publisher = {arXiv}, + doi = {10.48550/arXiv.1806.08342}, + url = {https://arxiv.org/abs/1806.08342}, + urldate = {2018-06-21}, + bdsk-url-1 = {https://arxiv.org/abs/1806.08342}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1806.08342} +} +@inproceedings{raina_large-scale_2009, + author = {Raina, Rajat and Madhavan, Anand and Ng, Andrew Y.}, + year = 2009, + month = jun, + booktitle = {Proceedings of the 26th {Annual} {International} {Conference} on {Machine} {Learning}}, + publisher = {ACM}, + address = {Montreal Quebec Canada}, + pages = {873--880}, + doi = {10.1145/1553374.1553486}, + isbn = {978-1-60558-516-1}, + url = {https://dl.acm.org/doi/10.1145/1553374.1553486}, + urldate = {2023-11-07}, + language = {en}, + bdsk-url-1 = {https://dl.acm.org/doi/10.1145/1553374.1553486}, + bdsk-url-2 = {https://doi.org/10.1145/1553374.1553486} +} @article{ramcharan2017deep, - author = {Ramcharan, Amanda and Baranowski, Kelsee and McCloskey, Peter and Ahmed, Babuali and Legg, James and Hughes, David P}, - journal = {Frontiers in plant science}, - pages = 1852, - publisher = {Frontiers Media SA}, - title = {Deep learning for image-based cassava disease detection}, - volume = 8, - year = 2017} - + author = {Ramcharan, Amanda and Baranowski, Kelsee and McCloskey, Peter and Ahmed, Babuali and Legg, James and Hughes, David P}, + year = 2017, + journal = {Frontiers in plant science}, + publisher = {Frontiers Media SA}, + volume = 8, + pages = 1852 +} +@inproceedings{ramesh2021zero, + author = {Ramesh, Aditya and Pavlov, Mikhail and Goh, Gabriel and Gray, Scott and Voss, Chelsea and Radford, Alec and Chen, Mark and Sutskever, Ilya}, + year = 2021, + booktitle = 
{International Conference on Machine Learning}, + pages = {8821--8831}, + organization = {PMLR} +} +@article{Ranganathan2011-dc, + author = {Ranganathan, Parthasarathy}, + year = 2011, + month = jan, + journal = {Computer (Long Beach Calif.)}, + publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, + volume = 44, + number = 1, + pages = {39--48} +} @misc{Rao_2021, - author = {Rao, Ravi}, - journal = {www.wevolver.com}, - month = {Dec}, - url = {https://www.wevolver.com/article/tinyml-unlocks-new-possibilities-for-sustainable-development-technologies}, - year = 2021, - Bdsk-Url-1 = {https://www.wevolver.com/article/tinyml-unlocks-new-possibilities-for-sustainable-development-technologies}} - + author = {Rao, Ravi}, + year = 2021, + month = {Dec}, + journal = {www.wevolver.com}, + url = {https://www.wevolver.com/article/tinyml-unlocks-new-possibilities-for-sustainable-development-technologies}, + bdsk-url-1 = {https://www.wevolver.com/article/tinyml-unlocks-new-possibilities-for-sustainable-development-technologies} +} +@inproceedings{Rashmi2018Secure, + author = {R.V. Rashmi and A. Karthikeyan}, + year = 2018, + booktitle = {2018 Second International Conference on Electronics, Communication and Aerospace Technology (ICECA)}, + pages = {291--298}, + date-added = {2023-11-22 17:50:16 -0500}, + date-modified = {2023-11-22 17:51:39 -0500} +} +@article{Ratner_Hancock_Dunnmon_Goldman_Ré_2018, + author = {Ratner, Alex and Hancock, Braden and Dunnmon, Jared and Goldman, Roger and R\'{e}, Christopher}, + year = 2018, + journal = {Proceedings of the Second Workshop on Data Management for End-To-End Machine Learning} +} @inproceedings{reddi2020mlperf, - author = {Reddi, Vijay Janapa and Cheng, Christine and Kanter, David and Mattson, Peter and Schmuelling, Guenther and Wu, Carole-Jean and Anderson, Brian and Breughe, Maximilien and Charlebois, Mark and Chou, William and others}, - booktitle = {2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)}, - organization = {IEEE}, - pages = {446--459}, - title = {Mlperf inference benchmark}, - year = 2020} - + author = {Reddi, Vijay Janapa and Cheng, Christine and Kanter, David and Mattson, Peter and Schmuelling, Guenther and Wu, Carole-Jean and Anderson, Brian and Breughe, Maximilien and Charlebois, Mark and Chou, William and others}, + year = 2020, + booktitle = {2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)}, + pages = {446--459}, + organization = {IEEE} +} @inproceedings{ribeiro2016should, - author = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos}, - booktitle = {Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining}, - pages = {1135--1144}, - title = {" Why should i trust you?" 
Explaining the predictions of any classifier}, - year = 2016} - + author = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos}, + year = 2016, + booktitle = {Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining}, + pages = {1135--1144} +} +@misc{rmsprop, + author = {Geoffrey Hinton}, + year = 2017, + institution = {University of Toronto}, + howpublished = {University Lecture} +} +@conference{Rombach22cvpr, + author = {Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer}, + year = 2022, + booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + url = {https://github.com/CompVis/latent-diffusionhttps://arxiv.org/abs/2112.10752} +} @book{rosenblatt1957perceptron, - author = {Rosenblatt, Frank}, - publisher = {Cornell Aeronautical Laboratory}, - title = {The perceptron, a perceiving and recognizing automaton Project Para}, - year = 1957} - + author = {Rosenblatt, Frank}, + year = 1957, + publisher = {Cornell Aeronautical Laboratory} +} +@article{roskies2002neuroethics, + author = {Roskies, Adina}, + year = 2002, + journal = {Neuron}, + publisher = {Elsevier}, + volume = 35, + number = 1, + pages = {21--23} +} @inproceedings{rouhani2017tinydl, - author = {Rouhani, Bita and Mirhoseini, Azalia and Koushanfar, Farinaz}, - doi = {10.1109/ISCAS.2017.8050343}, - month = {05}, - pages = {1--4}, - title = {TinyDL: Just-in-time deep learning solution for constrained embedded systems}, - year = 2017, - Bdsk-Url-1 = {https://doi.org/10.1109/ISCAS.2017.8050343}} - + author = {Rouhani, Bita and Mirhoseini, Azalia and Koushanfar, Farinaz}, + year = 2017, + month = {05}, + pages = {1--4}, + doi = {10.1109/ISCAS.2017.8050343}, + bdsk-url-1 = {https://doi.org/10.1109/ISCAS.2017.8050343} +} +@article{ruder2016overview, + author = {Ruder, Sebastian}, + year = 2016, + journal = {arXiv preprint arXiv:1609.04747} +} @article{rumelhart1986learning, - author = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J}, - journal = {nature}, - number = 6088, - pages = {533--536}, - publisher = {Nature Publishing Group UK London}, - title = {Learning representations by back-propagating errors}, - volume = 323, - year = 1986} - + author = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J}, + year = 1986, + journal = {nature}, + publisher = {Nature Publishing Group UK London}, + volume = 323, + number = 6088, + pages = {533--536} +} @article{ruvolo_ella_nodate, - author = {Ruvolo, Paul and Eaton, Eric}, - file = {Ruvolo and Eaton - ELLA An Efficient Lifelong Learning Algorithm.pdf:/Users/alex/Zotero/storage/QA5G29GL/Ruvolo and Eaton - ELLA An Efficient Lifelong Learning Algorithm.pdf:application/pdf}, - language = {en}, - title = {{ELLA}: {An} {Efficient} {Lifelong} {Learning} {Algorithm}}} - -@misc{ScaleAI, - journal = {ScaleAI}, - url = {https://scale.com/data-engine}, - Bdsk-Url-1 = {https://scale.com/data-engine}} - + author = {Ruvolo, Paul and Eaton, Eric}, + language = {en} +} +@article{samajdar2018scale, + author = {Samajdar, Ananda and Zhu, Yuhao and Whatmough, Paul and Mattina, Matthew and Krishna, Tushar}, + year = 2018, + journal = {arXiv preprint arXiv:1811.02883} +} +@misc{ScaleAI, + journal = {ScaleAI}, + url = {https://scale.com/data-engine}, + bdsk-url-1 = {https://scale.com/data-engine} +} +@article{schuman2022, + author = {Schuman, Catherine D and Kulkarni, Shruti R and Parsa, Maryam and Mitchell, J Parker and Date, Prasanna and Kay, Bill}, + 
year = 2022, + journal = {Nature Computational Science}, + publisher = {Nature Publishing Group US New York}, + volume = 2, + number = 1, + pages = {10--19} +} @inproceedings{schwarzschild2021just, - author = {Schwarzschild, Avi and Goldblum, Micah and Gupta, Arjun and Dickerson, John P and Goldstein, Tom}, - booktitle = {International Conference on Machine Learning}, - organization = {PMLR}, - pages = {9389--9398}, - title = {Just how toxic is data poisoning? a unified benchmark for backdoor and data poisoning attacks}, - year = 2021} - + author = {Schwarzschild, Avi and Goldblum, Micah and Gupta, Arjun and Dickerson, John P and Goldstein, Tom}, + year = 2021, + booktitle = {International Conference on Machine Learning}, + pages = {9389--9398}, + organization = {PMLR} +} +@inproceedings{sculley2015hidden, + author = {Nithya Sambasivan and Shivani Kapania and Hannah Highfill and Diana Akrong and Praveen Kumar Paritosh and Lora Mois Aroyo}, + year = 2021 +} @misc{see_compression_2016, - abstract = {Neural Machine Translation (NMT), like many other deep learning domains, typically suffers from over-parameterization, resulting in large storage sizes. This paper examines three simple magnitude-based pruning schemes to compress NMT models, namely class-blind, class-uniform, and class-distribution, which differ in terms of how pruning thresholds are computed for the different classes of weights in the NMT architecture. We demonstrate the efficacy of weight pruning as a compression technique for a state-of-the-art NMT system. We show that an NMT model with over 200 million parameters can be pruned by 40\% with very little performance loss as measured on the WMT'14 English-German translation task. This sheds light on the distribution of redundancy in the NMT architecture. Our main result is that with retraining, we can recover and even surpass the original performance with an 80\%-pruned model.}, - author = {See, Abigail and Luong, Minh-Thang and Manning, Christopher D.}, - doi = {10.48550/arXiv.1606.09274}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/2CJ4TSNR/See et al. 
- 2016 - Compression of Neural Machine Translation Models v.pdf:application/pdf}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Neural and Evolutionary Computing}, - month = jun, - note = {arXiv:1606.09274 [cs]}, - publisher = {arXiv}, - title = {Compression of {Neural} {Machine} {Translation} {Models} via {Pruning}}, - url = {http://arxiv.org/abs/1606.09274}, - urldate = {2023-10-20}, - year = 2016, - Bdsk-Url-1 = {http://arxiv.org/abs/1606.09274}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1606.09274}} - + author = {See, Abigail and Luong, Minh-Thang and Manning, Christopher D.}, + year = 2016, + month = jun, + publisher = {arXiv}, + doi = {10.48550/arXiv.1606.09274}, + url = {http://arxiv.org/abs/1606.09274}, + urldate = {2023-10-20}, + note = {arXiv:1606.09274 [cs]}, + bdsk-url-1 = {http://arxiv.org/abs/1606.09274}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1606.09274} +} +@misc{segal1999opengl, + author = {Segal, Mark and Akeley, Kurt}, + year = 1999 +} +@article{segura2018ethical, + author = {Segura Anaya, LH and Alsadoon, Abeer and Costadopoulos, Nectar and Prasad, PWC}, + year = 2018, + journal = {Science and engineering ethics}, + publisher = {Springer}, + volume = 24, + pages = {1--28} +} @inproceedings{seide2016cntk, - author = {Seide, Frank and Agarwal, Amit}, - booktitle = {Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining}, - pages = {2135--2135}, - title = {CNTK: Microsoft's open-source deep-learning toolkit}, - year = 2016} - + author = {Seide, Frank and Agarwal, Amit}, + year = 2016, + booktitle = {Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining}, + pages = {2135--2135} +} @misc{sevilla_compute_2022, - author = {Sevilla, Jaime and Heim, Lennart and Ho, Anson and Besiroglu, Tamay and Hobbhahn, Marius and Villalobos, Pablo}, - file = {Sevilla et al. - 2022 - Compute Trends Across Three Eras of Machine Learni.pdf:/Users/alex/Zotero/storage/24N9RZ72/Sevilla et al. 
- 2022 - Compute Trends Across Three Eras of Machine Learni.pdf:application/pdf}, - keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Computers and Society}, - language = {en}, - month = mar, - note = {arXiv:2202.05924 [cs]}, - publisher = {arXiv}, - title = {Compute {Trends} {Across} {Three} {Eras} of {Machine} {Learning}}, - url = {http://arxiv.org/abs/2202.05924}, - urldate = {2023-10-25}, - year = 2022, - Bdsk-Url-1 = {http://arxiv.org/abs/2202.05924}} - + author = {Sevilla, Jaime and Heim, Lennart and Ho, Anson and Besiroglu, Tamay and Hobbhahn, Marius and Villalobos, Pablo}, + year = 2022, + month = mar, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2202.05924}, + urldate = {2023-10-25}, + note = {arXiv:2202.05924 [cs]}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2202.05924} +} @article{seyedzadeh2018machine, - author = {Seyedzadeh, Saleh and Rahimian, Farzad Pour and Glesk, Ivan and Roper, Marc}, - journal = {Visualization in Engineering}, - pages = {1--20}, - publisher = {Springer}, - title = {Machine learning for estimation of building energy consumption and performance: a review}, - volume = 6, - year = 2018} - + author = {Seyedzadeh, Saleh and Rahimian, Farzad Pour and Glesk, Ivan and Roper, Marc}, + year = 2018, + journal = {Visualization in Engineering}, + publisher = {Springer}, + volume = 6, + pages = {1--20} +} +@article{sgd, + author = {Herbert Robbins and Sutton Monro}, + year = 1951, + journal = {The Annals of Mathematical Statistics}, + url = {https://doi.org/10.1214/aoms/1177729586} +} @article{shamir1979share, - author = {Shamir, Adi}, - journal = {Communications of the ACM}, - number = 11, - pages = {612--613}, - publisher = {ACm New York, NY, USA}, - title = {How to share a secret}, - volume = 22, - year = 1979} - + author = {Shamir, Adi}, + year = 1979, + journal = {Communications of the ACM}, + publisher = {ACm New York, NY, USA}, + volume = 22, + number = 11, + pages = {612--613} +} +@article{shan2023prompt, + author = {Shan, Shawn and Ding, Wenxin and Passananti, Josephine and Zheng, Haitao and Zhao, Ben Y}, + year = 2023, + journal = {arXiv preprint arXiv:2310.13828} +} +@article{shastri2021photonics, + author = {Shastri, Bhavin J and Tait, Alexander N and Ferreira de Lima, Thomas and Pernice, Wolfram HP and Bhaskaran, Harish and Wright, C David and Prucnal, Paul R}, + year = 2021, + journal = {Nature Photonics}, + publisher = {Nature Publishing Group UK London}, + volume = 15, + number = 2, + pages = {102--114} +} @article{Sheng_Zhang_2019, - author = {Sheng, Victor S. and Zhang, Jing}, - doi = {10.1609/aaai.v33i01.33019837}, - journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, - number = {01}, - pages = {9837--9843}, - title = {Machine learning with crowdsourcing: A brief summary of the past research and Future Directions}, - volume = 33, - year = 2019, - Bdsk-Url-1 = {https://doi.org/10.1609/aaai.v33i01.33019837}} - + author = {Sheng, Victor S. 
and Zhang, Jing}, + year = 2019, + journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, + volume = 33, + number = {01}, + pages = {9837--9843}, + doi = {10.1609/aaai.v33i01.33019837}, + bdsk-url-1 = {https://doi.org/10.1609/aaai.v33i01.33019837} +} @misc{Sheth_2022, - author = {Sheth, Dhruv}, - journal = {Hackster.io}, - month = {Mar}, - title = {Eletect - TinyML and IOT based Smart Wildlife Tracker}, - url = {https://www.hackster.io/dhruvsheth_/eletect-tinyml-and-iot-based-smart-wildlife-tracker-c03e5a}, - year = 2022, - Bdsk-Url-1 = {https://www.hackster.io/dhruvsheth_/eletect-tinyml-and-iot-based-smart-wildlife-tracker-c03e5a}} - + author = {Sheth, Dhruv}, + year = 2022, + month = {Mar}, + journal = {Hackster.io}, + url = {https://www.hackster.io/dhruvsheth_/eletect-tinyml-and-iot-based-smart-wildlife-tracker-c03e5a}, + bdsk-url-1 = {https://www.hackster.io/dhruvsheth_/eletect-tinyml-and-iot-based-smart-wildlife-tracker-c03e5a} +} @inproceedings{shi2022data, - author = {Shi, Hongrui and Radu, Valentin}, - booktitle = {Proceedings of the 2nd European Workshop on Machine Learning and Systems}, - pages = {72--78}, - title = {Data selection for efficient model update in federated learning}, - year = 2022} - + author = {Shi, Hongrui and Radu, Valentin}, + year = 2022, + booktitle = {Proceedings of the 2nd European Workshop on Machine Learning and Systems}, + pages = {72--78} +} +@inproceedings{skorobogatov2003optical, + author = {Skorobogatov, Sergei P and Anderson, Ross J}, + year = 2003, + booktitle = {Cryptographic Hardware and Embedded Systems-CHES 2002: 4th International Workshop Redwood Shores, CA, USA, August 13--15, 2002 Revised Papers 4}, + pages = {2--12}, + organization = {Springer} +} +@inproceedings{skorobogatov2009local, + author = {Skorobogatov, Sergei}, + year = 2009, + booktitle = {2009 IEEE International Workshop on Hardware-Oriented Security and Trust}, + pages = {1--6}, + organization = {IEEE} +} @article{smestad2023systematic, - author = {Smestad, Carl and Li, Jingyue}, - journal = {arXiv preprint arXiv:2306.04862}, - title = {A Systematic Literature Review on Client Selection in Federated Learning}, - year = 2023} - + author = {Smestad, Carl and Li, Jingyue}, + year = 2023, + journal = {arXiv preprint arXiv:2306.04862} +} @misc{smoothquant, - abstract = {Large language models (LLMs) show excellent performance but are compute- and memory-intensive. Quantization can reduce memory and accelerate inference. However, existing methods cannot maintain accuracy and hardware efficiency at the same time. We propose SmoothQuant, a training-free, accuracy-preserving, and general-purpose post-training quantization (PTQ) solution to enable 8-bit weight, 8-bit activation (W8A8) quantization for LLMs. Based on the fact that weights are easy to quantize while activations are not, SmoothQuant smooths the activation outliers by offline migrating the quantization difficulty from activations to weights with a mathematically equivalent transformation. SmoothQuant enables an INT8 quantization of both weights and activations for all the matrix multiplications in LLMs, including OPT, BLOOM, GLM, MT-NLG, and LLaMA family. We demonstrate up to 1.56x speedup and 2x memory reduction for LLMs with negligible loss in accuracy. SmoothQuant enables serving 530B LLM within a single node. 
Our work offers a turn-key solution that reduces hardware costs and democratizes LLMs.}, - author = {Xiao and Lin, Seznec and Wu, Demouth and Han}, - doi = {10.48550/arXiv.2211.10438}, - title = {SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models}, - url = {https://arxiv.org/abs/2211.10438}, - urldate = {2023-06-05}, - year = 2023, - Bdsk-Url-1 = {https://arxiv.org/abs/2211.10438}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2211.10438}} - + author = {Xiao and Lin, Seznec and Wu, Demouth and Han}, + year = 2023, + doi = {10.48550/arXiv.2211.10438}, + url = {https://arxiv.org/abs/2211.10438}, + urldate = {2023-06-05}, + bdsk-url-1 = {https://arxiv.org/abs/2211.10438}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2211.10438} +} +@article{soufleri2023synthetic, + author = {Efstathia Soufleri and Gobinda Saha and Kaushik Roy}, + year = 2023, + journal = {arXiv preprint arXiv:2210.03205}, + date-added = {2023-11-22 19:26:18 -0500}, + date-modified = {2023-11-22 19:26:57 -0500} +} +@inproceedings{suda2016throughput, + author = {Suda, Naveen and Chandra, Vikas and Dasika, Ganesh and Mohanty, Abinash and Ma, Yufei and Vrudhula, Sarma and Seo, Jae-sun and Cao, Yu}, + year = 2016, + booktitle = {Proceedings of the 2016 ACM/SIGDA international symposium on field-programmable gate arrays}, + pages = {16--25} +} @misc{surveyofquant, - abstract = {As soon as abstract mathematical computations were adapted to computation on digital computers, the problem of efficient representation, manipulation, and communication of the numerical values in those computations arose. Strongly related to the problem of numerical representation is the problem of quantization: in what manner should a set of continuous real-valued numbers be distributed over a fixed discrete set of numbers to minimize the number of bits required and also to maximize the accuracy of the attendant computations? This perennial problem of quantization is particularly relevant whenever memory and/or computational resources are severely restricted, and it has come to the forefront in recent years due to the remarkable performance of Neural Network models in computer vision, natural language processing, and related areas. Moving from floating-point representations to low-precision fixed integer values represented in four bits or less holds the potential to reduce the memory footprint and latency by a factor of 16x; and, in fact, reductions of 4x to 8x are often realized in practice in these applications. Thus, it is not surprising that quantization has emerged recently as an important and very active sub-area of research in the efficient implementation of computations associated with Neural Networks. In this article, we survey approaches to the problem of quantizing the numerical values in deep Neural Network computations, covering the advantages/disadvantages of current methods. 
With this survey and its organization, we hope to have presented a useful snapshot of the current research in quantization for Neural Networks and to have given an intelligent organization to ease the evaluation of future research in this area.}, - author = {Gholami and Kim, Dong and Yao, Mahoney and Keutzer}, - doi = {10.48550/arXiv.2103.13630}, - title = {A Survey of Quantization Methods for Efficient Neural Network Inference)}, - url = {https://arxiv.org/abs/2103.13630}, - urldate = {2021-06-21}, - year = 2021, - Bdsk-Url-1 = {https://arxiv.org/abs/2103.13630}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.2103.13630}} - + author = {Gholami and Kim, Dong and Yao, Mahoney and Keutzer}, + year = 2021, + doi = {10.48550/arXiv.2103.13630}, + url = {https://arxiv.org/abs/2103.13630}, + urldate = {2021-06-21}, + bdsk-url-1 = {https://arxiv.org/abs/2103.13630}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.2103.13630} +} +@article{Sze2017-ak, + author = {Sze, Vivienne and Chen, Yu-Hsin and Yang, Tien-Ju and Emer, Joel}, + year = 2017, + month = mar, + copyright = {http://arxiv.org/licenses/nonexclusive-distrib/1.0/}, + archiveprefix = {arXiv}, + eprint = {1703.09039}, + primaryclass = {cs.CV} +} +@article{sze2017efficient, + author = {Sze, Vivienne and Chen, Yu-Hsin and Yang, Tien-Ju and Emer, Joel S}, + year = 2017, + journal = {Proceedings of the IEEE}, + publisher = {Ieee}, + volume = 105, + number = 12, + pages = {2295--2329} +} @misc{tan_efficientnet_2020, - abstract = {Convolutional Neural Networks (ConvNets) are commonly developed at a fixed resource budget, and then scaled up for better accuracy if more resources are available. In this paper, we systematically study model scaling and identify that carefully balancing network depth, width, and resolution can lead to better performance. Based on this observation, we propose a new scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient. We demonstrate the effectiveness of this method on scaling up MobileNets and ResNet. To go even further, we use neural architecture search to design a new baseline network and scale it up to obtain a family of models, called EfficientNets, which achieve much better accuracy and efficiency than previous ConvNets. In particular, our EfficientNet-B7 achieves state-of-the-art 84.3\% top-1 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster on inference than the best existing ConvNet. Our EfficientNets also transfer well and achieve state-of-the-art accuracy on CIFAR-100 (91.7\%), Flowers (98.8\%), and 3 other transfer learning datasets, with an order of magnitude fewer parameters. 
Source code is at https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet.}, - author = {Tan, Mingxing and Le, Quoc V.}, - doi = {10.48550/arXiv.1905.11946}, - file = {arXiv Fulltext PDF:/Users/jeffreyma/Zotero/storage/KISBF35I/Tan and Le - 2020 - EfficientNet Rethinking Model Scaling for Convolu.pdf:application/pdf;arXiv.org Snapshot:/Users/jeffreyma/Zotero/storage/TUD4PH4M/1905.html:text/html}, - keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning}, - month = sep, - note = {arXiv:1905.11946 [cs, stat]}, - publisher = {arXiv}, - shorttitle = {{EfficientNet}}, - title = {{EfficientNet}: {Rethinking} {Model} {Scaling} for {Convolutional} {Neural} {Networks}}, - url = {http://arxiv.org/abs/1905.11946}, - urldate = {2023-10-20}, - year = 2020, - Bdsk-Url-1 = {http://arxiv.org/abs/1905.11946}, - Bdsk-Url-2 = {https://doi.org/10.48550/arXiv.1905.11946}} - + shorttitle = {{EfficientNet}}, + author = {Tan, Mingxing and Le, Quoc V.}, + year = 2020, + month = sep, + publisher = {arXiv}, + doi = {10.48550/arXiv.1905.11946}, + url = {http://arxiv.org/abs/1905.11946}, + urldate = {2023-10-20}, + note = {arXiv:1905.11946 [cs, stat]}, + bdsk-url-1 = {http://arxiv.org/abs/1905.11946}, + bdsk-url-2 = {https://doi.org/10.48550/arXiv.1905.11946} +} @inproceedings{tan2019mnasnet, - author = {Tan, Mingxing and Chen, Bo and Pang, Ruoming and Vasudevan, Vijay and Sandler, Mark and Howard, Andrew and Le, Quoc V}, - booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, - pages = {2820--2828}, - title = {Mnasnet: Platform-aware neural architecture search for mobile}, - year = 2019} - + author = {Tan, Mingxing and Chen, Bo and Pang, Ruoming and Vasudevan, Vijay and Sandler, Mark and Howard, Andrew and Le, Quoc V}, + year = 2019, + booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages = {2820--2828} +} @misc{tan2020efficientnet, + author = {Mingxing Tan and Quoc V. Le}, + year = 2020, archiveprefix = {arXiv}, - author = {Mingxing Tan and Quoc V. 
Le}, - eprint = {1905.11946}, - primaryclass = {cs.LG}, - title = {EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks}, - year = 2020} - + eprint = {1905.11946}, + primaryclass = {cs.LG} +} +@article{tang2022soft, + author = {Tang, Xin and He, Yichun and Liu, Jia}, + year = 2022, + journal = {Biophysics Reviews}, + publisher = {AIP Publishing}, + volume = 3, + number = 1 +} +@article{tang2023flexible, + author = {Tang, Xin and Shen, Hao and Zhao, Siyuan and Li, Na and Liu, Jia}, + year = 2023, + journal = {Nature Electronics}, + publisher = {Nature Publishing Group UK London}, + volume = 6, + number = 2, + pages = {109--118} +} +@article{tarun2023deep, + author = {Ayush K Tarun and Vikram S Chundawat and Murari Mandal and Mohan Kankanhalli}, + year = 2023, + journal = {arXiv preprint arXiv:2210.08196}, + date-added = {2023-11-22 19:20:59 -0500}, + date-modified = {2023-11-22 19:21:59 -0500} +} @misc{Team_2023, - author = {Team, Snorkel}, - journal = {Snorkel AI}, - month = {Aug}, - title = {Data-centric AI for the Enterprise}, - url = {https://snorkel.ai/}, - year = 2023, - Bdsk-Url-1 = {https://snorkel.ai/}} - + author = {Team, Snorkel}, + year = 2023, + month = {Aug}, + journal = {Snorkel AI}, + url = {https://snorkel.ai/}, + bdsk-url-1 = {https://snorkel.ai/} +} @misc{Thefutur92:online, - author = {ARM.com}, - howpublished = {\url{https://www.arm.com/company/news/2023/02/arm-announces-q3-fy22-results}}, - note = {(Accessed on 09/16/2023)}, - title = {The future is being built on Arm: Market diversification continues to drive strong royalty and licensing growth as ecosystem reaches quarter of a trillion chips milestone -- Arm{\textregistered}}} - + author = {ARM.com}, + note = {(Accessed on 09/16/2023)}, + howpublished = {\url{https://www.arm.com/company/news/2023/02/arm-announces-q3-fy22-results}} +} @misc{threefloat, - author = {Google}, - title = {Three Floating Point Formats}, - url = {https://storage.googleapis.com/gweb-cloudblog-publish/images/Three_floating-point_formats.max-624x261.png}, - urldate = {2023-10-20}, - year = 2023, - Bdsk-Url-1 = {https://storage.googleapis.com/gweb-cloudblog-publish/images/Three_floating-point_formats.max-624x261.png}} - + author = {Google}, + year = 2023, + url = {https://storage.googleapis.com/gweb-cloudblog-publish/images/Three_floating-point_formats.max-624x261.png}, + urldate = {2023-10-20}, + bdsk-url-1 = {https://storage.googleapis.com/gweb-cloudblog-publish/images/Three_floating-point_formats.max-624x261.png} +} @article{tirtalistyani2022indonesia, - author = {Tirtalistyani, Rose and Murtiningrum, Murtiningrum and Kanwar, Rameshwar S}, - journal = {Sustainability}, - number = 19, - pages = 12477, - publisher = {MDPI}, - title = {Indonesia rice irrigation system: Time for innovation}, - volume = 14, - year = 2022} - + author = {Tirtalistyani, Rose and Murtiningrum, Murtiningrum and Kanwar, Rameshwar S}, + year = 2022, + journal = {Sustainability}, + publisher = {MDPI}, + volume = 14, + number = 19, + pages = 12477 +} @inproceedings{tokui2015chainer, - author = {Tokui, Seiya and Oono, Kenta and Hido, Shohei and Clayton, Justin}, - booktitle = {Proceedings of workshop on machine learning systems (LearningSys) in the twenty-ninth annual conference on neural information processing systems (NIPS)}, - pages = {1--6}, - title = {Chainer: a next-generation open source framework for deep learning}, - volume = 5, - year = 2015} - + author = {Tokui, Seiya and Oono, Kenta and Hido, Shohei and Clayton, Justin}, + year = 2015, + 
booktitle = {Proceedings of workshop on machine learning systems (LearningSys) in the twenty-ninth annual conference on neural information processing systems (NIPS)}, + volume = 5, + pages = {1--6} +} @article{van_de_ven_three_2022, - author = {Van De Ven, Gido M. and Tuytelaars, Tinne and Tolias, Andreas S.}, - doi = {10.1038/s42256-022-00568-3}, - file = {Van De Ven et al. - 2022 - Three types of incremental learning.pdf:/Users/alex/Zotero/storage/5ZAHXMQN/Van De Ven et al. - 2022 - Three types of incremental learning.pdf:application/pdf}, - issn = {2522-5839}, - journal = {Nature Machine Intelligence}, - language = {en}, - month = dec, - number = 12, - pages = {1185--1197}, - title = {Three types of incremental learning}, - url = {https://www.nature.com/articles/s42256-022-00568-3}, - urldate = {2023-10-26}, - volume = 4, - year = 2022, - Bdsk-Url-1 = {https://www.nature.com/articles/s42256-022-00568-3}, - Bdsk-Url-2 = {https://doi.org/10.1038/s42256-022-00568-3}} - + author = {Van De Ven, Gido M. and Tuytelaars, Tinne and Tolias, Andreas S.}, + year = 2022, + month = dec, + journal = {Nature Machine Intelligence}, + volume = 4, + number = 12, + pages = {1185--1197}, + doi = {10.1038/s42256-022-00568-3}, + issn = {2522-5839}, + url = {https://www.nature.com/articles/s42256-022-00568-3}, + urldate = {2023-10-26}, + language = {en}, + bdsk-url-1 = {https://www.nature.com/articles/s42256-022-00568-3}, + bdsk-url-2 = {https://doi.org/10.1038/s42256-022-00568-3} +} @article{vaswani2017attention, - author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia}, - journal = {Advances in neural information processing systems}, - title = {Attention is all you need}, - volume = 30, - year = 2017} - + author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia}, + year = 2017, + journal = {Advances in neural information processing systems}, + volume = 30 +} @misc{Vectorbo78:online, - howpublished = {\url{https://www.who.int/news-room/fact-sheets/detail/vector-borne-diseases}}, - note = {(Accessed on 10/17/2023)}, - title = {Vector-borne diseases}} - + note = {(Accessed on 10/17/2023)}, + howpublished = {\url{https://www.who.int/news-room/fact-sheets/detail/vector-borne-diseases}} +} @misc{Verma_2022, - author = {Verma, Team Dual_Boot: Swapnil}, - journal = {Hackster.io}, - month = {Mar}, - title = {Elephant AI}, - url = {https://www.hackster.io/dual_boot/elephant-ai-ba71e9}, - year = 2022, - Bdsk-Url-1 = {https://www.hackster.io/dual_boot/elephant-ai-ba71e9}} - + author = {Verma, Team Dual_Boot: Swapnil}, + year = 2022, + month = {Mar}, + journal = {Hackster.io}, + url = {https://www.hackster.io/dual_boot/elephant-ai-ba71e9}, + bdsk-url-1 = {https://www.hackster.io/dual_boot/elephant-ai-ba71e9} +} +@article{verma2019memory, + author = {Verma, Naveen and Jia, Hongyang and Valavi, Hossein and Tang, Yinqi and Ozatay, Murat and Chen, Lung-Yen and Zhang, Bonan and Deaville, Peter}, + year = 2019, + journal = {IEEE Solid-State Circuits Magazine}, + publisher = {IEEE}, + volume = 11, + number = 3, + pages = {43--55} +} @misc{villalobos_machine_2022, - author = {Villalobos, Pablo and Sevilla, Jaime and Besiroglu, Tamay and Heim, Lennart and Ho, Anson and Hobbhahn, Marius}, - file = {Villalobos et al. 
- 2022 - Machine Learning Model Sizes and the Parameter Gap.pdf:/Users/alex/Zotero/storage/WW69A82B/Villalobos et al. - 2022 - Machine Learning Model Sizes and the Parameter Gap.pdf:application/pdf}, - keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Computers and Society, Computer Science - Computation and Language}, - language = {en}, - month = jul, - note = {arXiv:2207.02852 [cs]}, - publisher = {arXiv}, - title = {Machine {Learning} {Model} {Sizes} and the {Parameter} {Gap}}, - url = {http://arxiv.org/abs/2207.02852}, - urldate = {2023-10-25}, - year = 2022, - Bdsk-Url-1 = {http://arxiv.org/abs/2207.02852}} - + author = {Villalobos, Pablo and Sevilla, Jaime and Besiroglu, Tamay and Heim, Lennart and Ho, Anson and Hobbhahn, Marius}, + year = 2022, + month = jul, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2207.02852}, + urldate = {2023-10-25}, + note = {arXiv:2207.02852 [cs]}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2207.02852} +} @misc{villalobos_trends_2022, - author = {Villalobos, Pablo and Ho, Anson}, - journal = {Epoch AI}, - month = sep, - title = {Trends in {Training} {Dataset} {Sizes}}, - url = {https://epochai.org/blog/trends-in-training-dataset-sizes}, - year = 2022, - Bdsk-Url-1 = {https://epochai.org/blog/trends-in-training-dataset-sizes}} - + author = {Villalobos, Pablo and Ho, Anson}, + year = 2022, + month = sep, + journal = {Epoch AI}, + url = {https://epochai.org/blog/trends-in-training-dataset-sizes}, + bdsk-url-1 = {https://epochai.org/blog/trends-in-training-dataset-sizes} +} @misc{VinBrain, - journal = {VinBrain}, - url = {https://vinbrain.net/aiscaler}, - Bdsk-Url-1 = {https://vinbrain.net/aiscaler}} - + journal = {VinBrain}, + url = {https://vinbrain.net/aiscaler}, + bdsk-url-1 = {https://vinbrain.net/aiscaler} +} @article{vinuesa2020role, - author = {Vinuesa, Ricardo and Azizpour, Hossein and Leite, Iolanda and Balaam, Madeline and Dignum, Virginia and Domisch, Sami and Fell{\"a}nder, Anna and Langhans, Simone Daniela and Tegmark, Max and Fuso Nerini, Francesco}, - journal = {Nature communications}, - number = 1, - pages = {1--10}, - publisher = {Nature Publishing Group}, - title = {The role of artificial intelligence in achieving the Sustainable Development Goals}, - volume = 11, - year = 2020} - + author = {Vinuesa, Ricardo and Azizpour, Hossein and Leite, Iolanda and Balaam, Madeline and Dignum, Virginia and Domisch, Sami and Fell{\"a}nder, Anna and Langhans, Simone Daniela and Tegmark, Max and Fuso Nerini, Francesco}, + year = 2020, + journal = {Nature communications}, + publisher = {Nature Publishing Group}, + volume = 11, + number = 1, + pages = {1--10} +} +@article{Vivet2021, + author = {Vivet, Pascal and Guthmuller, Eric and Thonnart, Yvain and Pillonnet, Gael and Fuguet, C{\'e}sar and Miro-Panades, Ivan and Moritz, Guillaume and Durupt, Jean and Bernard, Christian and Varreau, Didier and Pontes, Julian and Thuries, S{\'e}bastien and Coriat, David and Harrand, Michel and Dutoit, Denis and Lattard, Didier and Arnaud, Lucile and Charbonnier, Jean and Coudrain, Perceval and Garnier, Arnaud and Berger, Fr{\'e}d{\'e}ric and Gueugnot, Alain and Greiner, Alain and Meunier, Quentin L. 
and Farcy, Alexis and Arriordaz, Alexandre and Ch{\'e}ramy, S{\'e}verine and Clermidy, Fabien}, + year = 2021, + journal = {IEEE Journal of Solid-State Circuits}, + volume = 56, + number = 1, + pages = {79--97}, + doi = {10.1109/JSSC.2020.3036341}, + bdsk-url-1 = {https://doi.org/10.1109/JSSC.2020.3036341} +} +@inproceedings{wang2020apq, + author = {Wang, Tianzhe and Wang, Kuan and Cai, Han and Lin, Ji and Liu, Zhijian and Wang, Hanrui and Lin, Yujun and Han, Song}, + year = 2020, + booktitle = {2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + pages = {2075--2084}, + doi = {10.1109/CVPR42600.2020.00215}, + bdsk-url-1 = {https://doi.org/10.1109/CVPR42600.2020.00215} +} @article{warden2018speech, - author = {Warden, Pete}, - journal = {arXiv preprint arXiv:1804.03209}, - title = {Speech commands: A dataset for limited-vocabulary speech recognition}, - year = 2018} - + author = {Warden, Pete}, + year = 2018, + journal = {arXiv preprint arXiv:1804.03209} +} @book{warden2019tinyml, - author = {Warden, Pete and Situnayake, Daniel}, - publisher = {O'Reilly Media}, - title = {Tinyml: Machine learning with tensorflow lite on arduino and ultra-low-power microcontrollers}, - year = 2019} - + author = {Warden, Pete and Situnayake, Daniel}, + year = 2019, + publisher = {O'Reilly Media} +} +@article{wearableinsulin, + author = {Psoma, Sotiria D. and Kanthou, Chryso}, + year = 2023, + journal = {Biosensors}, + volume = 13, + number = 7, + doi = {10.3390/bios13070719}, + issn = {2079-6374}, + url = {https://www.mdpi.com/2079-6374/13/7/719}, + article-number = 719, + pubmedid = 37504117, + bdsk-url-1 = {https://www.mdpi.com/2079-6374/13/7/719}, + bdsk-url-2 = {https://doi.org/10.3390/bios13070719} +} +@book{weik_survey_1955, + author = {Weik, Martin H.}, + year = 1955, + publisher = {Ballistic Research Laboratories}, + language = {en} +} @article{weiss_survey_2016, - author = {Weiss, Karl and Khoshgoftaar, Taghi M. and Wang, DingDing}, - doi = {10.1186/s40537-016-0043-6}, - file = {Weiss et al. - 2016 - A survey of transfer learning.pdf:/Users/alex/Zotero/storage/3FN2Y6EA/Weiss et al. - 2016 - A survey of transfer learning.pdf:application/pdf}, - issn = {2196-1115}, - journal = {Journal of Big Data}, - language = {en}, - month = dec, - number = 1, - pages = 9, - title = {A survey of transfer learning}, - url = {http://journalofbigdata.springeropen.com/articles/10.1186/s40537-016-0043-6}, - urldate = {2023-10-25}, - volume = 3, - year = 2016, - Bdsk-Url-1 = {http://journalofbigdata.springeropen.com/articles/10.1186/s40537-016-0043-6}, - Bdsk-Url-2 = {https://doi.org/10.1186/s40537-016-0043-6}} - + author = {Weiss, Karl and Khoshgoftaar, Taghi M. 
and Wang, DingDing}, + year = 2016, + month = dec, + journal = {Journal of Big Data}, + volume = 3, + number = 1, + pages = 9, + doi = {10.1186/s40537-016-0043-6}, + issn = {2196-1115}, + url = {http://journalofbigdata.springeropen.com/articles/10.1186/s40537-016-0043-6}, + urldate = {2023-10-25}, + language = {en}, + bdsk-url-1 = {http://journalofbigdata.springeropen.com/articles/10.1186/s40537-016-0043-6}, + bdsk-url-2 = {https://doi.org/10.1186/s40537-016-0043-6} +} +@article{wong2012metal, + author = {Wong, H-S Philip and Lee, Heng-Yuan and Yu, Shimeng and Chen, Yu-Sheng and Wu, Yi and Chen, Pang-Shiu and Lee, Byoungil and Chen, Frederick T and Tsai, Ming-Jinn}, + year = 2012, + journal = {Proceedings of the IEEE}, + publisher = {IEEE}, + volume = 100, + number = 6, + pages = {1951--1970} +} @inproceedings{wu2019fbnet, - author = {Wu, Bichen and Dai, Xiaoliang and Zhang, Peizhao and Wang, Yanghan and Sun, Fei and Wu, Yiming and Tian, Yuandong and Vajda, Peter and Jia, Yangqing and Keutzer, Kurt}, - booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, - pages = {10734--10742}, - title = {Fbnet: Hardware-aware efficient convnet design via differentiable neural architecture search}, - year = 2019} - + author = {Wu, Bichen and Dai, Xiaoliang and Zhang, Peizhao and Wang, Yanghan and Sun, Fei and Wu, Yiming and Tian, Yuandong and Vajda, Peter and Jia, Yangqing and Keutzer, Kurt}, + year = 2019, + booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages = {10734--10742} +} @article{wu2022sustainable, - author = {Wu, Carole-Jean and Raghavendra, Ramya and Gupta, Udit and Acun, Bilge and Ardalani, Newsha and Maeng, Kiwan and Chang, Gloria and Aga, Fiona and Huang, Jinshi and Bai, Charles and others}, - journal = {Proceedings of Machine Learning and Systems}, - pages = {795--813}, - title = {Sustainable ai: Environmental implications, challenges and opportunities}, - volume = 4, - year = 2022} - + author = {Wu, Carole-Jean and Raghavendra, Ramya and Gupta, Udit and Acun, Bilge and Ardalani, Newsha and Maeng, Kiwan and Chang, Gloria and Aga, Fiona and Huang, Jinshi and Bai, Charles and others}, + year = 2022, + journal = {Proceedings of Machine Learning and Systems}, + volume = 4, + pages = {795--813} +} +@inproceedings{xavier, + author = {Glorot, Xavier and Bengio, Yoshua}, + year = 2010, + booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics}, + url = {https://proceedings.mlr.press/v9/glorot10a.html} +} @inproceedings{xie2020adversarial, - author = {Xie, Cihang and Tan, Mingxing and Gong, Boqing and Wang, Jiang and Yuille, Alan L and Le, Quoc V}, - booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, - pages = {819--828}, - title = {Adversarial examples improve image recognition}, - year = 2020} - + author = {Xie, Cihang and Tan, Mingxing and Gong, Boqing and Wang, Jiang and Yuille, Alan L and Le, Quoc V}, + year = 2020, + booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages = {819--828} +} +@article{xiong_mri-based_2021, + author = {Xiong, Siyu and Wu, Guoqing and Fan, Xitian and Feng, Xuan and Huang, Zhongcheng and Cao, Wei and Zhou, Xuegong and Ding, Shijin and Yu, Jinhua and Wang, Lingli and Shi, Zhifeng}, + year = 2021, + month = sep, + journal = {BMC Bioinformatics}, + volume = 22, + number = 1, + pages = 421, + doi = {10.1186/s12859-021-04347-6}, 
+ issn = {1471-2105}, + url = {https://doi.org/10.1186/s12859-021-04347-6}, + urldate = {2023-11-07}, + bdsk-url-1 = {https://doi.org/10.1186/s12859-021-04347-6} +} +@article{xiu2019time, + author = {Xiu, Liming}, + year = 2019, + title = {Time Moore: Exploiting Moore's Law from the perspective of time}, + journal = {IEEE Solid-State Circuits Magazine}, + publisher = {IEEE}, + volume = 11, + number = 1, + pages = {39--55} +} @article{xu2018alternating, - author = {Xu, Chen and Yao, Jianqiang and Lin, Zhouchen and Ou, Wenwu and Cao, Yuanbin and Wang, Zhirong and Zha, Hongbin}, - journal = {arXiv preprint arXiv:1802.00150}, - title = {Alternating multi-bit quantization for recurrent neural networks}, - year = 2018} - + author = {Xu, Chen and Yao, Jianqiang and Lin, Zhouchen and Ou, Wenwu and Cao, Yuanbin and Wang, Zhirong and Zha, Hongbin}, + year = 2018, + title = {Alternating multi-bit quantization for recurrent neural networks}, + journal = {arXiv preprint arXiv:1802.00150} +} @article{xu2023demystifying, - author = {Xu, Hu and Xie, Saining and Tan, Xiaoqing Ellen and Huang, Po-Yao and Howes, Russell and Sharma, Vasu and Li, Shang-Wen and Ghosh, Gargi and Zettlemoyer, Luke and Feichtenhofer, Christoph}, - journal = {arXiv preprint arXiv:2309.16671}, - title = {Demystifying CLIP Data}, - year = 2023} - + author = {Xu, Hu and Xie, Saining and Tan, Xiaoqing Ellen and Huang, Po-Yao and Howes, Russell and Sharma, Vasu and Li, Shang-Wen and Ghosh, Gargi and Zettlemoyer, Luke and Feichtenhofer, Christoph}, + year = 2023, + title = {Demystifying CLIP Data}, + journal = {arXiv preprint arXiv:2309.16671} +} @article{xu2023federated, - author = {Xu, Zheng and Zhang, Yanxiang and Andrew, Galen and Choquette-Choo, Christopher A and Kairouz, Peter and McMahan, H Brendan and Rosenstock, Jesse and Zhang, Yuanbo}, - journal = {arXiv preprint arXiv:2305.18465}, - title = {Federated Learning of Gboard Language Models with Differential Privacy}, - year = 2023} - + author = {Xu, Zheng and Zhang, Yanxiang and Andrew, Galen and Choquette-Choo, Christopher A and Kairouz, Peter and McMahan, H Brendan and Rosenstock, Jesse and Zhang, Yuanbo}, + year = 2023, + title = {Federated Learning of Gboard Language Models with Differential Privacy}, + journal = {arXiv preprint arXiv:2305.18465} +} @article{yamashita2023coffee, - author = {Yamashita, Jo{\~a}o Vitor Yukio Bordin and Leite, Jo{\~a}o Paulo RR}, - journal = {Smart Agricultural Technology}, - pages = 100183, - publisher = {Elsevier}, - title = {Coffee disease classification at the edge using deep learning}, - volume = 4, - year = 2023} - + author = {Yamashita, Jo{\~a}o Vitor Yukio Bordin and Leite, Jo{\~a}o Paulo RR}, + year = 2023, + title = {Coffee disease classification at the edge using deep learning}, + journal = {Smart Agricultural Technology}, + publisher = {Elsevier}, + volume = 4, + pages = 100183 +} @misc{yang2020coexploration, + author = {Lei Yang and Zheyu Yan and Meng Li and Hyoukjun Kwon and Liangzhen Lai and Tushar Krishna and Vikas Chandra and Weiwen Jiang and Yiyu Shi}, + year = 2020, + title = {Co-Exploration of Neural Architectures and Heterogeneous ASIC Accelerator Designs Targeting Multiple Tasks}, archiveprefix = {arXiv}, - author = {Lei Yang and Zheyu Yan and Meng Li and Hyoukjun Kwon and Liangzhen Lai and Tushar Krishna and Vikas Chandra and Weiwen Jiang and Yiyu Shi}, - eprint = {2002.04116}, - primaryclass = {cs.LG}, - title = {Co-Exploration of Neural Architectures and Heterogeneous ASIC Accelerator Designs Targeting Multiple Tasks}, - year = 2020} - + eprint = {2002.04116}, + primaryclass = {cs.LG} +} @inproceedings{yang2023online, - author = {Yang, Tien-Ju and Xiao, Yonghui and Motta, Giovanni and Beaufays, Fran{\c{c}}oise and Mathews, Rajiv and Chen, Mingqing}, - booktitle = {ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, - organization = {IEEE}, - pages = {1--5}, - title = {Online Model Compression for Federated Learning with Large Models},
- year = 2023} - + author = {Yang, Tien-Ju and Xiao, Yonghui and Motta, Giovanni and Beaufays, Fran{\c{c}}oise and Mathews, Rajiv and Chen, Mingqing}, + year = 2023, + title = {Online Model Compression for Federated Learning with Large Models}, + booktitle = {ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, + pages = {1--5}, + organization = {IEEE} +} +@misc{yik2023neurobench, + author = {Jason Yik and Soikat Hasan Ahmed and Zergham Ahmed and Brian Anderson and Andreas G. Andreou and Chiara Bartolozzi and Arindam Basu and Douwe den Blanken and Petrut Bogdan and Sander Bohte and Younes Bouhadjar and Sonia Buckley and Gert Cauwenberghs and Federico Corradi and Guido de Croon and Andreea Danielescu and Anurag Daram and Mike Davies and Yigit Demirag and Jason Eshraghian and Jeremy Forest and Steve Furber and Michael Furlong and Aditya Gilra and Giacomo Indiveri and Siddharth Joshi and Vedant Karia and Lyes Khacef and James C. Knight and Laura Kriener and Rajkumar Kubendran and Dhireesha Kudithipudi and Gregor Lenz and Rajit Manohar and Christian Mayr and Konstantinos Michmizos and Dylan Muir and Emre Neftci and Thomas Nowotny and Fabrizio Ottati and Ayca Ozcelikkale and Noah Pacik-Nelson and Priyadarshini Panda and Sun Pao-Sheng and Melika Payvand and Christian Pehle and Mihai A. Petrovici and Christoph Posch and Alpha Renner and Yulia Sandamirskaya and Clemens JS Schaefer and Andr{\'e} van Schaik and Johannes Schemmel and Catherine Schuman and Jae-sun Seo and Sadique Sheik and Sumit Bam Shrestha and Manolis Sifalakis and Amos Sironi and Kenneth Stewart and Terrence C. Stewart and Philipp Stratmann and Guangzhi Tang and Jonathan Timcheck and Marian Verhelst and Craig M. Vineyard and Bernhard Vogginger and Amirreza Yousefzadeh and Biyan Zhou and Fatima Tuz Zohora and Charlotte Frenkel and Vijay Janapa Reddi}, + year = 2023, + archiveprefix = {arXiv}, + eprint = {2304.04640}, + primaryclass = {cs.AI} +} +@article{young2018recent, + author = {Young, Tom and Hazarika, Devamanyu and Poria, Soujanya and Cambria, Erik}, + year = 2018, + title = {Recent trends in deep learning based natural language processing}, + journal = {IEEE Computational Intelligence Magazine}, + publisher = {IEEE}, + volume = 13, + number = 3, + pages = {55--75} +} @inproceedings{zennaro2022tinyml, - author = {Zennaro, Marco and Plancher, Brian and Reddi, V Janapa}, - booktitle = {The UN 7th Multi-stakeholder Forum on Science, Technology and Innovation for the Sustainable Development Goals}, - pages = {2022--05}, - title = {TinyML: applied AI for development}, - year = 2022} - + author = {Zennaro, Marco and Plancher, Brian and Reddi, V Janapa}, + year = 2022, + title = {TinyML: applied AI for development}, + booktitle = {The UN 7th Multi-stakeholder Forum on Science, Technology and Innovation for the Sustainable Development Goals}, + pages = {2022--05} +} @article{zennarobridging, - author = {Zennaro, Marco and Plancher, Brian and Reddi, Vijay Janapa}, - title = {Bridging the Digital Divide: the Promising Impact of TinyML for Developing Countries}} - + author = {Zennaro, Marco and Plancher, Brian and Reddi, Vijay Janapa}, + title = {Bridging the Digital Divide: the Promising Impact of TinyML for Developing Countries} +} @inproceedings{Zhang_2020_CVPR_Workshops, - author = {Zhang, Li Lyna and Yang, Yuqing and Jiang, Yuhang and Zhu, Wenwu and Liu, Yunxin}, - booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops}, - month = {June}, - title = {Fast Hardware-Aware Neural Architecture Search}, - year = 2020} - + author = {Zhang, Li Lyna and Yang, Yuqing and Jiang, Yuhang and Zhu, Wenwu and Liu, Yunxin}, + year = 2020, + title = {Fast Hardware-Aware Neural Architecture Search}, + month = {June}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern
Recognition (CVPR) Workshops} +} +@inproceedings{zhang2015fpga, + author = {Zhang, Chen and Li, Peng and Sun, Guangyu and Guan, Yijin and Xiao, Bingjun and Cong, Jason}, + year = 2015, + title = {Optimizing FPGA-based Accelerator Design for Deep Convolutional Neural Networks}, + booktitle = {Proceedings of the 2015 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays}, + volume = 15, + pages = {161--170} +} +@article{Zhang2017, + author = {Zhang, Qingxue and Zhou, Dian and Zeng, Xuan}, + year = 2017, + month = {Feb}, + day = {06}, + journal = {BioMedical Engineering OnLine}, + volume = 16, + number = 1, + pages = 23, + doi = {10.1186/s12938-017-0317-z}, + issn = {1475-925X}, + url = {https://doi.org/10.1186/s12938-017-0317-z}, + bdsk-url-1 = {https://doi.org/10.1186/s12938-017-0317-z} +} @misc{zhang2019autoshrink, + author = {Tunhou Zhang and Hsin-Pai Cheng and Zhenwen Li and Feng Yan and Chengyu Huang and Hai Li and Yiran Chen}, + year = 2019, + title = {AutoShrink: A Topology-aware NAS for Discovering Efficient Neural Architecture}, archiveprefix = {arXiv}, - author = {Tunhou Zhang and Hsin-Pai Cheng and Zhenwen Li and Feng Yan and Chengyu Huang and Hai Li and Yiran Chen}, - eprint = {1911.09251}, - primaryclass = {cs.LG}, - title = {AutoShrink: A Topology-aware NAS for Discovering Efficient Neural Architecture}, - year = 2019} - + eprint = {1911.09251}, + primaryclass = {cs.LG} +} @article{zhao2018federated, - author = {Zhao, Yue and Li, Meng and Lai, Liangzhen and Suda, Naveen and Civin, Damon and Chandra, Vikas}, - journal = {arXiv preprint arXiv:1806.00582}, - title = {Federated learning with non-iid data}, - year = 2018} - + author = {Zhao, Yue and Li, Meng and Lai, Liangzhen and Suda, Naveen and Civin, Damon and Chandra, Vikas}, + year = 2018, + title = {Federated learning with non-iid data}, + journal = {arXiv preprint arXiv:1806.00582} +} +@inproceedings{zhao2018fpga, + author = {Zhao, Mark and Suh, G Edward}, + year = 2018, + title = {FPGA-Based Remote Power Side-Channel Attacks}, + booktitle = {2018 IEEE Symposium on Security and Privacy (SP)}, + pages = {229--244}, + date-added = {2023-11-22 17:08:21 -0500}, + date-modified = {2023-11-22 17:09:07 -0500}, + organization = {IEEE} +} @misc{zhou_deep_2023, - annote = {Comment: Code is available at https://github.com/zhoudw-zdw/CIL\_Survey/}, - author = {Zhou, Da-Wei and Wang, Qi-Wei and Qi, Zhi-Hong and Ye, Han-Jia and Zhan, De-Chuan and Liu, Ziwei}, - file = {Zhou et al. - 2023 - Deep Class-Incremental Learning A Survey.pdf:/Users/alex/Zotero/storage/859VZG7W/Zhou et al. - 2023 - Deep Class-Incremental Learning A Survey.pdf:application/pdf}, - keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning}, - language = {en}, - month = feb, - note = {arXiv:2302.03648 [cs]}, - publisher = {arXiv}, - shorttitle = {Deep {Class}-{Incremental} {Learning}}, - title = {Deep {Class}-{Incremental} {Learning}: {A} {Survey}}, - url = {http://arxiv.org/abs/2302.03648}, - urldate = {2023-10-26}, - year = 2023, - Bdsk-Url-1 = {http://arxiv.org/abs/2302.03648}} - -@misc{noauthor_who_nodate, - title = {Who {Invented} the {Microprocessor}? 
- {CHM}}, - url = {https://computerhistory.org/blog/who-invented-the-microprocessor/}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://computerhistory.org/blog/who-invented-the-microprocessor/}} - -@book{weik_survey_1955, - author = {Weik, Martin H.}, - language = {en}, - publisher = {Ballistic Research Laboratories}, - title = {A {Survey} of {Domestic} {Electronic} {Digital} {Computing} {Systems}}, - year = {1955}} - -@inproceedings{brown_language_2020, - abstract = {We demonstrate that scaling up language models greatly improves task-agnostic, few-shot performance, sometimes even becoming competitive with prior state-of-the-art fine-tuning approaches. Specifically, we train GPT-3, an autoregressive language model with 175 billion parameters, 10x more than any previous non-sparse language model, and test its performance in the few-shot setting. For all tasks, GPT-3 is applied without any gradient updates or fine-tuning, with tasks and few-shot demonstrations specified purely via text interaction with the model. GPT-3 achieves strong performance on many NLP datasets, including translation, question-answering, and cloze tasks. We also identify some datasets where GPT-3's few-shot learning still struggles, as well as some datasets where GPT-3 faces methodological issues related to training on large web corpora.}, - author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario}, - booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, - pages = {1877--1901}, - publisher = {Curran Associates, Inc.}, - title = {Language {Models} are {Few}-{Shot} {Learners}}, - url = {https://proceedings.neurips.cc/paper_files/paper/2020/hash/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html}, - urldate = {2023-11-07}, - volume = {33}, - year = {2020}, - Bdsk-Url-1 = {https://proceedings.neurips.cc/paper_files/paper/2020/hash/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html}} - -@misc{jia_dissecting_2018, - abstract = {Every year, novel NVIDIA GPU designs are introduced. This rapid architectural and technological progression, coupled with a reluctance by manufacturers to disclose low-level details, makes it difficult for even the most proficient GPU software designers to remain up-to-date with the technological advances at a microarchitectural level. To address this dearth of public, microarchitectural-level information on the novel NVIDIA GPUs, independent researchers have resorted to microbenchmarks-based dissection and discovery. This has led to a prolific line of publications that shed light on instruction encoding, and memory hierarchy's geometry and features at each level. Namely, research that describes the performance and behavior of the Kepler, Maxwell and Pascal architectures. In this technical report, we continue this line of research by presenting the microarchitectural details of the NVIDIA Volta architecture, discovered through microbenchmarks and instruction set disassembly. 
Additionally, we compare quantitatively our Volta findings against its predecessors, Kepler, Maxwell and Pascal.}, - author = {Jia, Zhe and Maggioni, Marco and Staiger, Benjamin and Scarpazza, Daniele P.}, - keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Performance}, - month = apr, - note = {arXiv:1804.06826 [cs]}, - publisher = {arXiv}, - title = {Dissecting the {NVIDIA} {Volta} {GPU} {Architecture} via {Microbenchmarking}}, - url = {http://arxiv.org/abs/1804.06826}, - urldate = {2023-11-07}, - year = {2018}, - Bdsk-Url-1 = {http://arxiv.org/abs/1804.06826}} - -@article{jia2019beyond, - author = {Jia, Zhihao and Zaharia, Matei and Aiken, Alex}, - journal = {Proceedings of Machine Learning and Systems}, - pages = {1--13}, - title = {Beyond Data and Model Parallelism for Deep Neural Networks.}, - volume = {1}, - year = {2019}} - -@inproceedings{raina_large-scale_2009, - address = {Montreal Quebec Canada}, - author = {Raina, Rajat and Madhavan, Anand and Ng, Andrew Y.}, - booktitle = {Proceedings of the 26th {Annual} {International} {Conference} on {Machine} {Learning}}, - doi = {10.1145/1553374.1553486}, - isbn = {978-1-60558-516-1}, - language = {en}, - month = jun, - pages = {873--880}, - publisher = {ACM}, - title = {Large-scale deep unsupervised learning using graphics processors}, - url = {https://dl.acm.org/doi/10.1145/1553374.1553486}, - urldate = {2023-11-07}, - year = {2009}, - Bdsk-Url-1 = {https://dl.acm.org/doi/10.1145/1553374.1553486}, - Bdsk-Url-2 = {https://doi.org/10.1145/1553374.1553486}} - -@misc{noauthor_amd_nodate, - title = {{AMD} {Radeon} {RX} 7000 {Series} {Desktop} {Graphics} {Cards}}, - url = {https://www.amd.com/en/graphics/radeon-rx-graphics}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://www.amd.com/en/graphics/radeon-rx-graphics}} - -@misc{noauthor_intel_nodate, - abstract = {Find out how Intel{\textregistered} Arc Graphics unlock lifelike gaming and seamless content creation.}, - journal = {Intel}, - language = {en}, - title = {Intel{\textregistered} {Arc}{\texttrademark} {Graphics} {Overview}}, - url = {https://www.intel.com/content/www/us/en/products/details/discrete-gpus/arc.html}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://www.intel.com/content/www/us/en/products/details/discrete-gpus/arc.html}} - -@article{lindholm_nvidia_2008, - abstract = {To enable flexible, programmable graphics and high-performance computing, NVIDIA has developed the Tesla scalable unified graphics and parallel computing architecture. Its scalable parallel array of processors is massively multithreaded and programmable in C or via graphics APIs.}, - author = {Lindholm, Erik and Nickolls, John and Oberman, Stuart and Montrym, John}, - doi = {10.1109/MM.2008.31}, - issn = {1937-4143}, - journal = {IEEE Micro}, - month = mar, - note = {Conference Name: IEEE Micro}, - number = {2}, - pages = {39--55}, - shorttitle = {{NVIDIA} {Tesla}}, - title = {{NVIDIA} {Tesla}: {A} {Unified} {Graphics} and {Computing} {Architecture}}, - url = {https://ieeexplore.ieee.org/document/4523358}, - urldate = {2023-11-07}, - volume = {28}, - year = {2008}, - Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/4523358}, - Bdsk-Url-2 = {https://doi.org/10.1109/MM.2008.31}} - -@article{dally_evolution_2021, - abstract = {Graphics processing units (GPUs) power today's fastest supercomputers, are the dominant platform for deep learning, and provide the intelligence for devices ranging from self-driving cars to robots and smart cameras. 
They also generate compelling photorealistic images at real-time frame rates. GPUs have evolved by adding features to support new use cases. NVIDIA's GeForce 256, the first GPU, was a dedicated processor for real-time graphics, an application that demands large amounts of floating-point arithmetic for vertex and fragment shading computations and high memory bandwidth. As real-time graphics advanced, GPUs became programmable. The combination of programmability and floating-point performance made GPUs attractive for running scientific applications. Scientists found ways to use early programmable GPUs by casting their calculations as vertex and fragment shaders. GPUs evolved to meet the needs of scientific users by adding hardware for simpler programming, double-precision floating-point arithmetic, and resilience.}, - author = {Dally, William J. and Keckler, Stephen W. and Kirk, David B.}, - doi = {10.1109/MM.2021.3113475}, - issn = {1937-4143}, - journal = {IEEE Micro}, - month = nov, - note = {Conference Name: IEEE Micro}, - number = {6}, - pages = {42--51}, - title = {Evolution of the {Graphics} {Processing} {Unit} ({GPU})}, - url = {https://ieeexplore.ieee.org/document/9623445}, - urldate = {2023-11-07}, - volume = {41}, - year = {2021}, - Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/9623445}, - Bdsk-Url-2 = {https://doi.org/10.1109/MM.2021.3113475}} - -@article{demler_ceva_2020, - author = {Demler, Mike}, - language = {en}, - title = {{CEVA} {SENSPRO} {FUSES} {AI} {AND} {VECTOR} {DSP}}, - year = {2020}} - -@misc{noauthor_google_2023, - abstract = {Tensor G3 on Pixel 8 and Pixel 8 Pro is more helpful, more efficient and more powerful.}, - journal = {Google}, - language = {en-us}, - month = oct, - shorttitle = {Google {Tensor} {G3}}, - title = {Google {Tensor} {G3}: {The} new chip that gives your {Pixel} an {AI} upgrade}, - url = {https://blog.google/products/pixel/google-tensor-g3-pixel-8/}, - urldate = {2023-11-07}, - year = {2023}, - Bdsk-Url-1 = {https://blog.google/products/pixel/google-tensor-g3-pixel-8/}} - -@misc{noauthor_hexagon_nodate, - abstract = {The Hexagon DSP processor has both CPU and DSP functionality to support deeply embedded processing needs of the mobile platform for both multimedia and modem functions.}, - journal = {Qualcomm Developer Network}, - language = {en}, - title = {Hexagon {DSP} {SDK} {Processor}}, - url = {https://developer.qualcomm.com/software/hexagon-dsp-sdk/dsp-processor}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://developer.qualcomm.com/software/hexagon-dsp-sdk/dsp-processor}} - -@misc{noauthor_evolution_2023, - abstract = {To complement the extensive perspective of another Market Update feature article on DSP Products and Applications, published in the November 2020 edition, audioXpress was honored to have the valuable contribution from one of the main suppliers in the field. 
In this article, Youval Nachum, CEVA's Senior Product Marketing Manager, writes about \"The Evolution of Audio DSPs,\" discussing how DSP technology has evolved, its impact on the user experience, and what the future of DSP has in store for us.}, - journal = {audioXpress}, - language = {en}, - month = oct, - title = {The {Evolution} of {Audio} {DSPs}}, - url = {https://audioxpress.com/article/the-evolution-of-audio-dsps}, - urldate = {2023-11-07}, - year = {2023}, - Bdsk-Url-1 = {https://audioxpress.com/article/the-evolution-of-audio-dsps}} - -@article{xiong_mri-based_2021, - abstract = {Brain tumor segmentation is a challenging problem in medical image processing and analysis. It is a very time-consuming and error-prone task. In order to reduce the burden on physicians and improve the segmentation accuracy, the computer-aided detection (CAD) systems need to be developed. Due to the powerful feature learning ability of the deep learning technology, many deep learning-based methods have been applied to the brain tumor segmentation CAD systems and achieved satisfactory accuracy. However, deep learning neural networks have high computational complexity, and the brain tumor segmentation process consumes significant time. Therefore, in order to achieve the high segmentation accuracy of brain tumors and obtain the segmentation results efficiently, it is very demanding to speed up the segmentation process of brain tumors.}, - author = {Xiong, Siyu and Wu, Guoqing and Fan, Xitian and Feng, Xuan and Huang, Zhongcheng and Cao, Wei and Zhou, Xuegong and Ding, Shijin and Yu, Jinhua and Wang, Lingli and Shi, Zhifeng}, - doi = {10.1186/s12859-021-04347-6}, - issn = {1471-2105}, - journal = {BMC Bioinformatics}, - keywords = {Brain tumor segmatation, FPGA acceleration, Neural network}, - month = sep, - number = {1}, - pages = {421}, - title = {{MRI}-based brain tumor segmentation using {FPGA}-accelerated neural network}, - url = {https://doi.org/10.1186/s12859-021-04347-6}, - urldate = {2023-11-07}, - volume = {22}, - year = {2021}, - Bdsk-Url-1 = {https://doi.org/10.1186/s12859-021-04347-6}} - -@article{gwennap_certus-nx_nodate, - author = {Gwennap, Linley}, - language = {en}, - title = {Certus-{NX} {Innovates} {General}-{Purpose} {FPGAs}}} - -@misc{noauthor_fpga_nodate, - title = {{FPGA} {Architecture} {Overview}}, - url = {https://www.intel.com/content/www/us/en/docs/oneapi-fpga-add-on/optimization-guide/2023-1/fpga-architecture-overview.html}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://www.intel.com/content/www/us/en/docs/oneapi-fpga-add-on/optimization-guide/2023-1/fpga-architecture-overview.html}} - -@misc{noauthor_what_nodate, - abstract = {What is an FPGA - Field Programmable Gate Arrays are semiconductor devices that are based around a matrix of configurable logic blocks (CLBs) connected via programmable interconnects. FPGAs can be reprogrammed to desired application or functionality requirements after manufacturing.}, - journal = {AMD}, - language = {en}, - shorttitle = {What is an {FPGA}?}, - title = {What is an {FPGA}? {Field} {Programmable} {Gate} {Array}}, - url = {https://www.xilinx.com/products/silicon-devices/fpga/what-is-an-fpga.html}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://www.xilinx.com/products/silicon-devices/fpga/what-is-an-fpga.html}} - -@article{putnam_reconfigurable_2014, - abstract = {Datacenter workloads demand high computational capabilities, flexibility, power efficiency, and low cost. It is challenging to improve all of these factors simultaneously. 
To advance datacenter capabilities beyond what commodity server designs can provide, we have designed and built a composable, reconfigurablefabric to accelerate portions of large-scale software services. Each instantiation of the fabric consists of a 6x8 2-D torus of high-end Stratix V FPGAs embedded into a half-rack of 48 machines. One FPGA is placed into each server, accessible through PCIe, and wired directly to other FPGAs with pairs of 10 Gb SAS cables - In this paper, we describe a medium-scale deployment of this fabric on a bed of 1,632 servers, and measure its efficacy in accelerating the Bing web search engine. We describe the requirements and architecture of the system, detail the critical engineering challenges and solutions needed to make the system robust in the presence of failures, and measure the performance, power, and resilience of the system when ranking candidate documents. Under high load, the largescale reconfigurable fabric improves the ranking throughput of each server by a factor of 95\% for a fixed latency distribution--- or, while maintaining equivalent throughput, reduces the tail latency by 29\%}, - author = {Putnam, Andrew and Caulfield, Adrian M. and Chung, Eric S. and Chiou, Derek and Constantinides, Kypros and Demme, John and Esmaeilzadeh, Hadi and Fowers, Jeremy and Gopal, Gopi Prashanth and Gray, Jan and Haselman, Michael and Hauck, Scott and Heil, Stephen and Hormati, Amir and Kim, Joo-Young and Lanka, Sitaram and Larus, James and Peterson, Eric and Pope, Simon and Smith, Aaron and Thong, Jason and Xiao, Phillip Yi and Burger, Doug}, - doi = {10.1145/2678373.2665678}, - issn = {0163-5964}, - journal = {ACM SIGARCH Computer Architecture News}, - language = {en}, - month = oct, - number = {3}, - pages = {13--24}, - title = {A reconfigurable fabric for accelerating large-scale datacenter services}, - url = {https://dl.acm.org/doi/10.1145/2678373.2665678}, - urldate = {2023-11-07}, - volume = {42}, - year = {2014}, - Bdsk-Url-1 = {https://dl.acm.org/doi/10.1145/2678373.2665678}, - Bdsk-Url-2 = {https://doi.org/10.1145/2678373.2665678}} - -@misc{noauthor_project_nodate, - title = {Project {Catapult} - {Microsoft} {Research}}, - url = {https://www.microsoft.com/en-us/research/project/project-catapult/}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://www.microsoft.com/en-us/research/project/project-catapult/}} - -@misc{dean_jeff_numbers_nodate, - author = {Dean. Jeff}, - title = {Numbers {Everyone} {Should} {Know}}, - url = {https://brenocon.com/dean_perf.html}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://brenocon.com/dean_perf.html}} - -@misc{bailey_enabling_2018, - abstract = {Enabling Cheaper Design, At what point does cheaper design enable a significant growth in custom semiconductor content? Not everyone is onboard with the idea.}, - author = {Bailey, Brian}, - journal = {Semiconductor Engineering}, - language = {en-US}, - month = sep, - title = {Enabling {Cheaper} {Design}}, - url = {https://semiengineering.com/enabling-cheaper-design/}, - urldate = {2023-11-07}, - year = {2018}, - Bdsk-Url-1 = {https://semiengineering.com/enabling-cheaper-design/}} - -@misc{noauthor_integrated_2023, - abstract = {An integrated circuit (also known as an IC, a chip, or a microchip) is a set of electronic circuits on one small flat piece of semiconductor material, usually silicon. Large numbers of miniaturized transistors and other electronic components are integrated together on the chip. 
This results in circuits that are orders of magnitude smaller, faster, and less expensive than those constructed of discrete components, allowing a large transistor count. -The IC's mass production capability, reliability, and building-block approach to integrated circuit design have ensured the rapid adoption of standardized ICs in place of designs using discrete transistors. ICs are now used in virtually all electronic equipment and have revolutionized the world of electronics. Computers, mobile phones and other home appliances are now essential parts of the structure of modern societies, made possible by the small size and low cost of ICs such as modern computer processors and microcontrollers. -Very-large-scale integration was made practical by technological advancements in semiconductor device fabrication. Since their origins in the 1960s, the size, speed, and capacity of chips have progressed enormously, driven by technical advances that fit more and more transistors on chips of the same size -- a modern chip may have many billions of transistors in an area the size of a human fingernail. These advances, roughly following Moore's law, make the computer chips of today possess millions of times the capacity and thousands of times the speed of the computer chips of the early 1970s. -ICs have three main advantages over discrete circuits: size, cost and performance. The size and cost is low because the chips, with all their components, are printed as a unit by photolithography rather than being constructed one transistor at a time. Furthermore, packaged ICs use much less material than discrete circuits. Performance is high because the IC's components switch quickly and consume comparatively little power because of their small size and proximity. The main disadvantage of ICs is the high initial cost of designing them and the enormous capital cost of factory construction. 
This high initial cost means ICs are only commercially viable when high production volumes are anticipated.}, - copyright = {Creative Commons Attribution-ShareAlike License}, - journal = {Wikipedia}, - language = {en}, - month = nov, - note = {Page Version ID: 1183537457}, - title = {Integrated circuit}, - url = {https://en.wikipedia.org/w/index.php?title=Integrated_circuit&oldid=1183537457}, - urldate = {2023-11-07}, - year = {2023}, - Bdsk-Url-1 = {https://en.wikipedia.org/w/index.php?title=Integrated_circuit&oldid=1183537457}} - -@article{el-rayis_reconfigurable_nodate, - author = {El-Rayis, Ahmed Osman}, - language = {en}, - title = {Reconfigurable {Architectures} for the {Next} {Generation} of {Mobile} {Device} {Telecommunications} {Systems}}} - -@misc{noauthor_intel_nodate, - abstract = {View Intel{\textregistered} Stratix{\textregistered} 10 NX FPGAs and find product specifications, features, applications and more.}, - journal = {Intel}, - language = {en}, - title = {Intel{\textregistered} {Stratix}{\textregistered} 10 {NX} {FPGA} {Overview} - {High} {Performance} {Stratix}{\textregistered} {FPGA}}, - url = {https://www.intel.com/content/www/us/en/products/details/fpga/stratix/10/nx.html}, - urldate = {2023-11-07}, - Bdsk-Url-1 = {https://www.intel.com/content/www/us/en/products/details/fpga/stratix/10/nx.html}} - -@book{patterson2016computer, - author = {Patterson, David A and Hennessy, John L}, - publisher = {Morgan kaufmann}, - title = {Computer organization and design ARM edition: the hardware software interface}, - year = {2016}} - -@article{xiu2019time, - author = {Xiu, Liming}, - journal = {IEEE Solid-State Circuits Magazine}, - number = {1}, - pages = {39--55}, - publisher = {IEEE}, - title = {Time Moore: Exploiting Moore's Law from the perspective of time}, - volume = {11}, - year = {2019}} - -@article{brown2020language, - author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others}, - journal = {Advances in neural information processing systems}, - pages = {1877--1901}, - title = {Language models are few-shot learners}, - volume = {33}, - year = {2020}} - -@article{cheng2017survey, - author = {Cheng, Yu and Wang, Duo and Zhou, Pan and Zhang, Tao}, - journal = {arXiv preprint arXiv:1710.09282}, - title = {A survey of model compression and acceleration for deep neural networks}, - year = {2017}} - -@article{sze2017efficient, - author = {Sze, Vivienne and Chen, Yu-Hsin and Yang, Tien-Ju and Emer, Joel S}, - journal = {Proceedings of the IEEE}, - number = {12}, - pages = {2295--2329}, - publisher = {Ieee}, - title = {Efficient processing of deep neural networks: A tutorial and survey}, - volume = {105}, - year = {2017}} - -@article{young2018recent, - author = {Young, Tom and Hazarika, Devamanyu and Poria, Soujanya and Cambria, Erik}, - journal = {ieee Computational intelligenCe magazine}, - number = {3}, - pages = {55--75}, - publisher = {IEEE}, - title = {Recent trends in deep learning based natural language processing}, - volume = {13}, - year = {2018}} - -@inproceedings{jacob2018quantization, - author = {Jacob, Benoit and Kligys, Skirmantas and Chen, Bo and Zhu, Menglong and Tang, Matthew and Howard, Andrew and Adam, Hartwig and Kalenichenko, Dmitry}, - booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages = {2704--2713}, - title = {Quantization and training of neural networks 
for efficient integer-arithmetic-only inference}, - year = {2018}} - -@article{gale2019state, - author = {Gale, Trevor and Elsen, Erich and Hooker, Sara}, - journal = {arXiv preprint arXiv:1902.09574}, - title = {The state of sparsity in deep neural networks}, - year = {2019}} - -@inproceedings{zhang2015fpga, - author = {Zhang, Chen and Li, Peng and Sun, Guangyu and Guan, Yijin and Xiao, Bingjun and Cong, Jason Optimizing}, - booktitle = {SIGDA International Symposium on Field-Programmable Gate Arrays-FPGA}, - pages = {161--170}, - title = {FPGA-based Accelerator Design for Deep Convolutional Neural Networks Proceedings of the 2015 ACM}, - volume = {15}, - year = {2015}} - -@inproceedings{suda2016throughput, - author = {Suda, Naveen and Chandra, Vikas and Dasika, Ganesh and Mohanty, Abinash and Ma, Yufei and Vrudhula, Sarma and Seo, Jae-sun and Cao, Yu}, - booktitle = {Proceedings of the 2016 ACM/SIGDA international symposium on field-programmable gate arrays}, - pages = {16--25}, - title = {Throughput-optimized OpenCL-based FPGA accelerator for large-scale convolutional neural networks}, - year = {2016}} - -@inproceedings{fowers2018configurable, - author = {Fowers, Jeremy and Ovtcharov, Kalin and Papamichael, Michael and Massengill, Todd and Liu, Ming and Lo, Daniel and Alkalay, Shlomi and Haselman, Michael and Adams, Logan and Ghandi, Mahdi and others}, - booktitle = {2018 ACM/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)}, - organization = {IEEE}, - pages = {1--14}, - title = {A configurable cloud-scale DNN processor for real-time AI}, - year = {2018}} - -@article{jia2019beyond, - author = {Jia, Zhihao and Zaharia, Matei and Aiken, Alex}, - journal = {Proceedings of Machine Learning and Systems}, - pages = {1--13}, - title = {Beyond Data and Model Parallelism for Deep Neural Networks.}, - volume = {1}, - year = {2019}} - -@inproceedings{zhu2018benchmarking, - author = {Zhu, Hongyu and Akrout, Mohamed and Zheng, Bojian and Pelegris, Andrew and Jayarajan, Anand and Phanishayee, Amar and Schroeder, Bianca and Pekhimenko, Gennady}, - booktitle = {2018 IEEE International Symposium on Workload Characterization (IISWC)}, - organization = {IEEE}, - pages = {88--100}, - title = {Benchmarking and analyzing deep neural network training}, - year = {2018}} - -@article{samajdar2018scale, - author = {Samajdar, Ananda and Zhu, Yuhao and Whatmough, Paul and Mattina, Matthew and Krishna, Tushar}, - journal = {arXiv preprint arXiv:1811.02883}, - title = {Scale-sim: Systolic cnn accelerator simulator}, - year = {2018}} - -@inproceedings{munshi2009opencl, - author = {Munshi, Aaftab}, - booktitle = {2009 IEEE Hot Chips 21 Symposium (HCS)}, - doi = {10.1109/HOTCHIPS.2009.7478342}, - pages = {1-314}, - title = {The OpenCL specification}, - year = {2009}, - Bdsk-Url-1 = {https://doi.org/10.1109/HOTCHIPS.2009.7478342}} - -@inproceedings{luebke2008cuda, - author = {Luebke, David}, - booktitle = {2008 5th IEEE International Symposium on Biomedical Imaging: From Nano to Macro}, - doi = {10.1109/ISBI.2008.4541126}, - pages = {836-838}, - title = {CUDA: Scalable parallel programming for high-performance scientific computing}, - year = {2008}, - Bdsk-Url-1 = {https://doi.org/10.1109/ISBI.2008.4541126}} - -@misc{segal1999opengl, - author = {Segal, Mark and Akeley, Kurt}, - title = {The OpenGL graphics system: A specification (version 1.1)}, - year = {1999}} - -@inproceedings{gannot1994verilog, - author = {Gannot, G. 
and Ligthart, M.}, - booktitle = {International Verilog HDL Conference}, - doi = {10.1109/IVC.1994.323743}, - pages = {86-92}, - title = {Verilog HDL based FPGA design}, - year = {1994}, - Bdsk-Url-1 = {https://doi.org/10.1109/IVC.1994.323743}} - -@article{binkert2011gem5, - author = {Binkert, Nathan and Beckmann, Bradford and Black, Gabriel and Reinhardt, Steven K and Saidi, Ali and Basu, Arkaprava and Hestness, Joel and Hower, Derek R and Krishna, Tushar and Sardashti, Somayeh and others}, - journal = {ACM SIGARCH computer architecture news}, - number = {2}, - pages = {1--7}, - publisher = {ACM New York, NY, USA}, - title = {The gem5 simulator}, - volume = {39}, - year = {2011}} - -@article{Vivet2021, - author = {Vivet, Pascal and Guthmuller, Eric and Thonnart, Yvain and Pillonnet, Gael and Fuguet, C{\'e}sar and Miro-Panades, Ivan and Moritz, Guillaume and Durupt, Jean and Bernard, Christian and Varreau, Didier and Pontes, Julian and Thuries, S{\'e}bastien and Coriat, David and Harrand, Michel and Dutoit, Denis and Lattard, Didier and Arnaud, Lucile and Charbonnier, Jean and Coudrain, Perceval and Garnier, Arnaud and Berger, Fr{\'e}d{\'e}ric and Gueugnot, Alain and Greiner, Alain and Meunier, Quentin L. and Farcy, Alexis and Arriordaz, Alexandre and Ch{\'e}ramy, S{\'e}verine and Clermidy, Fabien}, - doi = {10.1109/JSSC.2020.3036341}, - journal = {IEEE Journal of Solid-State Circuits}, - number = {1}, - pages = {79-97}, - title = {IntAct: A 96-Core Processor With Six Chiplets 3D-Stacked on an Active Interposer With Distributed Interconnects and Integrated Power Management}, - volume = {56}, - year = {2021}, - Bdsk-Url-1 = {https://doi.org/10.1109/JSSC.2020.3036341}} - -@article{schuman2022, - author = {Schuman, Catherine D and Kulkarni, Shruti R and Parsa, Maryam and Mitchell, J Parker and Date, Prasanna and Kay, Bill}, - journal = {Nature Computational Science}, - number = {1}, - pages = {10--19}, - publisher = {Nature Publishing Group US New York}, - title = {Opportunities for neuromorphic computing algorithms and applications}, - volume = {2}, - year = {2022}} - -@article{markovic2020, - author = {Markovi{\'c}, Danijela and Mizrahi, Alice and Querlioz, Damien and Grollier, Julie}, - journal = {Nature Reviews Physics}, - number = {9}, - pages = {499--510}, - publisher = {Nature Publishing Group UK London}, - title = {Physics for neuromorphic computing}, - volume = {2}, - year = {2020}} - -@article{furber2016large, - author = {Furber, Steve}, - journal = {Journal of neural engineering}, - number = {5}, - pages = {051001}, - publisher = {IOP Publishing}, - title = {Large-scale neuromorphic computing systems}, - volume = {13}, - year = {2016}} - -@article{davies2018loihi, - author = {Davies, Mike and Srinivasa, Narayan and Lin, Tsung-Han and Chinya, Gautham and Cao, Yongqiang and Choday, Sri Harsha and Dimou, Georgios and Joshi, Prasad and Imam, Nabil and Jain, Shweta and others}, - journal = {Ieee Micro}, - number = {1}, - pages = {82--99}, - publisher = {IEEE}, - title = {Loihi: A neuromorphic manycore processor with on-chip learning}, - volume = {38}, - year = {2018}} - -@article{davies2021advancing, - author = {Davies, Mike and Wild, Andreas and Orchard, Garrick and Sandamirskaya, Yulia and Guerra, Gabriel A Fonseca and Joshi, Prasad and Plank, Philipp and Risbud, Sumedh R}, - journal = {Proceedings of the IEEE}, - number = {5}, - pages = {911--934}, - publisher = {IEEE}, - title = {Advancing neuromorphic computing with loihi: A survey of results and outlook}, - volume = {109}, - year 
= {2021}} - -@article{modha2023neural, - author = {Modha, Dharmendra S and Akopyan, Filipp and Andreopoulos, Alexander and Appuswamy, Rathinakumar and Arthur, John V and Cassidy, Andrew S and Datta, Pallab and DeBole, Michael V and Esser, Steven K and Otero, Carlos Ortega and others}, - journal = {Science}, - number = {6668}, - pages = {329--335}, - publisher = {American Association for the Advancement of Science}, - title = {Neural inference at the frontier of energy, space, and time}, - volume = {382}, - year = {2023}} - -@article{maass1997networks, - author = {Maass, Wolfgang}, - journal = {Neural networks}, - number = {9}, - pages = {1659--1671}, - publisher = {Elsevier}, - title = {Networks of spiking neurons: the third generation of neural network models}, - volume = {10}, - year = {1997}} - -@article{10242251, - author = {Eshraghian, Jason K. and Ward, Max and Neftci, Emre O. and Wang, Xinxin and Lenz, Gregor and Dwivedi, Girish and Bennamoun, Mohammed and Jeong, Doo Seok and Lu, Wei D.}, - doi = {10.1109/JPROC.2023.3308088}, - journal = {Proceedings of the IEEE}, - number = {9}, - pages = {1016-1054}, - title = {Training Spiking Neural Networks Using Lessons From Deep Learning}, - volume = {111}, - year = {2023}, - Bdsk-Url-1 = {https://doi.org/10.1109/JPROC.2023.3308088}} - -@article{chua1971memristor, - author = {Chua, Leon}, - journal = {IEEE Transactions on circuit theory}, - number = {5}, - pages = {507--519}, - publisher = {IEEE}, - title = {Memristor-the missing circuit element}, - volume = {18}, - year = {1971}} - -@article{shastri2021photonics, - author = {Shastri, Bhavin J and Tait, Alexander N and Ferreira de Lima, Thomas and Pernice, Wolfram HP and Bhaskaran, Harish and Wright, C David and Prucnal, Paul R}, - journal = {Nature Photonics}, - number = {2}, - pages = {102--114}, - publisher = {Nature Publishing Group UK London}, - title = {Photonics for artificial intelligence and neuromorphic computing}, - volume = {15}, - year = {2021}} - -@article{haensch2018next, - author = {Haensch, Wilfried and Gokmen, Tayfun and Puri, Ruchir}, - journal = {Proceedings of the IEEE}, - number = {1}, - pages = {108--122}, - publisher = {IEEE}, - title = {The next generation of deep learning hardware: Analog computing}, - volume = {107}, - year = {2018}} - -@article{hazan2021neuromorphic, - author = {Hazan, Avi and Ezra Tsur, Elishai}, - journal = {Frontiers in Neuroscience}, - pages = {627221}, - publisher = {Frontiers Media SA}, - title = {Neuromorphic analog implementation of neural engineering framework-inspired spiking neuron for high-dimensional representation}, - volume = {15}, - year = {2021}} - -@article{gates2009flexible, - author = {Gates, Byron D}, - journal = {Science}, - number = {5921}, - pages = {1566--1567}, - publisher = {American Association for the Advancement of Science}, - title = {Flexible electronics}, - volume = {323}, - year = {2009}} - -@article{musk2019integrated, - author = {Musk, Elon and others}, - journal = {Journal of medical Internet research}, - number = {10}, - pages = {e16194}, - publisher = {JMIR Publications Inc., Toronto, Canada}, - title = {An integrated brain-machine interface platform with thousands of channels}, - volume = {21}, - year = {2019}} - -@article{tang2023flexible, - author = {Tang, Xin and Shen, Hao and Zhao, Siyuan and Li, Na and Liu, Jia}, - journal = {Nature Electronics}, - number = {2}, - pages = {109--118}, - publisher = {Nature Publishing Group UK London}, - title = {Flexible brain--computer interfaces}, - volume = {6}, - year 
= {2023}} - -@article{tang2022soft, - author = {Tang, Xin and He, Yichun and Liu, Jia}, - journal = {Biophysics Reviews}, - number = {1}, - publisher = {AIP Publishing}, - title = {Soft bioelectronics for cardiac interfaces}, - volume = {3}, - year = {2022}} - -@article{kwon2022flexible, - author = {Kwon, Sun Hwa and Dong, Lin}, - journal = {Nano Energy}, - pages = {107632}, - publisher = {Elsevier}, - title = {Flexible sensors and machine learning for heart monitoring}, - year = {2022}} - -@article{huang2010pseudo, - author = {Huang, Tsung-Ching and Fukuda, Kenjiro and Lo, Chun-Ming and Yeh, Yung-Hui and Sekitani, Tsuyoshi and Someya, Takao and Cheng, Kwang-Ting}, - journal = {IEEE Transactions on Electron Devices}, - number = {1}, - pages = {141--150}, - publisher = {IEEE}, - title = {Pseudo-CMOS: A design style for low-cost and robust flexible electronics}, - volume = {58}, - year = {2010}} - -@article{biggs2021natively, - author = {Biggs, John and Myers, James and Kufel, Jedrzej and Ozer, Emre and Craske, Simon and Sou, Antony and Ramsdale, Catherine and Williamson, Ken and Price, Richard and White, Scott}, - journal = {Nature}, - number = {7868}, - pages = {532--536}, - publisher = {Nature Publishing Group UK London}, - title = {A natively flexible 32-bit Arm microprocessor}, - volume = {595}, - year = {2021}} - -@article{farah2005neuroethics, - author = {Farah, Martha J}, - journal = {Trends in cognitive sciences}, - number = {1}, - pages = {34--40}, - publisher = {Elsevier}, - title = {Neuroethics: the practical and the philosophical}, - volume = {9}, - year = {2005}} - -@article{segura2018ethical, - author = {Segura Anaya, LH and Alsadoon, Abeer and Costadopoulos, Nectar and Prasad, PWC}, - journal = {Science and engineering ethics}, - pages = {1--28}, - publisher = {Springer}, - title = {Ethical implications of user perceptions of wearable devices}, - volume = {24}, - year = {2018}} - -@article{goodyear2017social, - author = {Goodyear, Victoria A}, - journal = {Qualitative research in sport, exercise and health}, - number = {3}, - pages = {285--302}, - publisher = {Taylor \& Francis}, - title = {Social media, apps and wearable technologies: navigating ethical dilemmas and procedures}, - volume = {9}, - year = {2017}} - -@article{roskies2002neuroethics, - author = {Roskies, Adina}, - journal = {Neuron}, - number = {1}, - pages = {21--23}, - publisher = {Elsevier}, - title = {Neuroethics for the new millenium}, - volume = {35}, - year = {2002}} - -@article{duarte2022fastml, - author = {Duarte, Javier and Tran, Nhan and Hawks, Ben and Herwig, Christian and Muhizi, Jules and Prakash, Shvetank and Reddi, Vijay Janapa}, - journal = {arXiv preprint arXiv:2207.07958}, - title = {FastML Science Benchmarks: Accelerating Real-Time Scientific Edge Machine Learning}, - year = {2022}} - -@article{verma2019memory, - author = {Verma, Naveen and Jia, Hongyang and Valavi, Hossein and Tang, Yinqi and Ozatay, Murat and Chen, Lung-Yen and Zhang, Bonan and Deaville, Peter}, - journal = {IEEE Solid-State Circuits Magazine}, - number = {3}, - pages = {43--55}, - publisher = {IEEE}, - title = {In-memory computing: Advances and prospects}, - volume = {11}, - year = {2019}} - -@article{chi2016prime, - author = {Chi, Ping and Li, Shuangchen and Xu, Cong and Zhang, Tao and Zhao, Jishen and Liu, Yongpan and Wang, Yu and Xie, Yuan}, - journal = {ACM SIGARCH Computer Architecture News}, - number = {3}, - pages = {27--39}, - publisher = {ACM New York, NY, USA}, - title = {Prime: A novel processing-in-memory 
architecture for neural network computation in reram-based main memory}, - volume = {44}, - year = {2016}} - -@article{burr2016recent, - author = {Burr, Geoffrey W and Brightsky, Matthew J and Sebastian, Abu and Cheng, Huai-Yu and Wu, Jau-Yi and Kim, Sangbum and Sosa, Norma E and Papandreou, Nikolaos and Lung, Hsiang-Lan and Pozidis, Haralampos and others}, - journal = {IEEE Journal on Emerging and Selected Topics in Circuits and Systems}, - number = {2}, - pages = {146--162}, - publisher = {IEEE}, - title = {Recent progress in phase-change memory technology}, - volume = {6}, - year = {2016}} - -@article{loh20083d, - author = {Loh, Gabriel H}, - journal = {ACM SIGARCH computer architecture news}, - number = {3}, - pages = {453--464}, - publisher = {ACM New York, NY, USA}, - title = {3D-stacked memory architectures for multi-core processors}, - volume = {36}, - year = {2008}} - -@article{mittal2021survey, - author = {Mittal, Sparsh and Verma, Gaurav and Kaushik, Brajesh and Khanday, Farooq A}, - journal = {Journal of Systems Architecture}, - pages = {102276}, - publisher = {Elsevier}, - title = {A survey of SRAM-based in-memory computing techniques and applications}, - volume = {119}, - year = {2021}} - -@article{wong2012metal, - author = {Wong, H-S Philip and Lee, Heng-Yuan and Yu, Shimeng and Chen, Yu-Sheng and Wu, Yi and Chen, Pang-Shiu and Lee, Byoungil and Chen, Frederick T and Tsai, Ming-Jinn}, - journal = {Proceedings of the IEEE}, - number = {6}, - pages = {1951--1970}, - publisher = {IEEE}, - title = {Metal--oxide RRAM}, - volume = {100}, - year = {2012}} - -@inproceedings{imani2016resistive, - author = {Imani, Mohsen and Rahimi, Abbas and Rosing, Tajana S}, - booktitle = {2016 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)}, - organization = {IEEE}, - pages = {1327--1332}, - title = {Resistive configurable associative memory for approximate computing}, - year = {2016}} - -@article{miller2000optical, - author = {Miller, David AB}, - journal = {IEEE Journal of Selected Topics in Quantum Electronics}, - number = {6}, - pages = {1312--1317}, - publisher = {IEEE}, - title = {Optical interconnects to silicon}, - volume = {6}, - year = {2000}} - -@article{zhou2022photonic, - author = {Zhou, Hailong and Dong, Jianji and Cheng, Junwei and Dong, Wenchan and Huang, Chaoran and Shen, Yichen and Zhang, Qiming and Gu, Min and Qian, Chao and Chen, Hongsheng and others}, - journal = {Light: Science \& Applications}, - number = {1}, - pages = {30}, - publisher = {Nature Publishing Group UK London}, - title = {Photonic matrix multiplication lights up photonic accelerator and beyond}, - volume = {11}, - year = {2022}} - -@article{bains2020business, - author = {Bains, Sunny}, - journal = {Nat. Electron}, - number = {7}, - pages = {348--351}, - title = {The business of building brains}, - volume = {3}, - year = {2020}} - -@article{Hennessy2019-je, - abstract = {Innovations like domain-specific hardware, enhanced security, - open instruction sets, and agile chip development will lead the - way.}, - author = {Hennessy, John L and Patterson, David A}, - copyright = {http://www.acm.org/publications/policies/copyright\_policy\#Background}, - journal = {Commun. 
ACM}, - language = {en}, - month = jan, - number = 2, - pages = {48--60}, - publisher = {Association for Computing Machinery (ACM)}, - title = {A new golden age for computer architecture}, - volume = 62, - year = 2019} - -@article{Dongarra2009-na, - author = {Dongarra, Jack J}, - journal = {IBM Journal of Research and Development}, - pages = {3--4}, - title = {The evolution of high performance computing on system z}, - volume = 53, - year = 2009} - -@article{Ranganathan2011-dc, - author = {Ranganathan, Parthasarathy}, - journal = {Computer (Long Beach Calif.)}, - month = jan, - number = 1, - pages = {39--48}, - publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, - title = {From microprocessors to nanostores: Rethinking data-centric systems}, - volume = 44, - year = 2011} - -@article{Ignatov2018-kh, - abstract = {Over the last years, the computational power of mobile devices - such as smartphones and tablets has grown dramatically, reaching - the level of desktop computers available not long ago. While - standard smartphone apps are no longer a problem for them, there - is still a group of tasks that can easily challenge even - high-end devices, namely running artificial intelligence - algorithms. In this paper, we present a study of the current - state of deep learning in the Android ecosystem and describe - available frameworks, programming models and the limitations of - running AI on smartphones. We give an overview of the hardware - acceleration resources available on four main mobile chipset - platforms: Qualcomm, HiSilicon, MediaTek and Samsung. - Additionally, we present the real-world performance results of - different mobile SoCs collected with AI Benchmark that are - covering all main existing hardware configurations.}, - author = {Ignatov, Andrey and Timofte, Radu and Chou, William and Wang, Ke and Wu, Max and Hartley, Tim and Van Gool, Luc}, - publisher = {arXiv}, - title = {{AI} Benchmark: Running deep neural networks on Android smartphones}, - year = 2018} - -@article{Sze2017-ak, - abstract = {Deep neural networks (DNNs) are currently widely used for - many artificial intelligence (AI) applications including - computer vision, speech recognition, and robotics. While - DNNs deliver state-of-the-art accuracy on many AI tasks, it - comes at the cost of high computational complexity. - Accordingly, techniques that enable efficient processing of - DNNs to improve energy efficiency and throughput without - sacrificing application accuracy or increasing hardware cost - are critical to the wide deployment of DNNs in AI systems. - This article aims to provide a comprehensive tutorial and - survey about the recent advances towards the goal of - enabling efficient processing of DNNs. Specifically, it will - provide an overview of DNNs, discuss various hardware - platforms and architectures that support DNNs, and highlight - key trends in reducing the computation cost of DNNs either - solely via hardware design changes or via joint hardware - design and DNN algorithm changes. It will also summarize - various development resources that enable researchers and - practitioners to quickly get started in this field, and - highlight important benchmarking metrics and design - considerations that should be used for evaluating the - rapidly growing number of DNN hardware designs, optionally - including algorithmic co-designs, being proposed in academia - and industry. 
The reader will take away the following - concepts from this article: understand the key design - considerations for DNNs; be able to evaluate different DNN - hardware implementations with benchmarks and comparison - metrics; understand the trade-offs between various hardware - architectures and platforms; be able to evaluate the utility - of various DNN design techniques for efficient processing; - and understand recent implementation trends and - opportunities.}, - archiveprefix = {arXiv}, - author = {Sze, Vivienne and Chen, Yu-Hsin and Yang, Tien-Ju and Emer, Joel}, - copyright = {http://arxiv.org/licenses/nonexclusive-distrib/1.0/}, - eprint = {1703.09039}, - month = mar, - primaryclass = {cs.CV}, - title = {Efficient processing of deep neural networks: A tutorial and survey}, - year = 2017} - -@inproceedings{lin2022ondevice, - author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Gan, Chuang and Han, Song}, - booktitle = {ArXiv}, - title = {On-Device Training Under 256KB Memory}, - year = {2022}} - -@article{lin2023awq, - author = {Lin, Ji and Tang, Jiaming and Tang, Haotian and Yang, Shang and Dang, Xingyu and Han, Song}, - journal = {arXiv}, - title = {AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration}, - year = {2023}} - -@inproceedings{wang2020apq, - author = {Wang, Tianzhe and Wang, Kuan and Cai, Han and Lin, Ji and Liu, Zhijian and Wang, Hanrui and Lin, Yujun and Han, Song}, - booktitle = {2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, - doi = {10.1109/CVPR42600.2020.00215}, - pages = {2075-2084}, - title = {APQ: Joint Search for Network Architecture, Pruning and Quantization Policy}, - year = {2020}, - Bdsk-Url-1 = {https://doi.org/10.1109/CVPR42600.2020.00215}} - -@inproceedings{Li2020Additive, - author = {Yuhang Li and Xin Dong and Wei Wang}, - booktitle = {International Conference on Learning Representations}, - title = {Additive Powers-of-Two Quantization: An Efficient Non-uniform Discretization for Neural Networks}, - url = {https://openreview.net/forum?id=BkgXT24tDS}, - year = {2020}, - Bdsk-Url-1 = {https://openreview.net/forum?id=BkgXT24tDS}} - -@article{janapa2023edge, - author = {Janapa Reddi, Vijay and Elium, Alexander and Hymel, Shawn and Tischler, David and Situnayake, Daniel and Ward, Carl and Moreau, Louis and Plunkett, Jenny and Kelcey, Matthew and Baaijens, Mathijs and others}, - journal = {Proceedings of Machine Learning and Systems}, - title = {Edge Impulse: An MLOps Platform for Tiny Machine Learning}, - volume = {5}, - year = {2023}} - -@article{zhuang2020comprehensive, - author = {Zhuang, Fuzhen and Qi, Zhiyuan and Duan, Keyu and Xi, Dongbo and Zhu, Yongchun and Zhu, Hengshu and Xiong, Hui and He, Qing}, - journal = {Proceedings of the IEEE}, - number = {1}, - pages = {43--76}, - publisher = {IEEE}, - title = {A comprehensive survey on transfer learning}, - volume = {109}, - year = {2020}} - -@article{zhuang_comprehensive_2021, - author = {Zhuang, Fuzhen and Qi, Zhiyuan and Duan, Keyu and Xi, Dongbo and Zhu, Yongchun and Zhu, Hengshu and Xiong, Hui and He, Qing}, - doi = {10.1109/JPROC.2020.3004555}, - file = {Zhuang et al. - 2021 - A Comprehensive Survey on Transfer Learning.pdf:/Users/alex/Zotero/storage/CHJB2WE4/Zhuang et al. 
- 2021 - A Comprehensive Survey on Transfer Learning.pdf:application/pdf}, - issn = {0018-9219, 1558-2256}, - journal = {Proceedings of the IEEE}, - language = {en}, - month = jan, - number = {1}, - pages = {43--76}, - title = {A {Comprehensive} {Survey} on {Transfer} {Learning}}, - url = {https://ieeexplore.ieee.org/document/9134370/}, - urldate = {2023-10-25}, - volume = {109}, - year = {2021}, - Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/9134370/}, - Bdsk-Url-2 = {https://doi.org/10.1109/JPROC.2020.3004555}} - -@inproceedings{Norman2017TPUv1, - abstract = {Many architects believe that major improvements in cost-energy-performance must now come from domain-specific hardware. This paper evaluates a custom ASIC---called a Tensor Processing Unit (TPU) --- deployed in datacenters since 2015 that accelerates the inference phase of neural networks (NN). The heart of the TPU is a 65,536 8-bit MAC matrix multiply unit that offers a peak throughput of 92 TeraOps/second (TOPS) and a large (28 MiB) software-managed on-chip memory. The TPU's deterministic execution model is a better match to the 99th-percentile response-time requirement of our NN applications than are the time-varying optimizations of CPUs and GPUs that help average throughput more than guaranteed latency. The lack of such features helps explain why, despite having myriad MACs and a big memory, the TPU is relatively small and low power. We compare the TPU to a server-class Intel Haswell CPU and an Nvidia K80 GPU, which are contemporaries deployed in the same datacenters. Our workload, written in the high-level TensorFlow framework, uses production NN applications (MLPs, CNNs, and LSTMs) that represent 95\% of our datacenters' NN inference demand. Despite low utilization for some applications, the TPU is on average about 15X -- 30X faster than its contemporary GPU or CPU, with TOPS/Watt about 30X -- 80X higher. Moreover, using the CPU's GDDR5 memory in the TPU would triple achieved TOPS and raise TOPS/Watt to nearly 70X the GPU and 200X the CPU.}, - address = {New York, NY, USA}, - author = {Jouppi, Norman P. and Young, Cliff and Patil, Nishant and Patterson, David and Agrawal, Gaurav and Bajwa, Raminder and Bates, Sarah and Bhatia, Suresh and Boden, Nan and Borchers, Al and Boyle, Rick and Cantin, Pierre-luc and Chao, Clifford and Clark, Chris and Coriell, Jeremy and Daley, Mike and Dau, Matt and Dean, Jeffrey and Gelb, Ben and Ghaemmaghami, Tara Vazir and Gottipati, Rajendra and Gulland, William and Hagmann, Robert and Ho, C. 
Richard and Hogberg, Doug and Hu, John and Hundt, Robert and Hurt, Dan and Ibarz, Julian and Jaffey, Aaron and Jaworski, Alek and Kaplan, Alexander and Khaitan, Harshit and Killebrew, Daniel and Koch, Andy and Kumar, Naveen and Lacy, Steve and Laudon, James and Law, James and Le, Diemthu and Leary, Chris and Liu, Zhuyuan and Lucke, Kyle and Lundin, Alan and MacKean, Gordon and Maggiore, Adriana and Mahony, Maire and Miller, Kieran and Nagarajan, Rahul and Narayanaswami, Ravi and Ni, Ray and Nix, Kathy and Norrie, Thomas and Omernick, Mark and Penukonda, Narayana and Phelps, Andy and Ross, Jonathan and Ross, Matt and Salek, Amir and Samadiani, Emad and Severn, Chris and Sizikov, Gregory and Snelham, Matthew and Souter, Jed and Steinberg, Dan and Swing, Andy and Tan, Mercedes and Thorson, Gregory and Tian, Bo and Toma, Horia and Tuttle, Erick and Vasudevan, Vijay and Walter, Richard and Wang, Walter and Wilcox, Eric and Yoon, Doe Hyun}, - booktitle = {Proceedings of the 44th Annual International Symposium on Computer Architecture}, - doi = {10.1145/3079856.3080246}, - isbn = {9781450348928}, - keywords = {accelerator, neural network, MLP, TPU, CNN, deep learning, domain-specific architecture, GPU, TensorFlow, DNN, RNN, LSTM}, - location = {Toronto, ON, Canada}, - numpages = {12}, - pages = {1-12}, - publisher = {Association for Computing Machinery}, - series = {ISCA '17}, - title = {In-Datacenter Performance Analysis of a Tensor Processing Unit}, - url = {https://doi.org/10.1145/3079856.3080246}, - year = {2017}, - Bdsk-Url-1 = {https://doi.org/10.1145/3079856.3080246}} - -@article{Norrie2021TPUv2_3, - author = {Norrie, Thomas and Patil, Nishant and Yoon, Doe Hyun and Kurian, George and Li, Sheng and Laudon, James and Young, Cliff and Jouppi, Norman and Patterson, David}, - doi = {10.1109/MM.2021.3058217}, - journal = {IEEE Micro}, - number = {2}, - pages = {56-63}, - title = {The Design Process for Google's Training Chips: TPUv2 and TPUv3}, - volume = {41}, - year = {2021}, - Bdsk-Url-1 = {https://doi.org/10.1109/MM.2021.3058217}} - -@inproceedings{Jouppi2023TPUv4, - abstract = {In response to innovations in machine learning (ML) models, production workloads changed radically and rapidly. TPU v4 is the fifth Google domain specific architecture (DSA) and its third supercomputer for such ML models. Optical circuit switches (OCSes) dynamically reconfigure its interconnect topology to improve scale, availability, utilization, modularity, deployment, security, power, and performance; users can pick a twisted 3D torus topology if desired. Much cheaper, lower power, and faster than Infiniband, OCSes and underlying optical components are <5\% of system cost and <3\% of system power. Each TPU v4 includes SparseCores, dataflow processors that accelerate models that rely on embeddings by 5x--7x yet use only 5\% of die area and power. Deployed since 2020, TPU v4 outperforms TPU v3 by 2.1x and improves performance/Watt by 2.7x. The TPU v4 supercomputer is 4x larger at 4096 chips and thus nearly 10x faster overall, which along with OCS flexibility and availability allows a large language model to train at an average of ~60\% of peak FLOPS/second. For similar sized systems, it is ~4.3x--4.5x faster than the Graphcore IPU Bow and is 1.2x--1.7x faster and uses 1.3x--1.9x less power than the Nvidia A100. 
TPU v4s inside the energy-optimized warehouse scale computers of Google Cloud use ~2--6x less energy and produce ~20x less CO2e than contemporary DSAs in typical on-premise data centers.}, - address = {New York, NY, USA}, - articleno = {82}, - author = {Jouppi, Norm and Kurian, George and Li, Sheng and Ma, Peter and Nagarajan, Rahul and Nai, Lifeng and Patil, Nishant and Subramanian, Suvinay and Swing, Andy and Towles, Brian and Young, Clifford and Zhou, Xiang and Zhou, Zongwei and Patterson, David A}, - booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture}, - doi = {10.1145/3579371.3589350}, - isbn = {9798400700958}, - keywords = {warehouse scale computer, embeddings, supercomputer, domain specific architecture, reconfigurable, TPU, large language model, power usage effectiveness, CO2 equivalent emissions, energy, optical interconnect, IPU, machine learning, GPU, carbon emissions}, - location = {Orlando, FL, USA}, - numpages = {14}, - publisher = {Association for Computing Machinery}, - series = {ISCA '23}, - title = {TPU v4: An Optically Reconfigurable Supercomputer for Machine Learning with Hardware Support for Embeddings}, - url = {https://doi.org/10.1145/3579371.3589350}, - year = {2023}, - Bdsk-Url-1 = {https://doi.org/10.1145/3579371.3589350}} - + shorttitle = {Deep {Class}-{Incremental} {Learning}}, + author = {Zhou, Da-Wei and Wang, Qi-Wei and Qi, Zhi-Hong and Ye, Han-Jia and Zhan, De-Chuan and Liu, Ziwei}, + year = 2023, + month = feb, + publisher = {arXiv}, + url = {http://arxiv.org/abs/2302.03648}, + urldate = {2023-10-26}, + note = {arXiv:2302.03648 [cs]}, + annote = {Comment: Code is available at https://github.com/zhoudw-zdw/CIL\_Survey/}, + language = {en}, + bdsk-url-1 = {http://arxiv.org/abs/2302.03648} +} @misc{zhou2021analognets, + author = {Chuteng Zhou and Fernando Garcia Redondo and Julian B{\"u}chel and Irem Boybat and Xavier Timoneda Comas and S. R. Nandakumar and Shidhartha Das and Abu Sebastian and Manuel Le Gallo and Paul N. Whatmough}, + year = 2021, archiveprefix = {arXiv}, - author = {Chuteng Zhou and Fernando Garcia Redondo and Julian B{\"u}chel and Irem Boybat and Xavier Timoneda Comas and S. R. Nandakumar and Shidhartha Das and Abu Sebastian and Manuel Le Gallo and Paul N. Whatmough}, - eprint = {2111.06503}, - primaryclass = {cs.AR}, - title = {AnalogNets: ML-HW Co-Design of Noise-robust TinyML Models and Always-On Analog Compute-in-Memory Accelerator}, - year = 2021} - -@article{wearableinsulin, - article-number = {719}, - author = {Psoma, Sotiria D. 
and Kanthou, Chryso}, - doi = {10.3390/bios13070719}, - issn = {2079-6374}, - journal = {Biosensors}, - number = {7}, - pubmedid = {37504117}, - title = {Wearable Insulin Biosensors for Diabetes Management: Advances and Challenges}, - url = {https://www.mdpi.com/2079-6374/13/7/719}, - volume = {13}, - year = {2023}, - Bdsk-Url-1 = {https://www.mdpi.com/2079-6374/13/7/719}, - Bdsk-Url-2 = {https://doi.org/10.3390/bios13070719}} - -@article{glucosemonitor, - author = {Li, Jingzhen and Tobore, Igbe and Liu, Yuhang and Kandwal, Abhishek and Wang, Lei and Nie, Zedong}, - doi = {10.1109/JBHI.2021.3072628}, - journal = {IEEE Journal of Biomedical and Health Informatics}, - number = {9}, - pages = {3340-3350}, - title = {Non-invasive Monitoring of Three Glucose Ranges Based On ECG By Using DBSCAN-CNN}, - volume = {25}, - year = {2021}, - Bdsk-Url-1 = {https://doi.org/10.1109/JBHI.2021.3072628}} - -@article{plasma, - author = {Attia, Zachi and Sugrue, Alan and Asirvatham, Samuel and Ackerman, Michael and Kapa, Suraj and Friedman, Paul and Noseworthy, Peter}, - doi = {10.1371/journal.pone.0201059}, - journal = {PLOS ONE}, - month = {08}, - pages = {e0201059}, - title = {Noninvasive assessment of dofetilide plasma concentration using a deep learning (neural network) analysis of the surface electrocardiogram: A proof of concept study}, - volume = {13}, - year = {2018}, - Bdsk-Url-1 = {https://doi.org/10.1371/journal.pone.0201059}} - -@article{afib, - author = {Yutao Guo and Hao Wang and Hui Zhang and Tong Liu and Zhaoguang Liang and Yunlong Xia and Li Yan and Yunli Xing and Haili Shi and Shuyan Li and Yanxia Liu and Fan Liu and Mei Feng and Yundai Chen and Gregory Y.H. Lip and null null}, - doi = {10.1016/j.jacc.2019.08.019}, - journal = {Journal of the American College of Cardiology}, - number = {19}, - pages = {2365-2375}, - title = {Mobile Photoplethysmographic Technology to Detect Atrial Fibrillation}, - volume = {74}, - year = {2019}, - Bdsk-Url-1 = {https://doi.org/10.1016/j.jacc.2019.08.019}} - -@article{gaitathome, - author = {Yingcheng Liu and Guo Zhang and Christopher G. Tarolli and Rumen Hristov and Stella Jensen-Roberts and Emma M. Waddell and Taylor L. Myers and Meghan E. Pawlik and Julia M. Soto and Renee M. Wilson and Yuzhe Yang and Timothy Nordahl and Karlo J. Lizarraga and Jamie L. Adams and Ruth B. Schneider and Karl Kieburtz and Terry Ellis and E. 
Ray Dorsey and Dina Katabi}, - doi = {10.1126/scitranslmed.adc9669}, - eprint = {https://www.science.org/doi/pdf/10.1126/scitranslmed.adc9669}, - journal = {Science Translational Medicine}, - number = {663}, - pages = {eadc9669}, - title = {Monitoring gait at home with radio waves in Parkinson's disease: A marker of severity, progression, and medication response}, - url = {https://www.science.org/doi/abs/10.1126/scitranslmed.adc9669}, - volume = {14}, - year = {2022}, - Bdsk-Url-1 = {https://www.science.org/doi/abs/10.1126/scitranslmed.adc9669}, - Bdsk-Url-2 = {https://doi.org/10.1126/scitranslmed.adc9669}} - -@article{Chen2023, - author = {Chen, Emma and Prakash, Shvetank and Janapa Reddi, Vijay and Kim, David and Rajpurkar, Pranav}, - day = {06}, - doi = {10.1038/s41551-023-01115-0}, - issn = {2157-846X}, - journal = {Nature Biomedical Engineering}, - month = {Nov}, - title = {A framework for integrating artificial intelligence for clinical care with continuous therapeutic monitoring}, - url = {https://doi.org/10.1038/s41551-023-01115-0}, - year = {2023}, - Bdsk-Url-1 = {https://doi.org/10.1038/s41551-023-01115-0}} - -@article{Zhang2017, - author = {Zhang, Qingxue and Zhou, Dian and Zeng, Xuan}, - day = {06}, - doi = {10.1186/s12938-017-0317-z}, - issn = {1475-925X}, - journal = {BioMedical Engineering OnLine}, - month = {Feb}, - number = {1}, - pages = {23}, - title = {Highly wearable cuff-less blood pressure and heart rate monitoring with single-arm electrocardiogram and photoplethysmogram signals}, - url = {https://doi.org/10.1186/s12938-017-0317-z}, - volume = {16}, - year = {2017}, - Bdsk-Url-1 = {https://doi.org/10.1186/s12938-017-0317-z}} + eprint = {2111.06503}, + primaryclass = {cs.AR} +} +@article{zhou2022photonic, + author = {Zhou, Hailong and Dong, Jianji and Cheng, Junwei and Dong, Wenchan and Huang, Chaoran and Shen, Yichen and Zhang, Qiming and Gu, Min and Qian, Chao and Chen, Hongsheng and others}, + year = 2022, + journal = {Light: Science \& Applications}, + publisher = {Nature Publishing Group UK London}, + volume = 11, + number = 1, + pages = 30 +} +@inproceedings{zhu2018benchmarking, + author = {Zhu, Hongyu and Akrout, Mohamed and Zheng, Bojian and Pelegris, Andrew and Jayarajan, Anand and Phanishayee, Amar and Schroeder, Bianca and Pekhimenko, Gennady}, + year = 2018, + booktitle = {2018 IEEE International Symposium on Workload Characterization (IISWC)}, + pages = {88--100}, + organization = {IEEE} +} +@article{zhuang_comprehensive_2021, + author = {Zhuang, Fuzhen and Qi, Zhiyuan and Duan, Keyu and Xi, Dongbo and Zhu, Yongchun and Zhu, Hengshu and Xiong, Hui and He, Qing}, + year = 2021, + month = jan, + journal = {Proceedings of the IEEE}, + volume = 109, + number = 1, + pages = {43--76}, + doi = {10.1109/JPROC.2020.3004555}, + issn = {0018-9219, 1558-2256}, + url = {https://ieeexplore.ieee.org/document/9134370/}, + urldate = {2023-10-25}, + language = {en}, + bdsk-url-1 = {https://ieeexplore.ieee.org/document/9134370/}, + bdsk-url-2 = {https://doi.org/10.1109/JPROC.2020.3004555} +} +@article{zhuang2020comprehensive, + author = {Zhuang, Fuzhen and Qi, Zhiyuan and Duan, Keyu and Xi, Dongbo and Zhu, Yongchun and Zhu, Hengshu and Xiong, Hui and He, Qing}, + year = 2020, + journal = {Proceedings of the IEEE}, + publisher = {IEEE}, + volume = 109, + number = 1, + pages = {43--76} +} -@misc{yik2023neurobench, - archiveprefix = {arXiv}, - author = {Jason Yik and Soikat Hasan Ahmed and Zergham Ahmed and Brian Anderson and Andreas G. 
Andreou and Chiara Bartolozzi and Arindam Basu and Douwe den Blanken and Petrut Bogdan and Sander Bohte and Younes Bouhadjar and Sonia Buckley and Gert Cauwenberghs and Federico Corradi and Guido de Croon and Andreea Danielescu and Anurag Daram and Mike Davies and Yigit Demirag and Jason Eshraghian and Jeremy Forest and Steve Furber and Michael Furlong and Aditya Gilra and Giacomo Indiveri and Siddharth Joshi and Vedant Karia and Lyes Khacef and James C. Knight and Laura Kriener and Rajkumar Kubendran and Dhireesha Kudithipudi and Gregor Lenz and Rajit Manohar and Christian Mayr and Konstantinos Michmizos and Dylan Muir and Emre Neftci and Thomas Nowotny and Fabrizio Ottati and Ayca Ozcelikkale and Noah Pacik-Nelson and Priyadarshini Panda and Sun Pao-Sheng and Melika Payvand and Christian Pehle and Mihai A. Petrovici and Christoph Posch and Alpha Renner and Yulia Sandamirskaya and Clemens JS Schaefer and Andr{\'e} van Schaik and Johannes Schemmel and Catherine Schuman and Jae-sun Seo and Sadique Sheik and Sumit Bam Shrestha and Manolis Sifalakis and Amos Sironi and Kenneth Stewart and Terrence C. Stewart and Philipp Stratmann and Guangzhi Tang and Jonathan Timcheck and Marian Verhelst and Craig M. Vineyard and Bernhard Vogginger and Amirreza Yousefzadeh and Biyan Zhou and Fatima Tuz Zohora and Charlotte Frenkel and Vijay Janapa Reddi}, - eprint = {2304.04640}, - primaryclass = {cs.AI}, - title = {NeuroBench: Advancing Neuromorphic Computing through Collaborative, Fair and Representative Benchmarking}, - year = {2023}} \ No newline at end of file +@misc{bigbatch, + title={ImageNet Training in Minutes}, + author={Yang You and Zhao Zhang and Cho-Jui Hsieh and James Demmel and Kurt Keutzer}, + year={2018}, + eprint={1709.05011}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} \ No newline at end of file diff --git a/training.qmd b/training.qmd index 90f5cefe..1fc0d270 100644 --- a/training.qmd +++ b/training.qmd @@ -1,95 +1,926 @@ # AI Training +![_DALL·E 3 Prompt: An illustration for AI training, depicting a neural network with neurons that are being repaired and firing. The scene includes a vast network of neurons, each glowing and firing to represent activity and learning. Among these neurons, small figures resembling engineers and scientists are actively working, repairing and tweaking the neurons. These miniature workers symbolize the process of training the network, adjusting weights and biases to achieve convergence. The entire scene is a visual metaphor for the intricate and collaborative effort involved in AI training, with the workers representing the continuous optimization and learning within a neural network. The background is a complex array of interconnected neurons, creating a sense of depth and complexity._](./images/ai_training.png) + +The process of training is central to developing accurate and useful AI systems using machine learning techniques. At a high level, training involves feeding data into machine learning algorithms so they can learn patterns and make predictions. However, effectively training models requires tackling a variety of challenges around data, algorithms, optimization of model parameters, and enabling generalization. In this chapter, we will dive into the nuances and considerations around training machine learning models. + ::: {.callout-tip} + ## Learning Objectives -* coming soon. 
+* Understand the fundamental mathematics of neural networks, including linear transformations, activation functions, loss functions, backpropagation, and optimization via gradient descent. + +* Learn how to effectively leverage data for model training through proper splitting into train, validation, and test sets to enable generalization. + +* Learn various optimization algorithms like stochastic gradient descent and adaptations like momentum and Adam that accelerate training. + +* Understand techniques for hyperparameter tuning and regularization to improve model generalization through reducing overfitting. + +* Learn proper weight initialization strategies matched to model architectures and activation choices that accelerate convergence. + +* Identify the bottlenecks posed by key operations like matrix multiplication during training and deployment. + +* Learn how hardware improvements like GPUs, TPUs, and specialized accelerators speed up critical math operations to accelerate training. + +* Understand parallelization techniques, both data and model parallelism, to distribute training across multiple devices and accelerate system throughput. ::: ## Introduction -Explanation: An introductory section sets the stage for the reader, explaining what AI training is and why it's crucial, especially in the context of embedded systems. It helps to align the reader's expectations and prepares them for the upcoming content. +Training is a critical process for developing accurate and useful AI systems using machine learning. The goal of training is to create a machine learning model that can generalize to new, unseen data, rather than memorizing the training examples. This is done by feeding **training data** into algorithms that learn patterns from these examples by adjusting internal parameters. + +The algorithms minimize a **loss function**, which compares their predictions on the training data to the known labels or solutions, guiding the learning. Effective training often requires high-quality, representative training data sets that are large enough to capture variability in the real-world use cases. + +It also requires choosing an **algorithm** suited to the task, whether that be a neural network for computer vision, a reinforcement learning algorithm for robotic control, or a tree-based method for categorical prediction. Careful tuning is needed for the model structure, such as neural network depth and width, and learning parameters like step size and regularization strength. + +Techniques to prevent **overfitting** like regularization penalties and validation with held-out data are also important. Overfitting can occur when a model fits the training data too closely, failing to generalize to new data. This can happen if the model is too complex or trained for too long. + +To avoid overfitting **regularization** techniques can help constrain the model. One regularization method is adding a penalty term to the loss function that discourages complexity, like the L2 norm of the weights. This penalizes large parameter values. Another technique is dropout, where a percentage of neurons are randomly set to zero during training. This reduces co-adaptation of neurons. + +**Validation** methods also help detect and avoid overfitting. Part of the training data is held out from the training loop as a validation set. The model is evaluated on this data. If validation error increases while training error decreases, overfitting is occurring. 
The training can then be stopped early or regularized more strongly. Careful use of regularization and validation enables models to train to maximum capability without overfitting the training data. + +Training takes significant **computing resources**, especially for deep neural networks used in computer vision, natural language processing, and other areas. These networks have millions of adjustable weights that must be tuned through extensive training. Hardware improvements and distributed training techniques have enabled training ever larger neural nets that can achieve human-level performance on some tasks. + +In summary, some key points about training: + +* **Data is crucial:** Machine learning models learn from examples in training data. More high-quality, representative data leads to better model performance. Data needs to be processed and formatted for training. +* **Algorithms learn from data:** Different algorithms (neural networks, decision trees, etc.) have different approaches to finding patterns in data. Choosing the right algorithm for the task is important. +* **Training refines model parameters:** Model training adjusts internal parameters to find patterns in data. Advanced models like neural networks have many adjustable weights. Training iteratively adjusts weights to minimize a loss function. +* **Generalization is the goal:** A model that overfits to the training data will not generalize well. Regularization techniques (dropout, early stopping, etc.) reduce overfitting. Validation data is used to evaluate generalization. +* **Training takes compute resources:** Training complex models requires significant processing power and time. Hardware improvements and distributed training across GPUs/TPUs have enabled advances. + +We will walk you through these details in the rest of the sections. Understanding how to effectively leverage data, algorithms, parameter optimization, and generalization through thorough training is essential for developing capable, deployable AI systems that work robustly in the real world. + +## Mathematics of Neural Networks + +Deep learning has revolutionized the fields of machine learning and artificial intelligence, enabling computers to learn complex patterns and make intelligent decisions. At the heart of the deep learning revolution is the neural network, which, as discussed in section 3 "Deep Learning Primer", is a cornerstone in some of these advancements. + +Neural networks are made up of simple functions layered on top of each other. Each **layer** takes in some data, performs some computation, and passes it to the next layer. These layers learn progressively high level features that are useful for the task the network is trained to perform. For example, in a network trained for image recognition, the input layer may take in pixel values, while the next layers may detect simple shapes like edges, then the layers after that may detect more complex shapes like noses or eyes, and so on. The final output layer classifies the image as a whole. + +The network in a neural network refers to how these layers are connected. Each layer's output is considered as a single neuron, and is connected to many other neurons in the layers preceding it, forming a "network". The way these neurons interact with each other is determined by the weights between them, which model synaptic strengths similar to that of a brain's neuron. The neural network is trained by adjusting these weights. 
Concretely, the weights are initially set randomly, then an input is fed in and the output is compared to the desired result, and finally the weights are tweaked to make the network better. This process is repeated until the network reliably minimizes the loss, indicating it has learned the patterns in the data.
+
+How is this process defined mathematically? Formally, neural networks are mathematical models that consist of alternating **linear** and **nonlinear** operations, parameterized by a set of learnable **weights** that are trained to minimize some **loss** function. This loss function is a measure of how good our model is with respect to fitting our training data, and it produces a numerical value when evaluated on our model against the training data. Training neural networks involves repeatedly evaluating the loss function on many different datapoints to get a measure of how good our model is, then continuously tweaking the weights of our model using backpropagation so that the loss decreases, which ultimately optimizes the model to fit our data.
+
+### Neural Network Notation
+
+Diving into the details, the core of a neural network can be viewed as a sequence of alternating linear and nonlinear operations:
+$$
+L_i = W_i A_{i-1}
+$$
+$$
+A_i = F_i(L_{i})
+$$
+
+:::{.callout-note}
+Why are the nonlinear operations necessary? If we only had linear layers, the entire network would be equivalent to a single linear layer consisting of the product of the linear operators. Hence, the nonlinear functions play a key role in the power of neural networks as they enhance the neural network's ability to fit functions.
+:::
+
+:::{.callout-note}
+Convolutions are also linear operators, and can be cast as a matrix multiplication.
+:::
+
+![Neural Network Diagram: Neural networks consist of alternating linear and nonlinear layers. Linear layers represent the interaction between the outputs of the prior layer and learned weights.](images/aitrainingnn.png)
+
+where $A_{0}$ is a vector input to the neural network (i.e: an image that we want the neural network to classify, or some other data that the neural network operates on), $A_{n}$ (where $n$ is the number of layers of the network) is the vector output of the neural network (i.e: a vector of size 10 in the case of classifying pictures of handwritten digits), the $W_i$s are the weights of the neural network that are tweaked at training time to fit our data, and $F_{i}$ is that layer's nonlinear activation function (i.e: ReLU, softmax, etc.). As defined, the intermediate output of the neural network is a vector of real-valued numbers with dimensions:
+
+$$
+L_i, A_i \in \mathbb{R}^{d_{i}}
+$$
+
+where $d_{i}$ is the number of neurons at layer $i$; in the case of the first layer $i=0$, $d_{i}$ is the dimension of the input data, and in the last layer $i=n$, $d_{n}$ is the dimension of the output label, and anything in between can be set arbitrarily and may be viewed as the **architecture** of the neural network (i.e: the dimensionality of the intermediate layers). The weights, which determine how the layers of the neural network interact with each other, are therefore matrices of real numbers with shape
+
+$$
+W_i \in \mathbb{R}^{d_{i} \times d_{i-1}}
+$$
+
+Our neural network, as defined, performs a sequence of linear and nonlinear operations on the input data ($A_{0}$) to obtain predictions ($A_{n}$), which hopefully is a good answer to what we want the neural network to do on the input (i.e: classify if the input image is a cat or not).
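+
+To make the notation concrete, here is a minimal sketch of this forward pass in NumPy. The layer sizes, the initialization scale, and the use of ReLU at every layer are illustrative choices for this sketch, not anything prescribed by the notation above:
+
+```python
+import numpy as np
+
+
+def relu(x):
+    return np.maximum(0, x)
+
+
+rng = np.random.default_rng(0)
+
+# Illustrative architecture: a 100-dimensional input, a 30-neuron hidden
+# layer, and a single output, i.e. d_0 = 100, d_1 = 30, d_2 = 1.
+dims = [100, 30, 1]
+
+# W_i has shape (d_i, d_{i-1}), matching the notation above.
+weights = [rng.normal(scale=0.1, size=(dims[i], dims[i - 1]))
+           for i in range(1, len(dims))]
+
+
+def forward(a0, weights):
+    """Alternate linear layers L_i = W_i A_{i-1} with nonlinearities A_i = F_i(L_i)."""
+    a = a0
+    for w in weights:
+        lin = w @ a      # linear operation L_i
+        a = relu(lin)    # nonlinear activation A_i
+    return a
+
+
+x = rng.normal(size=(100,))        # a single input vector A_0
+print(forward(x, weights).shape)   # -> (1,)
+```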
Our neural network may then be represented succinctly as a function $N$ which takes in an input $x \in \mathbb{R}^{d_0}$ parameterized by $W_1, ..., W_n$: + +$$ +\begin{align*} +N(x; W_1, ... W_n) &= \text{Let } A_0 = x, \text{ then output } A_n +\end{align*} +$$ + +Next we will see how to evaluate this neural network against training data by introducing a loss function. + +### Loss Function as a Measure of Goodness of Fit against Training Data + +After defining our neural network, we are given some training data, which is a set of points ${(x_j, y_j)}$ for $j=1..M$, and we want to evaluate how good our neural network is on fitting this data. To do this, we introduce a **loss function**, which is a function that takes the output of the neural network on a particular datapoint ($N(x_j; W_1, ..., W_n)$), and compares it against the "label" of that particular datapoint (the corresponding $y_j$), and outputs a single numerical scalar (i.e: one real number) that represents how "good" the neural network fit that particular data point; the final measure of how good the neural network is on the entire dataset is therefore just the average of the losses across all datapoints. + +There are many different types of loss functions, for example, in the case of image classification, we might use the cross-entropy loss function, which tells us how good two vectors that represent classification predictions compare (i.e: if our prediction predicts that an image is more likely a dog, but the label says it is a cat, it will return a high "loss" indicating a bad fit). + +Mathematically, this loss function is a function which takes in two real-valued vectors of the shape of the label, and outputs a single numerical scalar +$$ +L: \mathbb{R}^{d_{n}} \times \mathbb{R}^{d_{n}} \longrightarrow \mathbb{R} +$$ + +and the loss across the entire dataset can be written as the average loss across all datapoints in the training data + +> Loss Function for Optimizing Neural Network Model on a Dataset +$$ +L_{full} = \frac{1}{M} \sum_{j=1}^{M} L(N(x_j; W_1,...W_n), y_j) +$$ + +### Training Neural Networks with Gradient Descent + +Now that we have a measure of how good our network fits the training data, we can optimize the weights of the neural network to minimize this loss. At a high level, we tweak the parameters of the real-valued matrices $W_i$s so that the loss function $L_{full}$ is minimized. Overall, our mathematical objective is + +> Neural Network Training Objective +$$ +min_{W_1, ..., W_n} L_{full} +$$ +$$ += min_{W_1, ..., W_n} \frac{1}{M} \sum_{j=1}^{M} L(N(x_j; W_1,...W_n), y_j) +$$ + +So how do we optimize this objective? Recall from calculus that minimizing a function can be done by taking the derivative of the function with respect to the input parameters and tweaking the parameters in the direction of the gradient. This technique is called **gradient descent** and concretely involves calculating the derivative of the loss function $L_{full}$ with respect to $W_1, ..., W_n$ to obtain a gradient for these parameters to take a step in, then updating these parameters in the direction of the gradient. Thus, we can train our neural network using gradient descent which repeatedly applies the update rule + +> Gradient Descent Update Rule +$$ +W_i := W_i - \lambda \frac{\partial L_{full}}{\partial W_i} \mbox{ for } i=1..n +$$ + +:::{.callout-note} +In practice, the gradient is computed over a minibatch of datapoints, to improve computational efficiency. 
This is called mini-batch or stochastic gradient descent.
+:::
+
+where $\lambda$ is the step size or learning rate of our tweaks. In training our neural network, we repeatedly perform the step above until convergence, or when the loss no longer decreases. This prior approach is known as full gradient descent, since we are computing the derivative with respect to the entire training data and only then taking a single gradient step; a more efficient approach is to calculate the gradient with respect to just a random batch of datapoints and then take a step, a process known as mini-batch or stochastic gradient descent [@sgd], which is more efficient since we now take many more steps per pass over the entire training data. Next we will cover the mathematics behind computing the gradient of the loss function with respect to the $W_i$s, a process known as backpropagation.
+
+![Optimizing a neural network involves repeated application of gradient descent. This involves minimizing the loss function, which can be done by repeatedly taking steps in the direction of the gradient of the network with respect to the loss function.](images/aitrainingsgd.png)
+
+### Backpropagation
+
+Training neural networks involves repeated applications of the gradient descent algorithm, which requires computing the derivative of the loss function with respect to the $W_i$s. How do we compute the derivative of the loss with respect to the $W_i$s, given that the $W_i$s are nested functions of each other in a deep neural network? The trick is to leverage the **chain rule**: we can compute the derivative of the loss with respect to the $W_i$s by repeatedly applying the chain rule, in a complete process known as backpropagation. Specifically, we can calculate the gradients by computing the derivative of the loss with respect to the outputs of the last layer, then progressively use this to compute the derivative of the loss with respect to each prior layer, all the way to the input layer. This process starts from the end of the network (the layer closest to the output) and progresses backwards, and hence gets its name backpropagation.
+
+Let's break this down. We can compute the derivative of the loss with respect to _the outputs of each layer of the neural network_ by using repeated applications of the chain rule
+
+$$
+\frac{\partial L_{full}}{\partial L_{n}} = \frac{\partial A_{n}}{\partial L_{n}} \frac{\partial L_{full}}{\partial A_{n}}
+$$
+
+$$
+\frac{\partial L_{full}}{\partial L_{n-1}} = \frac{\partial A_{n-1}}{\partial L_{n-1}} \frac{\partial L_{n}}{\partial A_{n-1}} \frac{\partial A_{n}}{\partial L_{n}} \frac{\partial L_{full}}{\partial A_{n}}
+$$
+
+or more generally
+
+$$
+\frac{\partial L_{full}}{\partial L_{i}} = \frac{\partial A_{i}}{\partial L_{i}} \frac{\partial L_{i+1}}{\partial A_{i}} ... \frac{\partial A_{n}}{\partial L_{n}} \frac{\partial L_{full}}{\partial A_{n}}
+$$
+
+:::{.callout-note}
+In what order should we perform this computation? It is preferable from a computational perspective to perform the calculations from the end to the front
+(i.e: first compute $\frac{\partial L_{full}}{\partial A_{n}}$ then the prior terms, rather than start in the middle), since this avoids materializing and computing large Jacobians. This is because $\frac{\partial L_{full}}{\partial A_{n}}$ is a vector, hence any matrix operation that includes this term has an output that is squished to be a vector.
Thus performing the computation from the end avoids large matrix-matrix multiplications by ensuring that the intermediate products are vectors. +::: + +:::{.callout-note} +In our notation, we assume the intermediate activations $A_{i}$ are _column_ vectors, rather than _row_ vectors, hence the chain rule is $\frac{\partial L}{\partial L_{i}} = \frac{\partial L_{i+1}}{\partial L_{i}} ... \frac{\partial L}{\partial L_{n}}$ rather than $\frac{\partial L}{\partial L_{i}} = \frac{\partial L}{\partial L_{n}} ... \frac{\partial L_{i+1}}{\partial L_{i}}$ +::: + +After computing the derivative of the loss with respect to the _output of each layer_, we can easily obtain the derivative of the loss with respect to the _parameters_, again using the chain rule: + +$$ +\frac{\partial L_{full}}{W_{i}} = \frac{\partial L_{i}}{\partial W_{i}} \frac{\partial L_{full}}{\partial L_{i}} +$$ + +And this is ultimately how the derivatives of the layers' weights are computed using backpropagation! What does this concretely look like in a specific example? Below we walk through a specific example on a simple 2 layer neural network, on a regression task using a MSE loss function, with 100-dimensional inputs and a 30-dimensional hidden layer: + +> Example of Backpropagation\ +Suppose we have a two-layer neural network +$$ +L_1 = W_1 A_{0} +$$ +$$ +A_1 = ReLU(L_1) +$$ +$$ +L_2 = W_2 A_{1} +$$ +$$ +A_2 = ReLU(L_2) +$$ +$$ +NN(x) = \mbox{Let } A_{0} = x \mbox{ then output } A_2 +$$ +where $W_1 \in \mathbb{R}^{30 \times 100}$ and $W_2 \in \mathbb{R}^{1 \times 30}$. Furthermore suppose we use the MSE loss function: +$$ +L(x, y) = (x-y)^2 +$$ +We wish to compute +$$ +\frac{\partial L(NN(x), y)}{\partial W_i} \mbox{ for } i=1,2 +$$ +Note the following: +$$ +\frac{\partial L(x, y)}{\partial x} = 2 \times (x-y) +$$ +$$ +\frac{\partial ReLU(x)}{\partial x} \delta = \left\{\begin{array}{lr} + 0 & \text{for } x \leq 0 \\ + 1 & \text{for } x \geq 0 \\ + \end{array}\right\} \odot \delta +$$ +$$ +\frac{\partial WA}{\partial A} \delta = W^T \delta +$$ +$$ +\frac{\partial WA}{\partial W} \delta = \delta A^T +$$ +Then we have +$$ +\frac{\partial L(NN(x), y)}{\partial W_2} = \frac{\partial L_2}{\partial W_2} \frac{\partial A_2}{\partial L_2} \frac{\partial L(NN(x), y)}{\partial A_2} +$$ +$$ += (2L(NN(x) - y) \odot ReLU'(L_2)) A_1^T +$$ +and +$$ +\frac{\partial L(NN(x), y)}{\partial W_1} = \frac{\partial L_1}{\partial W_1} \frac{\partial A_1}{\partial L_1} \frac{\partial L_2}{\partial A_1} \frac{\partial A_2}{\partial L_2} \frac{\partial L(NN(x), y)}{\partial A_2} +$$ +$$ += [ReLU'(L_1) \odot (W_2^T [2L(NN(x) - y) \odot ReLU'(L_2)])] A_0^T +$$ + +::: {.callout-tip} +Double check your work by making sure that the shapes are correct! + +* All hadamard products ($\odot$) should operate on tensors of the same shape +* All matrix multiplications should operate on matrices that share a common dimension (i.e: m by n, n by k) +* All gradients with respect to the weights should have the same shape as the weight matrices themselves +::: + +The entire backpropagation process can be complex, especially for networks that are very deep. Fortunately, machine learning frameworks like PyTorch support automatic differentiation, which performs backpropagation for us. In these machine learning frameworks we simply need to specify the forward pass, and the derivatives will be automatically computed for us. Nevertheless, it is beneficial to understand the theoretical process that is happening under the hood in these machine-learning frameworks. 
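+
+To tie the pieces together, below is a minimal NumPy sketch of the two-layer example above: one forward pass, a manual backward pass that mirrors the chain-rule derivation, and a single gradient descent update. The random data, learning rate, and initialization scale are illustrative choices; a framework's automatic differentiation (for example, calling `loss.backward()` on PyTorch tensors created with `requires_grad=True`) would produce the same gradients without the hand-written backward pass.
+
+```python
+import numpy as np
+
+rng = np.random.default_rng(0)
+
+# Shapes follow the worked example: W1 is 30x100, W2 is 1x30.
+W1 = rng.normal(scale=0.1, size=(30, 100))
+W2 = rng.normal(scale=0.1, size=(1, 30))
+x = rng.normal(size=(100, 1))   # a single input A_0, as a column vector
+y = np.array([[1.0]])           # its regression target
+
+
+def relu(v):
+    return np.maximum(0, v)
+
+
+def drelu(v):
+    return (v > 0).astype(v.dtype)   # elementwise ReLU'(v)
+
+
+# Forward pass; the intermediate activations are kept for the backward pass.
+L1 = W1 @ x
+A1 = relu(L1)
+L2 = W2 @ A1
+A2 = relu(L2)
+loss = ((A2 - y) ** 2).item()
+
+# Backward pass: apply the chain rule from the output back toward the input.
+dA2 = 2 * (A2 - y)        # derivative of the squared error w.r.t. A2
+dL2 = drelu(L2) * dA2     # back through the output nonlinearity
+dW2 = dL2 @ A1.T          # gradient for W2, shape (1, 30) like W2
+dA1 = W2.T @ dL2          # back through the second linear layer
+dL1 = drelu(L1) * dA1     # back through the hidden nonlinearity
+dW1 = dL1 @ x.T           # gradient for W1, shape (30, 100) like W1
+
+# One gradient descent step with learning rate lambda.
+lr = 0.01
+W1 -= lr * dW1
+W2 -= lr * dW2
+```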
+ +:::{.callout-note} +As seen above, intermediate activations $A_i$ are re-used in backpropagation. To improve performance, these activations are cached from the forward pass to avoid recomputing them. However, this means that activations must be kept in memory between the forward and backward passes, leading to higher memory usage. If the network and batchsize is large, this may lead to memory issues. Similarly, the derivatives with respect to each layer's outputs are cached to avoid recomputation. +::: + +## Training Data + +To enable effective training of neural networks, the available data must be split into training, validation, and test sets. The training set is used to train the model parameters. The validation set evaluates the model during training to tune hyperparameters and prevent overfitting. The test set provides an unbiased final evaluation of the trained model's performance. + +Maintaining clear splits between train, validation, and test sets with representative data in each is crucial to properly training, tuning, and evaluating models to achieve the best real-world performance. To this end, we will learn about the common pitfalls or mistakes that people make in creating these data splits. + +Here is a summary table for training, validation, and test data splits: + +| Data Split | Purpose | Typical Size | +|-|-|-| +| Training Set | Train the model parameters | 60-80% of total data | +| Validation Set | Evaluate model during training to tune hyperparameters and prevent overfitting | ∼20% of total data | +| Test Set | Provide unbiased evaluation of final trained model | ∼20% of total data | + +### Dataset Splits + +#### Training Set + +The training set is used to actually train the model. It is the largest subset consisting of typically 60-80% of the total data. The model sees and learns from the training data in order to make predictions. A sufficiently large and representative training set is required for the model to effectively learn the underlying patterns. + +#### Validation Set + +The validation set is used to evaluate the model during training, usually after each epoch. Typically 20% of the data is allocated for the validation set. The model does not learn or update its parameters based on the validation data. It is used to tune hyperparameters and make other tweaks to improve training. Monitoring metrics like loss and accuracy on the validation set prevents overfitting on just the training data. + +#### Test Set + +The test set acts as a completely unseen dataset that the model did not see during training. It is used to provide an unbiased evaluation of the final trained model. Typically 20% of the data is reserved for testing. Maintaining a hold-out test set is vital for obtaining an accurate estimate of how the trained model would perform on real world unseen data. Data leakage from the test set must be avoided at all costs. + +The relative proportions of the training, validation and test sets can vary based on data size and application. But following the general guideline of a 60/20/20 split is a good starting point. Careful splitting of data ensures models are properly trained, tuned and evaluated to achieve the best performance. + +### Common Pitfalls and Mistakes + +#### Insufficient Training Data -- Brief overview of what AI training entails -- Importance of training in the context of embedded AI +Allocating too little data to the training set is a common mistake when splitting data that can severely impact model performance. 
If the training set is too small, the model will not have enough samples to effectively learn the true underlying patterns in the data. This leads to high variance and causes the model to fail to generalize well to new data. -## Types of Training +For example, if you are training an image classification model to recognize handwritten digits, providing only 10 or 20 images per digit class would be completely inadequate. The model would struggle to capture the wide variances in writing styles, rotations, stroke widths and other variations with so few examples. -Explanation: Understanding the different types of training methods is foundational. It allows the reader to appreciate the diversity of approaches and to select the most appropriate one for their specific embedded AI application. +As a rule of thumb, the training set size should be at least in the hundreds or thousands of examples for most machine learning algorithms to work effectively. For deep neural networks, especially those using convolutional layers, the training set often needs to be in the tens or hundreds of thousands due to the large number of parameters. -- Supervised Learning -- Unsupervised Learning -- Reinforcement Learning -- Semi-supervised Learning +Insufficient training data typically manifests in symptoms like high error rates on validation/test sets, low model accuracy, high variance, and overfitting on the small training set samples. Collecting more quality training data is the solution. Data augmentation techniques can also help virtually increase training data size for images, audio etc. -## Data Preparation +Carefully factoring in the model complexity and problem difficulty when allocating training samples is important to ensure sufficient data is available for the model to learn successfully. Following guidelines on minimum training set sizes for different algorithms is also recommended. Insufficient training data is a fundamental issue that will undermine the overall success of any machine learning application. -Explanation: Data is the fuel for AI. This section is essential because it guides the reader through the initial steps of gathering and preparing data, which is a prerequisite for effective training. +![Overfitting is one potential problem when training machine learning models, and occurs when the model fits the training data well but fails to generalize to the test data.](images/aitrainingfit.png) -- Data Collection -- Data Annotation -- Data Augmentation -- Data Preprocessing +#### Data Leakage Between Sets -## Training Algorithms +Data leakage refers to the unintentional transfer of information between the training, validation, and test sets. This violates the fundamental assumption that the splits are completely separated. Data leakage leads to seriously compromised evaluation results and inflated performance metrics. -Explanation: This section delves into the algorithms that power the training process. It's crucial for understanding how models learn from data and how to implement these algorithms efficiently in embedded systems. +A common way data leakage can occur is if some samples from the test set inadvertently get included in the training data. Now when evaluating on the test set, the model has already seen some of the data which gives overly optimistic scores. For example, if 2% of the test data leaks into the training set of a binary classifier, it can result in a accuracy boost of up to 20%! -- Gradient Descent -- Backpropagation -- Optimizers (SGD, Adam, RMSprop, etc.) 
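+
+As a concrete guard against this kind of leakage, the splitting itself can be made explicit and checked. Below is a minimal sketch using scikit-learn's `train_test_split` on synthetic stand-in data; the array names, dataset size, and 60/20/20 ratios are illustrative assumptions, not fixed requirements:
+
+```python
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+# Synthetic stand-in data: 1000 samples, 20 features, imbalanced binary labels.
+rng = np.random.default_rng(0)
+X = rng.normal(size=(1000, 20))
+y = (rng.random(1000) < 0.2).astype(int)   # roughly 20% positive class
+
+# Split indices rather than rows so we can verify the splits never overlap.
+idx = np.arange(len(X))
+
+# First carve out a 20% test set, then split the rest 75/25 -> 60/20/20 overall.
+# Shuffling, a fixed seed, and stratification on y guard against leakage and
+# skewed class proportions between the splits.
+idx_trainval, idx_test = train_test_split(
+    idx, test_size=0.20, random_state=42, shuffle=True, stratify=y)
+idx_train, idx_val = train_test_split(
+    idx_trainval, test_size=0.25, random_state=42, shuffle=True,
+    stratify=y[idx_trainval])
+
+# No sample may appear in more than one split.
+assert not set(idx_train) & set(idx_val)
+assert not set(idx_train) & set(idx_test)
+assert not set(idx_val) & set(idx_test)
+
+X_train, y_train = X[idx_train], y[idx_train]
+X_val, y_val = X[idx_val], y[idx_val]
+X_test, y_test = X[idx_test], y[idx_test]
+```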
+More subtle forms of leakage can happen if the data splits are not done carefully. If the splits are not properly randomized and shuffled, samples close to each other in the dataset may end up across different splits. This creates information bleed through based on proximity in the dataset. Time series data is especially vulnerable unless special cross validation techniques are used. -## Training Environments +Preventing data leakage requires creating solid separation between splits - no sample should exist in more than one split. Shuffling and randomized splitting help create robust divisions. Cross validation techniques can be used for more rigorous evaluation. Detecting leakage is difficult buttelltale signs include models doing way better on test vs. validation data. -Explanation: Different training environments have their own pros and cons. This section helps the reader make informed decisions about where to train their models, considering factors like computational resources and latency. +Data leakage severely compromises the validity of evaluation because the model has already partially seen the test data. No amount of tuning or complex architectures can substitute for clean data splits. It is better to be conservative and create complete separation between splits to avoid this fundamental mistake in machine learning pipelines. -- Local vs. Cloud -- Specialized Hardware (GPUs, TPUs, etc.) +#### Small or Unrepresentative Validation Set + +The validation set is used to evaluate models during training and for hyperparameter tuning. If the validation set is too small or not representative of the real data distribution, it will not provide reliable or stable evaluations during training. This makes model selection and tuning more difficult. + +For example, if the validation set only contains 100 samples, metrics calculated on it will have high variance. The accuracy may fluctuate up to 5-10% between epochs just due to noise. This makes it difficult to know if a drop in validation accuracy is due to overfitting or natural variance. With a larger validation set of say 1000 samples, the metrics will be much more stable. + +Additionally, if the validation set is not representative, perhaps missing certain subclasses, the estimated skill of the model may be inflated. This could lead to poor choices of hyperparameters or stopping training prematurely. Models selected based on such biased validation sets do not generalize well to real data. + +A good rule of thumb is the validation set size should be at least several hundred samples, and up to 10-20% size of the training set. The splits should also be stratified, especially if working with imbalanced datasets. A larger validation set that well represents the original data characteristics is essential for proper model selection and tuning. + +Care should be taken that the validation set is also not too large, leaving insufficient samples for training. Overall, the validation set is a critical piece of the data splitting process and care should be taken to avoid the pitfalls of small, inadequate samples that negatively impact model development. + +#### Reusing the Test Set Multiple Times + +The test set is designed to provide an unbiased evaluation of the fully-trained model only once at the end of the model development process. Reusing the test set multiple times during development for model evaluation, hyperparameter tuning, model selection etc. can result in overfitting on the test data. 
+ +If the test set is reused as part of the validation process, the model may start to see and learn from the test samples. This coupled with intentionally or unintentionally optimizing model performance on the test set can artificially inflate metrics like accuracy. + +For example, if the test set is used repeatedly for model selection out of 5 architectures, the model may achieve 99% test accuracy just by memorizing the samples rather than learning generalizable patterns. However, deployed in the real world, the accuracy could drop to 60% on new data. + +Best practice is to interact with the test set only once at the very end to report unbiased metrics on how the final tuned model would perform in the real world. The validation set should be used for all parameter tuning, model selection, early stopping etc. while developing the model. + +Maintaining the complete separation of training/validation from the test set is essential to obtain accurate estimates of model performance. Even minor deviations from single use of the test set could positively bias results and metrics, providing an overly optimistic view of real world efficacy. + +#### Same Data Splits Across Experiments + +When comparing different machine learning models or experimenting with various architectures and hyperparameters, using the same data splits for training, validation and testing across the different experiments can introduce bias and invalidate the comparisons. + +If the same splits are reused, the evaluation results may be overly correlated and not provide an accurate measure of which model performs better. For example, a certain random split of the data may happen to favor model A over model B irrespective of the algorithms. Reusing this split will then be biased towards model A. + +Instead, the data splits should be randomized or shuffled for each experimental iteration. This ensures that randomness in the sampling of the splits does not confer an unfair advantage to any model. + +With different splits per experiment, the evaluation becomes more robust. Each model is tested on a wide range of test sets drawn randomly from the overall population. This smoothens out variation and removes correlation between results. + +Proper practice is to set a random seed before splitting the data for each experiment. Splitting should be carried out after any shuffling/resampling as part of the experimental pipeline. Carrying out comparisons on the same splits violates the i.i.d (independent and identically distributed) assumption required for statistical validity. + +Unique splits are essential for fair model comparisons. Though more compute intensive, randomized allocation per experiment removes sampling bias and enables valid benchmarking. This highlights the true differences in model performance irrespective of a particular split's characteristics. + +#### Information Leakage Between Sets + +Information leakage between the training, validation and test sets occurs when information from one set inadvertently bleeds into another set. This could happen due to flaws in the data splitting process and violates the assumption that the sets are mutually exclusive. + +For example, consider a dataset sorted chronologically. If a simple random split is performed, samples close to each other in the dataset may end up in different splits. Models could then learn from 'future' data if test samples are leaked into the training set. + +Similarly, if the splits are not properly shuffled, distribution biases may persist across sets. 
The training set may not contain certain outliers that end up in the test set only, compromising generalization. Issues like class imbalance may also get amplified if splitting is not stratified. + +Another case is when datasets have linked samples that are inherently connected, such as graphs, networks or time series data. Naive splitting may isolate connected nodes or time steps into different sets. Models can make invalid assumptions based on partial information. + +Preventing information leakage requires awareness of the structure of the dataset and relationships between samples. Shuffling, stratification and grouped splitting of related samples can help mitigate leakage. Proper cross validation procedures should be followed, being mindful of temporal or sample proximity. + +Subtle leakage of information between sets undermines model evaluation and training. It creates misleading results on model effectiveness. Data splitting procedures should account for sample relationships and distribution differences to ensure mutual exclusivity between sets. + +#### Failing to Stratify Splits + +When splitting data into training, validation and test sets, failing to stratify the splits can result in uneven representation of the target classes across the splits and introduce sampling bias. This is especially problematic for imbalanced datasets. + +Stratified splitting involves sampling data points such that the proportion of output classes is approximately preserved in each split. For example, if performing a 70/30 train-test split on a dataset with 60% negative and 40% positive samples, stratification ensures ~60% negative and ~40% positive examples in both training and test sets. + +Without stratification, due to random chance, the training split could end up with 70% positive while test has 30% positive samples. The model trained on this skewed training distribution will not generalize well. Class imbalance also compromises model metrics like accuracy. + +Stratification works best when done using the labels though proxies like clustering can be used for unsupervised learning. It becomes essential for highly skewed datasets with rare classes that could easily get omitted from splits. + +Libraries like Scikit-Learn have stratified splitting methods inbuilt. Failing to use them could inadvertently introduce sampling bias and hurt model performance on minority groups. The overall class balance should be examined after performing the splits to ensure even representation across the splits. + +Stratification provides a balanced dataset for both model training and evaluation. Though simple random splitting is easy, being mindful of stratification needs, especially for real-world imbalanced data, results in more robust model development and evaluation. + +#### Ignoring Time Series Dependencies + +Time series data has an inherent temporal structure with observations depending on past context. Naively splitting time series data into train and test sets without accounting for this dependency leads to data leakage and lookahead bias. + +For example, simply splitting a time series into the first 70% training and last 30% as test data will contaminate the training data with future data points. The model can use this information to "peek" ahead during training. + +This results in overly optimistic evaluation of the model's performance. The model may appear to forecast the future accurately but has actually implicitly learned based on future data. This does not translate to real world performance. 
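+
+One way to avoid this lookahead bias is to make every evaluation window lie strictly after the data used to fit the model. A small sketch of such an order-preserving split using scikit-learn's `TimeSeriesSplit`, where the series length and the number of folds are illustrative choices:
+
+```python
+import numpy as np
+from sklearn.model_selection import TimeSeriesSplit
+
+# Synthetic stand-in series: 12 observations, already ordered by time.
+X = np.arange(12).reshape(-1, 1)
+
+# Forward-chaining splits: each test window lies strictly after its training window.
+tscv = TimeSeriesSplit(n_splits=3)
+for fold, (train_idx, test_idx) in enumerate(tscv.split(X)):
+    print(f"fold {fold}: train={train_idx.tolist()} test={test_idx.tolist()}")
+
+# fold 0: train=[0, 1, 2]                   test=[3, 4, 5]
+# fold 1: train=[0, 1, 2, 3, 4, 5]          test=[6, 7, 8]
+# fold 2: train=[0, 1, 2, 3, 4, 5, 6, 7, 8] test=[9, 10, 11]
+```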
+ +Proper time series cross validation techniques should be used to preserve order and dependency, such as forward chaining. The test set should only contain data points from a future time window that the model did not get exposed to for training. + +Failing to account for temporal relationships leads to invalid assumptions of causality. The model may also not learn how to extrapolate forecasts further into the future if the training data contains future points. + +Maintaining the temporal flow of events and avoiding lookahead bias is key for properly training and testing time series models to ensure they can truly predict future patterns and not just memorize past training data. + +#### No Unseen Data for Final Evaluation + +A common mistake when splitting data is failing to keep aside some portion of the data just for final evaluation of the completed model. All of the data is used for training, validation and test sets during development. + +This leaves no unseen data to get an unbiased estimate of how the final tuned model would perform in the real world. The metrics on the test set used during development may not fully reflect actual model skill. + +For example, choices like early stopping and hyperparameter tuning are often optimized based on performance on the test set. This couples the model to the test data. An unseen dataset is needed to break this coupling and get true real-world metrics. + +Best practice is to reserve a portion like 20-30% of the full dataset solely for final model evaluation. This data should not be used for any validation, tuning or model selection during development. + +Saving some unseen data allows evaluating the completely trained model as a black box on real-world like data. This provides reliable metrics to decide if the model is truly ready for production deployment. + +Failing to keep an unseen hold-out set for final validation risks optimistically biasing results and overlooking potential failures before model release. Having some fresh data provides a final sanity check on real-world efficacy. + +#### Overoptimizing on the Validation Set + +The validation set is meant to guide the model training process, not serve as additional training data. Overoptimizing on the validation set to maximize performance metrics treats it more like a secondary training set and leads to inflated metrics and poor generalization. + +For example, techniques like extensively tuning hyperparameters or adding data augmentations targeted to boost validation accuracy can cause the model to fit too closely to the validation data. The model may achieve 99% validation accuracy but only 55% test accuracy. + +Similarly, reusing the validation set for early stopping can also optimize the model specifically for that data. Stopping at the best validation performance overfits to noise and fluctuations caused by the small validation size. + +The validation set serves as a proxy to tune and select models. But the end goal remains maximizing performance on real-world data, not the validation set. Minimizing the loss or error on validation data does not automatically translate to good generalization. + +A good approach is to keep the validation set use minimal - hyperparameters can be tuned coarsely first on training data for example. The validation set guides the training, but should not influence or alter the model itself. It is a diagnostic, not an optimization tool. + +Care should be taken to not overfit when assessing performance on the validation set. 
Tradeoffs are needed to build models that perform well on the overall population, not overly tuned to the validation samples. + +## Optimization Algorithms + +Stochastic gradient descent (SGD) is a simple yet powerful optimization algorithm commonly used to train machine learning models. SGD works by estimating the gradient of the loss function with respect to the model parameters using a single training example, and then updating the parameters in the direction that reduces the loss. + +While conceptually straightforward, SGD suffers from a few shortcomings. First, choosing a proper learning rate can be difficult - too small and progress is very slow, too large and parameters may oscillate and fail to converge. Second, SGD treats all parameters equally and independently, which may not be ideal in all cases. Finally, vanilla SGD uses only first order gradient information which results in slow progress on ill-conditioned problems. + +### Optimizations + +Over the years, various optimizations have been proposed to accelerate and improve upon vanilla SGD. @ruder2016overview gives an excellent overview of the different optimizers. Briefly, several commonly used SGD optimization techniques include: + +**Momentum:** Accumulates a velocity vector in directions of persistent gradient across iterations. This helps accelerate progress by dampening oscillations and maintains progress in consistent directions. + +**Nesterov Accelerated Gradient (NAG):** A variant of momentum that computes gradients at the "look ahead" position rather than the current parameter position. This anticipatory update prevents overshooting while the momentum maintains the accelerated progress. + +**RMSProp:** Divides the learning rate by an exponentially decaying average of squared gradients. This has a similar normalizing effect as Adagrad but does not accumulate the gradients over time, avoiding a rapid decay of learning rates. [@rmsprop] + +**Adagrad:** An adaptive learning rate algorithm that maintains a per-parameter learning rate that is scaled down proportionate to the historical sum of gradients on each parameter. This helps eliminate the need to manually tune learning rates. [@adagrad] + +**Adadelta:** A modification to Adagrad which restricts the window of accumulated past gradients thus reducing the aggressive decay of learning rates. [@adelta] + +**Adam:** - Combination of momentum and rmsprop where rmsprop modifies the learning rate based on average of recent magnitudes of gradients. Displays very fast initial progress and automatically tunes step sizes. [@adam] + +Of these methods, Adam is widely considered the go-to optimization algorithm for many deep learning tasks, consistently outperforming vanilla SGD in terms of both training speed and performance. Other optimizers may be better suited in some cases, particularly for simpler models. 
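As a concrete illustration, these optimizers are available off the shelf in common frameworks. The sketch below shows how they might be instantiated in PyTorch; the tiny placeholder model, random data, and learning rates are illustrative assumptions, not recommendations.

```python
import torch
import torch.nn as nn

model = nn.Linear(10, 1)  # placeholder model

optimizers = {
    "sgd": torch.optim.SGD(model.parameters(), lr=0.01),
    "momentum": torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9),
    "nesterov": torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, nesterov=True),
    "adagrad": torch.optim.Adagrad(model.parameters(), lr=0.01),
    "adadelta": torch.optim.Adadelta(model.parameters()),
    "rmsprop": torch.optim.RMSprop(model.parameters(), lr=0.001),
    "adam": torch.optim.Adam(model.parameters(), lr=0.001),
}

# A single training step looks the same regardless of the optimizer chosen.
x, y = torch.randn(32, 10), torch.randn(32, 1)
opt = optimizers["adam"]
loss = nn.functional.mse_loss(model(x), y)
opt.zero_grad()
loss.backward()
opt.step()
```

Swapping one optimizer for another changes only the construction line; the training step itself is unchanged, which is what makes empirical comparison across optimizers straightforward.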
### Trade-offs

Here is a pros and cons table for some of the main optimization algorithms for neural network training:

| Algorithm | Pros | Cons |
|-|-|-|
| Momentum | Faster convergence due to acceleration along gradients; less oscillation than vanilla SGD | Requires tuning of the momentum parameter |
| Nesterov Accelerated Gradient (NAG) | Faster than standard momentum in some cases; anticipatory updates prevent overshooting | More complex to understand intuitively |
| Adagrad | Eliminates the need to manually tune learning rates; performs well on sparse gradients | Learning rate may decay too quickly on dense gradients |
| Adadelta | Less aggressive learning rate decay than Adagrad | Still sensitive to the initial learning rate value |
| RMSProp | Automatically adjusts learning rates; works well in practice | No major downsides |
| Adam | Combines momentum and adaptive learning rates; efficient and fast convergence | Slightly worse generalization performance in some cases |
| AMSGrad | Improvement to Adam addressing the generalization issue | Not as extensively used/tested as Adam |

### Benchmarking Algorithms

No single method is best for all problem types. This means we need comprehensive benchmarking to identify the most effective optimizer for specific datasets and models. The performance of algorithms like Adam, RMSProp, and Momentum varies due to factors such as batch size, learning rate schedules, model architecture, data distribution, and regularization. These variations underline the importance of evaluating each optimizer under diverse conditions.

Take Adam, for example: it often excels in computer vision tasks, whereas RMSProp may show better generalization in certain natural language processing tasks. Momentum's strength lies in its acceleration in scenarios with consistent gradient directions, whereas Adagrad's adaptive learning rates are more suited to sparse gradient problems.

This wide array of interactions among different optimizers demonstrates the challenge of declaring a single, universally superior algorithm. Each optimizer has unique strengths, making it crucial to empirically evaluate a range of methods to discover their optimal application conditions.

A comprehensive benchmarking approach should assess not just the speed of convergence but also factors like generalization error, stability, hyperparameter sensitivity, and computational efficiency, among others. This entails monitoring training and validation learning curves across multiple runs and comparing optimizers on a variety of datasets and models to understand their strengths and weaknesses.

AlgoPerf, introduced by @dahl2023benchmarking, addresses the need for a robust benchmarking system. This platform evaluates optimizer performance using criteria such as training loss curves, generalization error, sensitivity to hyperparameters, and computational efficiency. AlgoPerf tests various optimization methods, including Adam, LAMB, and Adafactor, across different model types like CNNs and RNNs/LSTMs on established datasets. It utilizes containerization and automatic metric collection to minimize inconsistencies and allows for controlled experiments across thousands of configurations, providing a reliable basis for comparing different optimizers.

The insights gained from AlgoPerf and similar benchmarks are invaluable for guiding the optimal choice or tuning of optimizers.
By enabling reproducible evaluations, these benchmarks contribute to a deeper understanding of each optimizer's performance, paving the way for future innovations and accelerated progress in the field.

## Hyperparameter Tuning

-Explanation: Hyperparameters can significantly impact the performance of a trained model. This section educates the reader on how to fine-tune these settings for optimal results, which is especially important for resource-constrained embedded systems.

Hyperparameters are important settings in machine learning models that have a large impact on how well your models ultimately perform. Unlike other model parameters that are learned during training, hyperparameters are specified by the data scientists or machine learning engineers prior to training the model.

Choosing the right hyperparameter values is crucial for enabling your models to effectively learn patterns from data. Some examples of key hyperparameters across ML algorithms include:

* **Neural networks:** Learning rate, batch size, number of hidden units, activation functions
* **Support vector machines:** Regularization strength, kernel type and parameters
* **Random forests:** Number of trees, tree depth
* **K-means:** Number of clusters

The problem is that there are no reliable rules of thumb for choosing optimal hyperparameter configurations - you typically have to try out different values and evaluate performance. This process is called hyperparameter tuning.

In the early years of modern deep learning, researchers were still grappling with unstable and slow convergence issues. Common pain points included training losses fluctuating wildly, gradients exploding or vanishing, and extensive trial and error needed to train networks reliably. As a result, an early focal point was using hyperparameters to control model optimization. For instance, seminal techniques like batch normalization allowed much faster model convergence by reducing internal covariate shift. Adaptive learning rate methods also mitigated the need for extensive manual schedules. These addressed optimization issues during training, like uncontrolled gradient divergence. Carefully adapted learning rates are still the primary control factor even today for achieving rapid and stable convergence.

As computational capacity expanded exponentially in subsequent years, much larger models could be trained without falling prey to pure numerical optimization issues. The focus shifted towards generalization - though efficient convergence remained a core prerequisite. State-of-the-art architectures like Transformers brought parameter counts into the billions. At such sizes, hyperparameters governing capacity, regularization, ensembling, etc. took center stage for tuning, rather than only raw convergence metrics.

The lesson is that understanding the acceleration and stability of the optimization process itself constitutes the groundwork. Even today, initialization schemes, batch sizes, weight decay, and other training hyperparameters remain indispensable. Mastering fast and flawless convergence allows practitioners to expand their focus to emerging needs around tuning for metrics like accuracy, robustness, and efficiency at scale.

### Search Algorithms

When it comes to the critical process of hyperparameter tuning, there are several sophisticated algorithms machine learning practitioners rely on to systematically search through the vast space of possible model configurations.
Some of the most prominent hyperparameter search algorithms include:

* **Grid Search:** The most basic search method, where you manually define a grid of values to check for each hyperparameter. For example, checking learning rates = [0.01, 0.1, 1] and batch sizes = [32, 64, 128]. The key advantage is simplicity, but exploring all combinations causes the search space to grow exponentially with the number of hyperparameters. Best suited for fine-tuning a small number of hyperparameters.

* **Random Search:** Instead of a grid, you define a probability distribution per hyperparameter to sample values from during the search. It is more efficient at exploring a vast hyperparameter space. However, it is still somewhat arbitrary compared to more adaptive methods.

* **Bayesian Optimization:** An advanced probabilistic approach for adaptive exploration based on a surrogate function that models performance over iterations. It is very sample efficient - it finds highly optimized hyperparameters in fewer evaluation steps. It requires more investment in setup. [@bayes_hyperparam]

* **Evolutionary Algorithms:** Mimic natural selection principles - generate populations of hyperparameter combinations and evolve them over time based on performance. These algorithms offer robust search capabilities better suited to complex response surfaces. However, many iterations are required for reasonable convergence.

-- Learning Rate
-- Batch Size
-- Number of Epochs
-- Regularization Techniques

* **Neural Architecture Search (NAS):** An approach to designing well-performing architectures for neural networks. Traditionally, NAS approaches use some form of reinforcement learning to propose neural network architectures, which are then repeatedly evaluated. [@nas]

-## Evaluation Metrics

### System Implications

-Explanation: Knowing how to evaluate a model's performance is crucial. This section introduces metrics that help in assessing how well the model will perform in real-world embedded applications.

Hyperparameter tuning can significantly impact time to convergence during model training, directly affecting overall runtime. Selecting the right values for key training hyperparameters is crucial for efficient model convergence. For example, the learning rate hyperparameter controls the step size during gradient descent optimization. A properly tuned learning rate schedule ensures the optimization algorithm converges quickly towards a good minimum. Too small a learning rate leads to painfully slow convergence, while too large a value causes the loss to fluctuate wildly. Proper tuning ensures rapid movement towards optimal weights and biases.

-- Accuracy
-- Precision and Recall
-- F1 Score
-- ROC and AUC

Similarly, the batch size for stochastic gradient descent impacts convergence stability. The right batch size smooths out fluctuations in parameter updates to approach the minimum faster. Batch sizes that are too small cause noisy convergence, while very large batch sizes can generalize poorly and also slow down convergence due to less frequent parameter updates. Tuning hyperparameters for faster convergence and reduced training duration has direct implications on cost and resource requirements for scaling machine learning systems:

-## Overfitting and Underfitting

* **Lower computational costs:** Shorter time to convergence means lower computational costs for training models. ML training often leverages large cloud compute instances like GPU and TPU clusters that incur heavy charges per hour. Minimizing training time directly brings down this resource rental cost, which tends to dominate ML budgets for organizations.
Quicker iteration also lets data scientists experiment more freely within the same budget. -Explanation: Overfitting and underfitting are common pitfalls in AI training. This section is vital for teaching strategies to avoid these issues, ensuring that the model generalizes well to new, unseen data. +* **Reduced training time:** Reduced training time unlocks opportunities to train more models using the same computational budget. Optimized hyperparameters stretch available resources further allowing businesses to develop and experiment with more models under resource constraints to maximize performance. -- Techniques to Avoid Overfitting (Dropout, Early Stopping, etc.) -- Understanding Underfitting and How to Address It +* **Resource efficiency:** Quicker training allows allocating smaller compute instances in cloud since models require access to the resources for a shorter duration. For example, a 1-hour training job allows using less powerful GPU instances compared to multi-hour training requiring sustained compute access over longer intervals. This achieves cost savings especially for large workloads. -## Transfer Learning +There are other benefits as well. For instance, faster convergence reduces pressure on ML engineering teams around provisioning training resources. Simple model retraining routines can use lower powered resources as opposed to requesting for access to high priority queues for constrained production-grade GPU clusters. This frees up deployment resources for other applications. -Explanation: Transfer learning can save time and computational resources, which is particularly beneficial for embedded systems. This section explains how to leverage pre-trained models for new tasks. +### Auto Tuners -- Basics of Transfer Learning -- Applications in Embedded AI +There are a wide array of commercial offerings to help with hyperparameter tuning given how important it is. We will briefly touch on two examples focused on optimization for machine learning models targeting microcontrollers and another focused on cloud-scale ML. -## Challenges and Best Practices +#### BigML -Explanation: Every technology comes with its own set of challenges. This section prepares the reader for potential hurdles in AI training, offering best practices to navigate them effectively. +There are several commercial auto tuning platforms available to deal with this problem. One such solution is Google's Vertex AI Cloud, which has extensive integrated support for state-of-the-art tuning techniques. -- Computational Constraints -- Data Privacy -- Ethical Considerations +One of the most salient capabilities offered by Google's Vertex AI managed machine learning platform is efficient, integrated hyperparameter tuning for model development. Successfully training performant ML models requires identifying optimal configurations for a set of external hyperparameters that dictate model behavior - which poses a challenging high-dimensional search problem. Vertex AI aims to simplify this through Automated Machine Learning (AutoML) tooling. + +Specifically, data scientists can leverage Vertex AI's hyperparameter tuning engines by providing a labeled dataset and choosing a model type such as Neural Network or Random Forest classifier. Vertex launches a Hyperparameter Search job transparently on the backend, fully handling resource provisioning, model training, metric tracking and result analysis automatically using advanced optimization algorithms. 
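Conceptually, any such managed tuning job is executing a search loop over candidate configurations. The sketch below is a framework-agnostic illustration of random search, not the Vertex AI API; the search space, the `train_and_evaluate` placeholder, and the trial budget are all hypothetical.

```python
import random

# Hypothetical search space for two hyperparameters.
space = {
    "learning_rate": lambda: 10 ** random.uniform(-4, -1),   # log-uniform sample
    "batch_size": lambda: random.choice([32, 64, 128, 256]),
}

def train_and_evaluate(config):
    """Placeholder: train a model with `config` and return a validation metric."""
    return random.random()  # stand-in for a real training run

best_config, best_score = None, float("-inf")
for trial in range(20):  # evaluation budget
    config = {name: sample() for name, sample in space.items()}
    score = train_and_evaluate(config)
    if score > best_score:
        best_config, best_score = config, score

print(best_config, best_score)
```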
+ +Under the hood, Vertex AutoML employs a wide array of different search strategies to intelligently explore the most promising hyperparameter configurations based on previous evaluation results. Compared to standard Grid Search or Random Search methods, Bayesian Optimization offers superior sample efficiency requiring fewer training iterations to arrive at optimized model quality. For more complex neural architecture search spaces, Vertex AutoML utilizes Population Based Training approaches which evolve candidate solutions over time analogous to natural selection principles. + +Vertex AI aims to democratize state-of-the-art hyperparameter search techniques at cloud scale for all ML developers, abstracting away the underlying orchestration and execution complexity. Users focus solely on their dataset, model requirements and accuracy goals while Vertex manages the tuning cycle, resource allocation, model training, accuracy tracking and artifact storage under the hood. The end result is getting deployment-ready, optimized ML models faster for the target problem. + +#### TinyML + +Edge Impulse's Efficient On-device Neural Network Tuner (EON Tuner) is an automated hyperparameter optimization tool designed specifically for developing machine learning models for microcontrollers. The EON Tuner streamlines the model development process by automatically finding the best neural network configuration for efficient and accurate deployment on resource-constrained devices. + +The key functionality of the EON Tuner is as follows. First, developers define the model hyperparameters, such as number of layers, nodes per layer, activation functions, and learning rate annealing schedule. These parameters constitute the search space that will be optimized. Next, the target microcontroller platform is selected, providing embedded hardware constraints. The user can also specify optimization objectives, such as minimizing memory footprint, lowering latency, reducing power consumption or maximizing accuracy. + +With the search space and optimization goals defined, the EON Tuner leverages Bayesian hyperparameter optimization to intelligently explore possible configurations. Each prospective configuration is automatically implemented as a full model specification, trained and evaluated for quality metrics. The continual process balances exploration and exploitation to arrive at optimized settings tailored to the developer's chosen chip architecture and performance requirements. + +By automatically tuning models for embedded deployment, the EON Tuner frees machine learning engineers from the demandingly iterative process of hand-tuning models. The tool integrates seamlessly into the Edge Impulse workflow for taking models from concept to efficiently optimized implementations on microcontrollers. The expertise encapsulated in EON Tuner regarding ML model optimization for microcontrollers ensures beginner and experienced developers alike can rapidly iterate to models fitting their project needs. + +## Regularization + +Regularization is a critical technique for improving the performance and generalizability of machine learning models in applied settings. It refers to mathematically constraining or penalizing model complexity to avoid overfitting the training data. Without regularization, complex ML models are prone to memorizing peculiarities and noise in the training set, rather than learning meaningful patterns. They may achieve high training accuracy, but perform poorly when evaluating new unseen inputs. 
+ +Regularization helps address this problem by placing constraints that favor simpler, more generalizable models that don't latch onto sampling errors. Techniques like L1/L2 regularization directly penalize large parameter values during training, forcing the model to use the smallest parameters that can adequately explain the signal. Early stopping rules halt training when validation set performance stops improving - before the model starts overfitting. + +Appropriate regularization is crucial when deploying models to new user populations and environments where distribution shifts are likely. For example, an irregularized fraud detection model trained at a bank may work initially but accrue technical debt over time as new fraud patterns emerge. + +Regularizing complex neural networks also allows computational advantages - smaller models require less data augmentation, compute power, and data storage. Regularization allows more efficient AI systems, where accuracy, robustness, and resource management are balanced thoughtfully against training set limitations. + +Several powerful regularization techniques are commonly used to improve model generalization. Architecting the optimal strategy requires understanding how each method affects model learning and complexity. + +### L1 and L2 + +Two of the most widely used regularization forms are L1 and L2 regularization. Both penalize model complexity by adding an extra term to the cost function optimized during training. This term grows larger as model parameters increase. + +L2 regularization, also known as ridge regression, adds the sum of squared magnitudes of all parameters, multiplied by a coefficient α. This quadratic penalty curtails extreme parameter values more aggressively than L1 techniques. Implementation requires only changing the cost function and tuning α. + +$$R_{L2}(\Theta) = \alpha \sum_{i=1}^{n}\theta_{i}^2$$ + +Where: + +* $R_{L2}(\Theta)$ - The L2 regularization term that is added to the cost function +* $\alpha$ - The L2 regularization hyperparameter that controls the strength of regularization +* $\theta_{i}$ - The ith model parameter +* $n$ - The number of parameters in the model +* $\theta_{i}^2$ - The square of each parameter + +And the full L2 regularized cost function is: + +$$J(\theta) = L(\theta) + R_{L2}(\Theta)$$ + +Where: + +* $L(\theta)$ - The original unregularized cost function +* $J(\theta)$ - The new regularized cost function + +The key difference from L1 is that L2 regularization penalizes the squares of the parameters rather than the absolute values. + +L1 regularization, also known as lasso regression, utilizes the absolute sum of magnitudes, rather than the square, multiplied by α. By inducing sparsity in the parameter vector, lasso regularization automatically performs feature selection, setting the weights of irrelevant features to zero. Unlike L2 regularization, L1 regularization leads to sparsity as weights are set to 0; in L2 regularization weights may be set to a value very close to 0. This works well for models with many features. 
$$R_{L1}(\Theta) = \alpha \sum_{i=1}^{n}|\theta_{i}|$$

Where:

* $R_{L1}(\Theta)$ - The L1 regularization term that is added to the cost function
* $\alpha$ - The L1 regularization hyperparameter that controls the strength of regularization
* $\theta_{i}$ - The i-th model parameter
* $n$ - The number of parameters in the model
* $|\theta_{i}|$ - The absolute value of each parameter

And the full L1 regularized cost function is:

$$J(\theta) = L(\theta) + R_{L1}(\Theta)$$

Where:

* $L(\theta)$ - The original unregularized cost function
* $J(\theta)$ - The new regularized cost function

The choice between L1 and L2 depends on the expected model complexity and whether intrinsic feature selection is needed. Both require iterative tuning across a validation set to select the optimal $\alpha$ hyperparameter.

### Dropout

Another widely adopted regularization method is dropout [@dropout]. During training, dropout randomly sets a fraction $1-p$ of node outputs, or hidden activations, to zero. This encourages a greater distribution of information across more nodes, rather than reliance on a small number of nodes. Come prediction time, the full neural network is used, with intermediate activations scaled by $p$ to maintain output magnitudes. GPU optimizations make implementing dropout efficiently straightforward via frameworks like PyTorch and TensorFlow.

Let's be a bit more pedantic. During training with dropout, each node's output $a_i$ is passed through a dropout mask $r_i$ before being used by the next layer:

$$ ã_i = r_i \odot a_i $$

Where:

* $a_i$ - output of node $i$
* $ã_i$ - output of node $i$ after dropout
* $r_i$ - independent Bernoulli random variable with probability $p$ of being 1
* $\odot$ - elementwise multiplication

This dropout mask $r_i$ randomly sets a fraction $1-p$ of activations to 0 during training, forcing the network to learn redundant representations.

At test time, the dropout mask is removed and the activations are rescaled by $p$ to maintain expected output magnitudes:

$$ a_i^{test} = p a_i$$

Where:

* $a_i^{test}$ - node output at test time
* $p$ - keep probability hyperparameter

The key hyperparameter is the keep probability $p$, or equivalently the dropout rate $1-p$, often set between 0.2 and 0.5. Larger networks tend to benefit from more dropout, while small networks risk underfitting if too many nodes are cut out. Trial and error, combined with monitoring validation performance, helps tune the dropout level.
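The classic (non-inverted) dropout scheme described above - masking with keep probability $p$ during training and rescaling by $p$ at test time - can be sketched in a few lines of NumPy; the layer sizes and keep probability below are arbitrary choices for illustration.

```python
import numpy as np

rng = np.random.default_rng(0)
p = 0.8  # keep probability; a fraction 1 - p = 0.2 of activations is dropped

def dropout_train(a, p, rng):
    # Bernoulli mask: each unit is kept with probability p.
    r = (rng.random(a.shape) < p).astype(a.dtype)
    return r * a

def dropout_test(a, p):
    # At test time the full network is used, scaled by p to keep magnitudes comparable.
    return p * a

a = rng.normal(size=(4, 8))          # activations of one layer for a batch of 4
print(dropout_train(a, p, rng))      # roughly 20% of entries zeroed
print(dropout_test(a, p))            # all entries kept, scaled by 0.8
```

Most frameworks instead implement "inverted" dropout, which rescales by $1/p$ during training and leaves inference untouched; the expected activations are the same either way.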
### Early Stopping

The intuition behind early stopping involves tracking model performance on a held-out validation set across training epochs. At first, improvements in training set fit accompany gains in validation accuracy as the model picks up generalizable patterns. After some point, however, the model starts overfitting - latching onto peculiarities and noise in the training data that don't apply more broadly. The validation performance peaks and then degrades if training continues. Early stopping rules halt training at this peak to prevent overfitting. This technique demonstrates how ML pipelines must monitor system feedback, not just blindly maximize performance on a static training set. The system's state evolves, and the optimal endpoints change.

Formal early stopping methods therefore require monitoring a metric like validation accuracy or loss after each epoch. Common curves exhibit rapid initial gains that taper off, eventually plateauing and then degrading slightly as overfitting occurs. The optimal stopping point is often between 5-15 epochs past the peak, depending on patience thresholds. Tracking multiple metrics can improve the signal, since variance exists between measures.

Simple early stopping rules stop immediately at the first post-peak degradation. More robust methods introduce a patience parameter - the number of degrading epochs permitted before stopping. This avoids prematurely halting training due to transient fluctuations. Typical patience windows range from 50-200 validation batches. Wider windows increase the risk of overfitting. Formal tuning strategies can determine the optimal patience.
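The patience rule described above amounts to a small amount of bookkeeping. The sketch below uses a made-up validation-loss curve purely to illustrate the logic; in a real training loop the losses would come from evaluating the model after each epoch.

```python
def early_stopping_epoch(val_losses, patience=3):
    """Return the epoch at which training would stop, given per-epoch validation losses."""
    best_loss = float("inf")
    epochs_without_improvement = 0
    for epoch, loss in enumerate(val_losses):
        if loss < best_loss:
            best_loss = loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                return epoch  # stop: no improvement for `patience` consecutive epochs
    return len(val_losses) - 1  # ran out of epochs without triggering the rule

# Illustrative validation-loss curve: improves, then starts overfitting.
losses = [0.90, 0.70, 0.55, 0.50, 0.48, 0.49, 0.51, 0.53, 0.56, 0.60]
print(early_stopping_epoch(losses, patience=3))  # stops a few epochs past the minimum
```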
## Weight Initialization

Proper initialization of the weights in a neural network prior to training is a vital step that directly impacts model performance. Randomly initializing weights to very large or small values can lead to problems like vanishing/exploding gradients, slow convergence of training, or getting trapped in poor local minima. Proper weight initialization not only accelerates model convergence during training, but also carries implications for system performance at inference time in production environments. Some key aspects include:

* **Faster Time-to-Accuracy:** Carefully tuned initialization leading to faster convergence results in models reaching target accuracy milestones earlier in the training cycle. For instance, Xavier initialization could reduce time-to-accuracy by 20% versus a poor random initialization. As training is typically the most time- and compute-intensive phase, this directly enhances ML system velocity and productivity.

* **Model Iteration Cycle Efficiency:** If models train faster, the overall turnaround time for experimentation, evaluation, and model design iterations also decreases significantly. Systems have more flexibility to explore architectures, data pipelines, etc. within given timeframes.

* **Impact on Necessary Training Epochs:** The training process runs for multiple epochs - with each full pass through the data being an epoch. Good initialization can reduce the epochs required to converge the loss and accuracy curves on the training set by 10-30% in some cases. This means tangible resource and infrastructure cost savings.

* **Effect on Training Hyperparameters:** Weight initialization parameters interact strongly with certain regularization hyperparameters that govern the training dynamics - like learning rate schedules and dropout probabilities. Finding the right combination of settings is non-trivial. Appropriate initialization smooths this search.

Weight initialization has cascading benefits for machine learning engineering efficiency as well as minimized system resource overhead. It is an easily overlooked tactic that every practitioner should master. The choice of which weight initialization technique to use depends on factors like model architecture (number of layers, connectivity pattern, etc.), activation functions, and the specific problem being solved. Over the years, researchers have developed and empirically verified different initialization strategies targeted to common neural network architectures, which we will discuss here.

### Uniform and Normal Initialization

When randomly initializing weights, two standard probability distributions are commonly used - uniform and Gaussian (normal). The uniform distribution assigns equal probability to the initial weight parameters falling anywhere within set minimum and maximum bounds. For example, the bounds could be -1 and 1, leading to a uniform spread of weights between these limits. The Gaussian distribution, on the other hand, concentrates probability around a mean value, following the shape of a bell curve. Most of the weight values will cluster in the region of the specified mean, with fewer samples towards the extreme ends. The standard deviation (std dev) parameter controls the spread around the mean.

The choice between uniform or normal initialization depends on the network architecture and activation functions. For shallow networks, a normal distribution with a relatively small std dev (e.g. 0.01) is recommended. The bell curve prevents very large weight values that could trigger training instability in small networks. For deeper networks, a normal distribution with a higher std dev (say 0.5 or above) or a uniform distribution may be preferred to account for vanishing gradient issues over many layers. The larger spread drives greater differentiation between neuron behaviors. Fine-tuning the initialization distribution parameters is crucial for stable and speedy model convergence. Monitoring training loss trends can diagnose issues for tweaking the parameters iteratively.

### Xavier/Glorot Initialization

Proposed by @xavier, this initialization technique is specially designed for sigmoid and tanh activation functions. These saturating activations can cause vanishing or exploding gradients during backpropagation over many layers.

The Xavier method cleverly sets the variance of the weight distribution based on the number of inputs and outputs to each layer. The intuition is that this balances the flow of information and gradients throughout the network. For example, consider a layer with 300 input units and 100 output units. Plugging this into the formula variance = 2/(#inputs + #outputs) gives a variance of 2/(300+100) = 0.005.

Sampling the initial weights from a uniform or normal distribution centered at 0 with this variance provides much smoother training convergence for deep sigmoid/tanh networks. The gradients are well-conditioned, preventing exponential vanishing or growth.

### He Initialization

Proposed by @kaiming, this initialization is tailored for ReLU (Rectified Linear Unit) activation functions. ReLUs introduce the dying neuron problem, where units get stuck outputting all 0s if they receive strong negative inputs initially. This slows and hinders training.

He initialization overcomes this by sampling weights from a distribution whose variance is set based only on the number of inputs per layer, disregarding the outputs. This keeps the incoming signals small enough to activate the ReLUs into their linear regime from the beginning, avoiding dead units. For a layer with 1024 inputs, the formula variance = 2/1024 ≈ 0.002 keeps most weights concentrated closely around 0.

This specialized initialization allows ReLU networks to converge efficiently right from the start. The choice between Xavier and He initialization must match the intended network activation function.
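The two variance formulas above translate directly into code. A NumPy sketch is shown below; the layer sizes are illustrative, and deep learning frameworks provide equivalent built-in initializers.

```python
import numpy as np

rng = np.random.default_rng(0)

def xavier_normal(fan_in, fan_out, rng):
    # Glorot/Xavier: variance = 2 / (fan_in + fan_out), suited to sigmoid/tanh layers.
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return rng.normal(0.0, std, size=(fan_out, fan_in))

def he_normal(fan_in, fan_out, rng):
    # He: variance = 2 / fan_in, suited to ReLU layers.
    std = np.sqrt(2.0 / fan_in)
    return rng.normal(0.0, std, size=(fan_out, fan_in))

W1 = xavier_normal(300, 100, rng)   # variance ~ 2 / 400 = 0.005, as in the example above
W2 = he_normal(1024, 512, rng)      # variance ~ 2 / 1024 ~ 0.002
print(W1.var(), W2.var())
```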
## Activation Functions

Activation functions play a crucial role in neural networks - they introduce non-linear behaviors that allow neural nets to model complex patterns. Element-wise activation functions are applied to the weighted sums coming into each neuron in the network. Without activation functions, neural nets would be reduced to just linear regression models.

Ideally, activation functions possess certain desirable qualities:

* **Non-linear:** They enable modeling complex relationships through nonlinear transformations of the input sum.
* **Differentiable:** They must have well-defined first derivatives to enable backpropagation and gradient-based optimization during training.
* **Range-bounding:** They constrain the output signal, preventing explosion. For example, sigmoid squashes inputs to (0,1).

Additionally, properties like computational efficiency, monotonicity, and smoothness make some activations better suited than others based on network architecture and problem complexity.

We will briefly survey some of the most widely adopted activation functions along with their strengths and limitations. We also provide guidelines for selecting appropriate functions matched to ML system constraints and use case needs.

### Sigmoid

The sigmoid activation applies a squashing S-shaped curve that tightly binds the output between 0 and 1. It has the mathematical form:

$$ sigmoid(x) = \frac{1}{1+e^{-x}} $$

The exponentiation transform allows the function to smoothly transition from near 0 towards near 1 as the input moves from very negative to very positive. The monotonic rise covers the full (0,1) range.

Pros:

* Smooth gradient always available for backprop
* Output bounded, preventing "exploding" activations
* Simple formula

Cons:

* Tendency to saturate at the extremes, killing gradients ("vanishing")
* Not zero-centered - outputs are not symmetrically distributed

### Tanh

Tanh, or hyperbolic tangent, also assumes an S-shape but is zero-centered, meaning the average output value sits at 0.

$$ tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}} $$

The numerator/denominator transform shifts the range from (0,1) in sigmoid to (-1, 1) in tanh.

Most of the pros/cons are shared with sigmoid, but tanh avoids some output saturation issues by being centered. However, it still suffers from vanishing gradients across many layers.

### ReLU

The Rectified Linear Unit (ReLU) introduces a simple thresholding behavior with its mathematical form:

$$ ReLU(x) = max(0, x) $$

It leaves all positive inputs unchanged while clipping all negative values to 0. This sparse activation and cheap computation make ReLU widely favored over sigmoid/tanh.

![Common activation functions](https://1394217531-files.gitbook.io/~/files/v0/b/gitbook-legacy-files/o/assets%2F-LvBP1svpACTB1R1x_U4%2F-LvNWUoWieQqaGmU_gl9%2F-LvO3qs2RImYjpBE8vln%2Factivation-functions3.jpg?alt=media&token=f96a3007-5888-43c3-a256-2dafadd5df7c){width=70%}

### Pros and Cons

Here is a summary of the pros and cons of these standard activation functions:

| Activation Function | Pros | Cons |
|:-|:-|:-|
| Sigmoid | Smooth gradient for backprop; output bounded between 0 and 1 | Saturation kills gradients; not zero-centered |
| Tanh | Smoother gradient than sigmoid; zero-centered output [-1, 1] | Still suffers from the vanishing gradient issue |
| ReLU | Computationally efficient; introduces sparsity; avoids vanishing gradients | "Dying ReLU" units; not bounded |
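For reference, the three functions surveyed above take only a few lines of NumPy to express; this is a plain transcription of the equations, not an optimized implementation.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def tanh(x):
    return np.tanh(x)  # equivalent to (e^x - e^-x) / (e^x + e^-x)

def relu(x):
    return np.maximum(0.0, x)

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(sigmoid(x))  # squashed into (0, 1)
print(tanh(x))     # squashed into (-1, 1), zero-centered
print(relu(x))     # negatives clipped to 0, positives unchanged
```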
## System Bottlenecks

As introduced earlier, neural networks are composed of linear operations (matrix multiplications) interleaved with element-wise nonlinear activation functions. The most computationally expensive portion of neural networks is the linear transformations, specifically the matrix multiplications between each layer. These linear layers map the activations from the previous layer to a higher dimensional space that serves as inputs to the next layer's activation function.

### Runtime Complexity of Matrix Multiplication

#### Layer Multiplications vs. Activations

The bulk of computation in neural networks arises from the matrix multiplications between layers. Consider a neural network layer with an input dimension of $M$ = 500 and an output dimension of $N$ = 1000; the matrix multiplication requires $O(N \cdot M) = O(1000 \cdot 500) = 500,000$ multiply-accumulate (MAC) operations between those layers.

Contrast this with the preceding layer, which had $M$ = 300 inputs, requiring $O(500 \cdot 300) = 150,000$ ops. We can see how the computations scale with the product of the layer widths, with the total computation across $L$ layers being $\sum_{l=1}^{L-1} O\big(N^{(l)} \cdot M^{(l-1)}\big)$.

Now compare the matrix multiplication to the activation function, which requires only $O(N) = 1000$ element-wise nonlinearities for $N = 1000$ outputs; the linear transformations clearly dominate the activations computationally.

These large matrix multiplications directly impact hardware choices, inference latency, and power constraints for real-world neural network applications. For example, a typical DNN layer may require 500,000 multiply-accumulates vs. only 1000 nonlinear activations, demonstrating a 500x difference in mathematical operations.

When training neural networks, we typically use mini-batch gradient descent, operating on small batches of data at a time. Considering a batch size of $B$ training examples, the input to the matrix multiplication becomes an $M \times B$ matrix, while the output is an $N \times B$ matrix.

#### Mini-batch

In training neural networks, we need to repeatedly estimate the gradient of the loss function with respect to the network parameters (i.e. weights and biases). This gradient indicates the direction in which the parameters should be updated in order to minimize the loss. As introduced previously, we perform updates over a batch of data points at a time, an approach known as stochastic gradient descent, or mini-batch gradient descent.

The most straightforward approach is to estimate the gradient based on a single training example, compute the parameter update, and repeat for the next example. However, this involves very small and frequent parameter updates that can be computationally inefficient, and it may additionally be inaccurate in terms of convergence due to the stochasticity of using just a single data point for a model update.

Instead, mini-batch gradient descent strikes a balance between convergence stability and computational efficiency. Rather than computing the gradient on single examples, we estimate the gradient based on small "mini-batches" of data - usually between 8 and 256 examples in practice.
+ +This provides a noisy but consistent gradient estimate that leads to more stable convergence. Additionally, the parameter update only needs to be performed once per mini-batch rather than once per example, reducing computational overhead. + +By tuning the mini-batch size, we can control the tradeoff between the smoothness of the estimate (larger batches are generally better) and the frequency of updates (smaller batches allow more frequent updates). Mini-batch sizes are usually powers of 2 so they can leverage parallelism across GPU cores efficiently. + +So the total computation is performing an $N \times M$ by $M \times B$ matrix multiplication, yielding $O(N \cdot M \cdot B)$ floating point operations. As a numerical example, with $N=1000$ hidden units, $M=500$ input units, and a batch size $B=64$, this equates to 1000 x 500 x 64 = 32 million multiply-accumulates per training iteration! + +In contrast, the activation functions are applied element-wise to the $N \times B$ output matrix, requiring only $O(N \cdot B)$ computations. For $N=1000$ and $B=64$, that is just 64,000 nonlinearities - 500X less work than the matrix multiplication. + +As we increase the batch size to fully leverage parallel hardware like GPUs, the discrepancy between matrix multiplication and activation function cost grows even larger. This reveals how optimizing the linear algebra operations offers tremendous efficiency gains. + +Therefore, when analyzing where and how neural networks spend computation, matrix multiplication clearly plays a central role. For example, matrix multiplications often account for over 90% of both inference latency and training time in common convolutional and recurrent neural networks. + +#### Optimizing Matrix Multiplication + +A number of techniques enhance the efficiency of general dense/sparse matrix-matrix and matrix-vector operations to directly improve overall efficiency. Some key methods include: + +* Leveraging optimized math libraries like [cuBLAS](https://developer.nvidia.com/cublas) for GPU acceleration +* Enabling lower precision formats like FP16 or INT8 where accuracy permits +* Employing [Tensor Processing Units](https://en.wikipedia.org/wiki/Tensor_Processing_Unit) with hardware matrix multiplication +* Sparsity-aware computations and data storage formats to exploit zero parameters +* Approximating matrix multiplications with algorithms like Fast Fourier Transforms +* Model architecture design to reduce layer widths and activations +* Quantization, pruning, distillation and other compression techniques +* Parallelization of computation across available hardware +* Caching/pre-computing results where possible to reduce redundant operations + +The potential optimization techniques are vast given the outsized portion of time models spend in matrix and vector math. Even incremental improvements would directly speed up runtimes and lower energy usage. Finding new ways to enhance these linear algebra primitives continues to be an active area of research aligned with the future demands of machine learning. We will discuss these in detail in the [Optimizations](./optimizations.qmd) and [AI Acceleration](./hw_acceleration.qmd) chapters. + +### Compute vs Memory Bottleneck + +At this point, it should be clear that the core mathematical operation underpinning neural networks is the matrix-matrix multiplication. Both training and inference for neural networks heavily utilize these matrix multiply operations. 
Recent analysis shows that over 90% of computational requirements in state-of-the-art neural networks arise from matrix multiplications [?]. Consequently, the performance of matrix multiplication has an enormous influence on overall model training or inference time.

#### Training versus Inference

While both training and inference rely heavily on matrix multiplication performance, their precise computational profiles differ. Specifically, neural network inference tends to be more compute-bound than training for an equivalent batch size. The key difference lies in the backpropagation pass, which is only required during training. Backpropagation involves a sequence of matrix multiply operations to calculate gradients with respect to the activations across each network layer, while also reading and writing the activations, weights, and gradients involved.

As a result, training exhibits lower arithmetic intensities, with gradient calculations bounded by memory access instead of FLOPs. In contrast, neural network inference is dominated by the forward propagation, which corresponds to a series of matrix-matrix multiplies. With no memory-intensive backward pass, larger batch sizes readily push inference into being extremely compute-bound, as exhibited by the high measured arithmetic intensities. Note that for some inference applications, response time may be a critical requirement, which might force the application provider to use a smaller batch size to meet these response-time requirements, thereby reducing hardware efficiency; in these cases inference may see lower hardware utilization.

The implication is that hardware provisioning and bandwidth vs. FLOP tradeoffs differ based on whether a system targets training or inference. High-throughput, low-latency servers for inference should emphasize computational power over memory bandwidth, while training clusters require a more balanced architecture.

However, matrix multiplication exhibits an interesting tension - it can be bound either by the memory bandwidth or by the arithmetic throughput capabilities of the underlying hardware. The system's ability to fetch and supply matrix data versus its ability to perform computational operations determines this direction.

This phenomenon has profound impacts; hardware must be designed judiciously and software optimizations need to keep it in mind. Optimizing and balancing compute versus memory to alleviate this underlying matrix multiplication bottleneck is crucial for both efficient model training and deployment.

Finally, the batch size used may impact convergence rates during neural network training, which is another important consideration. For example, there are generally diminishing returns in convergence benefits with extremely large batch sizes (e.g., > 16,384), and hence while extremely large batch sizes may be increasingly beneficial from a hardware/arithmetic intensity perspective, using such large batches may not translate to faster convergence in wall-clock time due to their diminishing benefits to convergence. These tradeoffs are part of the design decisions at the core of machine learning systems research.

#### Batch Size

The batch size used during neural network training and inference has a significant impact on whether matrix multiplication poses more of a computational or memory bottleneck.
Concretely, the batch size refers to the number of samples that are propagated through the network together in one forward/backward pass. In terms of matrix multiplication, this equates to larger matrix sizes.

Specifically, let's look at the arithmetic intensity of matrix multiplication during neural network training. This measures the ratio between computational operations and memory transfers. The matrix multiply of two matrices of size $N \times M$ and $M \times B$ requires $N \times M \times B$ multiply-accumulate operations, but only transfers of $N \times M + M \times B$ matrix elements.

As we increase the batch size $B$, the number of arithmetic operations grows much faster than the memory transfers. For example, with a batch size of 1, we perform $N \times M$ operations but must still transfer the $N \times M$ weight elements plus the $M$ input values, giving an arithmetic intensity of roughly 1. But with a large batch size of 128, the intensity ratio becomes $\frac{128 \times N \times M}{N \times M + M \times 128} \approx 128$ when $N$ is much larger than the batch size. Using a larger batch size shifts the overall computation from being more memory-bound to being more compute-bound. In practice, AI training uses large batch sizes and is generally limited by peak arithmetic performance, i.e., Application 3 in @fig-roofline.

Therefore, batched matrix multiplication is far more compute-intensive than memory-access bound. This has implications for hardware design as well as software optimizations, which we will cover next. The key insight is that by tuning the batch size, we can significantly alter the computational profile and the bottlenecks posed by neural network training and inference.

![AI training is typically compute bound due to the high arithmetic intensity of matrix-multiplication when batch size is large.](images/aitrainingroof.png){#fig-roofline}

#### Hardware Characteristics

Modern hardware like CPUs and GPUs is highly optimized for computational throughput as opposed to memory bandwidth. For example, a high-end H100 Tensor Core GPU can deliver over 60 TFLOPS of double-precision performance but only provides up to 3 TB/s of memory bandwidth. This means there is almost a 20x imbalance between arithmetic units and memory access. Consequently, for hardware like GPU accelerators, neural network training workloads need to be made as computationally intensive as possible in order to fully utilize the available resources.

This further motivates the need for using large batch sizes during training. When using a small batch, the matrix multiplication is bounded by memory bandwidth, underutilizing the abundant compute resources. However, with sufficiently large batches, we can shift the bottleneck towards computation and attain much higher arithmetic intensity. For instance, batches of 256 or 512 samples may be needed to saturate a high-end GPU. The downside is that larger batches provide less frequent parameter updates, which can impact convergence. Still, the batch size serves as an important tuning knob to balance memory vs. compute limitations.

Therefore, given the imbalanced compute-memory architectures of modern hardware, employing large batch sizes is essential to alleviate bottlenecks and maximize throughput. The subsequent software and algorithms also need to accommodate such batch sizes, as mentioned, since larger batch sizes may have diminishing returns for the convergence of the network.
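The effect of batch size on arithmetic intensity is easy to tabulate. The sketch below reuses the layer dimensions from the earlier example and assumes 4-byte floats, two FLOPs per multiply-accumulate, and an illustrative machine balance of about 20 FLOPs per byte; all of these are assumptions for illustration, and the results differ slightly from the simplified ratio above because the output matrix transfers are included here.

```python
# Arithmetic intensity of an (N x M) by (M x B) matrix multiply,
# counting weight, input, and output transfers with 4-byte floats.
N, M = 1000, 500          # output and input dimensions from the earlier example
machine_balance = 20      # illustrative FLOPs per byte a device can sustain

for B in [1, 8, 32, 128, 512]:
    macs = N * M * B
    elements_moved = N * M + M * B + N * B
    intensity = (2 * macs) / (4 * elements_moved)   # FLOPs per byte
    bound = "compute-bound" if intensity > machine_balance else "memory-bound"
    print(f"batch={B:4d}  intensity={intensity:6.1f} FLOPs/byte  -> {bound}")
```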
Using very small batch sizes may lead to suboptimal hardware utilization, ultimately limiting training efficiency. Scaling up to large batch sizes is a topic of research and has been explored in various works that aim to do large scale training. [@bigbatch] + +#### Model Architectures + +The underlying neural network architecture also affects whether matrix multiplication poses more of a computational or memory bottleneck during execution. Transformers and MLPs tend to be much more compute-bound compared to CNN convolutional neural networks. This stems from the types of matrix multiplication operations involved in each model. Transformers rely on self-attention - multiplying large activation matrices by massive parameter matrices to relate elements. MLPs stack fully-connected layers also requiring large matrix multiplies. + +In contrast, the convolutional layers in CNNs have a sliding window that reuses activations and parameters across the input. This means fewer unique matrix operations are needed. However, the convolutions require repeatedly accessing small parts of the input and moving partial sums to populate each window. Even though the arithmetic operations in convolutions are intense, this data movement and buffer manipulation imposes huge memory access overheads. Additionally, CNNs comprise several layered stages so intermediate outputs need to be materialized to memory frequently. + +As a result, CNN training tends to be more memory bandwidth bound relative to arithmetic bound compared to Transformers and MLPs. Therefore, the matrix multiplication profile and in turn the bottleneck posed varies significantly based on model choice. Hardware and systems need to be designed with appropriate compute-memory bandwidth balance depending on target model deployment. Models relying more on attention and MLP layers require higher arithmetic throughput compared to CNNs which necessitate high memory bandwidth. + +## Training Parallelization + +Training neural networks entails intensive computational and memory demands. The backpropagation algorithm for calculating gradients and updating weights consists of repeated matrix multiplications and arithmetic operations over the entire dataset. For example, one pass of backpropagation scales in time complexity with $O(num\_parameters \times batch\_size \times sequence\_length)$. + +As model size increases in terms of parameters and layers, the computational requirements grow rapidly. Moreover, the algorithm requires storing activation outputs and model parameters for the backward pass, which also grows with model size. + +The memory footprint becomes prohibitive for larger models to fit and train on a single accelerator device like a GPU. Therefore, we need to parallelize model training across multiple devices in order to provide sufficient compute and memory to train state-of-the-art neural networks. + +As shown in @fig-training-parallelism, the two main approaches are _data parallelism_, which replicates the model across devices while splitting the input data batch-wise, and _model parallelism_, which partitions the model architecture itself across different devices. By training in parallel, we can leverage greater aggregate compute and memory resources to overcome system limitations and accelerate deep learning workloads. 
![Data parallelism versus model parallelism.](images/aitrainingpara.png){#fig-training-parallelism}

### Data Parallel

Data parallelization is a common approach to parallelizing machine learning training across multiple processing units, such as GPUs or distributed computing resources. In data parallelism, the training dataset is divided into batches, and each batch is processed by a separate processing unit. The model parameters are then updated based on the gradients computed from the processing of each batch. Here's a step-by-step description of data-parallel training:

1. **Dividing the Dataset:** The entire training dataset is divided into smaller batches. Each batch contains a subset of the training examples.

2. **Replicating the Model:** The neural network model is replicated across all processing units. Each processing unit has its own copy of the model.

3. **Parallel Computation:** Each processing unit takes a different batch and computes the forward and backward passes independently. During the forward pass, the model makes predictions on the input data. During the backward pass, gradients are computed for the model parameters using the loss function.

4. **Gradient Aggregation:** After processing their respective batches, the gradients from each processing unit are aggregated. Common aggregation methods include summation or averaging of the gradients.

5. **Parameter Update:** The aggregated gradients are used to update the model parameters. The update can be performed using optimization algorithms like SGD or variants like Adam.

6. **Synchronization:** All processing units synchronize their model parameters after the update. This ensures that each processing unit has the latest version of the model.

The prior steps are repeated for a certain number of iterations or until convergence.

Let's take a specific example. Say we have a global batch size of 256 and 8 GPUs; each GPU then gets a micro-batch of 32 samples. Each GPU's forward and backward passes compute losses and gradients based only on its local 32 samples. The gradients are then aggregated across devices, either with a parameter server or a collective communications library, to obtain the effective gradient for the global batch. Weight updates happen on each GPU according to these gradients. After a configured number of iterations, the updated weights are synchronized and equalized across devices before continuing with the next iterations.

Data parallelism is effective when the model is large and the dataset is substantial, as it allows for parallel processing of different parts of the data. It is widely used in deep learning frameworks and libraries that support distributed training, such as TensorFlow and PyTorch. However, care must be taken to handle issues like communication overhead, load balancing, and synchronization to ensure efficient parallelization.
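A toy sketch of steps 3-6 - each worker computing gradients on its own micro-batch, followed by averaging and a synchronized update - is shown below for a single linear model with plain SGD; real systems delegate the aggregation step to collective-communication libraries such as NCCL, or to frameworks like Horovod and PyTorch Distributed.

```python
import numpy as np

rng = np.random.default_rng(0)
num_workers, micro_batch, dim = 8, 32, 10
w = rng.normal(size=dim)                      # replicated model parameters
lr = 0.1

# Each "worker" holds its own micro-batch of the global batch (8 x 32 = 256 samples).
Xs = [rng.normal(size=(micro_batch, dim)) for _ in range(num_workers)]
ys = [X @ np.ones(dim) + 0.1 * rng.normal(size=micro_batch) for X in Xs]

def local_gradient(w, X, y):
    # Gradient of mean squared error for a linear model on one micro-batch.
    residual = X @ w - y
    return 2.0 * X.T @ residual / len(y)

# Steps 3-4: independent local gradients, then aggregation (here: averaging).
grads = [local_gradient(w, X, y) for X, y in zip(Xs, ys)]
global_grad = np.mean(grads, axis=0)

# Steps 5-6: the identical update applied everywhere keeps the replicas in sync.
w -= lr * global_grad
```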
### Model Parallel

Model parallelism refers to distributing the neural network model itself across multiple devices, rather than replicating the full model on each device as in data parallelism. This is particularly useful when a model is too large to fit into the memory of a single GPU or accelerator. While this is less relevant for embedded or TinyML use cases, where models are comparatively small, it is still useful to understand.

In model-parallel training, different parts or layers of the model are assigned to separate devices. The input activations and intermediate outputs are partitioned and passed between these devices during the forward and backward passes to coordinate gradient computations across the model partitions.

By splitting the model architecture across multiple devices, the memory footprint and computational operations are distributed across devices instead of being concentrated on one. This enables training very large models, with billions of parameters, that would otherwise exceed the capacity of a single device. There are several main ways to partition a model:

* **Layer-wise parallelism:** Consecutive layers are placed on different devices. For example, device 1 holds layers 1-3 and device 2 holds layers 4-6. The output activations from layer 3 are transferred to device 2 to continue the forward pass.

* **Filter-wise parallelism:** In convolutional layers, the output filters can be split among devices. Each device computes the activation outputs for a subset of filters, which are then concatenated before propagating to the next layer.

* **Spatial parallelism:** The input is divided spatially, so each device processes a particular region, such as the top-left quarter of an image. The output regions are then combined to form the full output.

Additionally, hybrid schemes can split the model layer-wise while also splitting the data batch-wise. The appropriate type of model parallelism depends on the specific neural architecture and hardware setup, and optimizing the partitioning and communication for the model topology is key to minimizing overhead.

However, because the model parts run on physically separate devices, they must communicate and synchronize their parameters during each training step, and the backward pass must propagate gradient updates accurately across the model partitions. Coordination and a high-speed interconnect between devices are therefore crucial for model-parallel performance, and careful partitioning and communication protocols are required to minimize transfer overhead.
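As a simple illustration of layer-wise partitioning, the sketch below splits a small PyTorch model across two GPUs and moves activations between them during the forward pass. The layer sizes, device names, and single training step are illustrative assumptions; production systems usually rely on pipeline-parallel libraries rather than manual placement like this.

```python
# Layer-wise model parallelism sketch: stage 1 lives on cuda:0, stage 2 on cuda:1.
# Assumes two GPUs are available; sizes and hyperparameters are placeholders.
import torch
import torch.nn as nn

class TwoStageMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.stage1 = nn.Sequential(nn.Linear(1024, 4096), nn.ReLU()).to("cuda:0")
        self.stage2 = nn.Linear(4096, 10).to("cuda:1")

    def forward(self, x):
        x = self.stage1(x.to("cuda:0"))
        # Transferring activations between devices is the communication cost
        # of layer-wise partitioning.
        return self.stage2(x.to("cuda:1"))

model = TwoStageMLP()
opt = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.CrossEntropyLoss()

x = torch.randn(32, 1024)                           # inputs may start on the CPU
y = torch.randint(0, 10, (32,), device="cuda:1")    # labels live where the output lands

opt.zero_grad()
loss = loss_fn(model(x), y)
loss.backward()            # autograd propagates gradients back across both devices
opt.step()
print(loss.item())
```

Note that this naive split keeps only one device busy at a time; pipeline-parallel schedules improve utilization by streaming micro-batches through the stages so that they overlap.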
### Comparison

To summarize, the table below compares data parallelism and model parallelism across their definitions, objectives, scaling methods, hardware requirements, main challenges, implementation complexity, and representative libraries:

| Characteristic | Data Parallelism | Model Parallelism |
|----------------|------------------|-------------------|
| Definition | Distribute data across devices with model replicas | Distribute model across devices |
| Objective | Accelerate training through compute scaling | Enable larger model training |
| Scaling Method | Scale devices/workers | Scale model size |
| Main Constraint | Model size per device | Device coordination overhead |
| Hardware Requirements | Multiple GPUs/TPUs | Often specialized interconnect |
| Primary Challenge | Parameter synchronization | Complex partitioning and communication |
| Types | N/A | Layer-wise, filter-wise, spatial |
| Code Complexity | Minimal changes | More significant model surgery |
| Popular Libraries | Horovod, PyTorch Distributed | Mesh TensorFlow |

## Conclusion

In this chapter, we have covered the core foundations that enable effective training of artificial intelligence models. We explored the mathematical concepts, such as loss functions, backpropagation, and gradient descent, that make neural network optimization possible. We also discussed practical techniques around leveraging training data, regularization, hyperparameter tuning, weight initialization, and distributed parallelization strategies that improve convergence, generalization, and scalability.

These methodologies form the bedrock on which the success of deep learning over the past decade has been built. Mastering these fundamentals equips practitioners to architect systems and refine models tailored to their problem context. However, as models and datasets grow exponentially in size, training systems will need to optimize across metrics like time, cost, and carbon footprint. Hardware scaling to warehouse-scale systems enables massive computational throughput, but optimizations around efficiency and specialization will be key. Software techniques such as compression and sparsity exploitation can augment hardware gains. We will discuss several of these in the coming chapters.

Overall, the fundamentals covered in this chapter equip practitioners to build, refine, and deploy models. However, interdisciplinary skills spanning theory, systems, and hardware will differentiate experts who can lift AI to the next level in the sustainable and responsible manner that society requires. Understanding efficiency alongside accuracy constitutes the balanced engineering approach needed to train intelligent systems that integrate smoothly across a breadth of real-world contexts.