bibliography.bib

% Nature article. "Go" is braced so sentence-casing styles keep its capital.
@article{silver2016mastering,
  author    = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
  title     = {Mastering the game of {Go} with deep neural networks and tree search},
  journal   = {Nature},
  volume    = {529},
  number    = {7587},
  pages     = {484--489},
  year      = {2016},
  publisher = {Nature Publishing Group},
}

% Nature article. Publisher spelled the same way as the other Nature entry in this file.
@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
}

% arXiv preprint (identifier carried in the journal field).
@article{jaderberg2016reinforcement,
  author  = {Jaderberg, Max and Mnih, Volodymyr and Czarnecki, Wojciech Marian and Schaul, Tom and Leibo, Joel Z and Silver, David and Kavukcuoglu, Koray},
  title   = {Reinforcement learning with unsupervised auxiliary tasks},
  journal = {arXiv preprint arXiv:1611.05397},
  year    = {2016},
}

% Survey monograph. "Bayesian" is braced against sentence-casing; the issue range
% uses an en-dash (--) like the page range.
@article{ghavamzadeh2015bayesian,
  author    = {Ghavamzadeh, Mohammad and Mannor, Shie and Pineau, Joelle and Tamar, Aviv},
  title     = {{Bayesian} reinforcement learning: A survey},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  volume    = {8},
  number    = {5--6},
  pages     = {359--483},
  year      = {2015},
  publisher = {Now Publishers, Inc.},
}


% Conference paper. "Bayesian" and "Q-learning" are braced so their capitals
% survive sentence-casing styles.
@inproceedings{dearden1998bayesian,
  author    = {Dearden, Richard and Friedman, Nir and Russell, Stuart},
  title     = {{Bayesian} {Q-learning}},
  booktitle = {AAAI/IAAI},
  pages     = {761--768},
  year      = {1998},
}


% arXiv preprint (identifier carried in the journal field). The title is a
% question and keeps its question mark.
@article{osband2016posterior,
  author  = {Osband, Ian and Van Roy, Benjamin},
  title   = {Why is Posterior Sampling Better than Optimism for Reinforcement Learning?},
  journal = {arXiv preprint arXiv:1607.00215},
  year    = {2016},
}


% Conference paper, ACM conference on Economics and Computation.
@inproceedings{kleinberg2014time,
  author       = {Kleinberg, Jon and Oren, Sigal},
  title        = {Time-inconsistent planning: a computational problem in behavioral economics},
  booktitle    = {Proceedings of the fifteenth ACM conference on Economics and computation},
  pages        = {547--564},
  year         = {2014},
  organization = {ACM},
}

% arXiv preprint (identifier carried in the journal field).
@article{kuleshov2014algorithms,
  author  = {Kuleshov, Volodymyr and Precup, Doina},
  title   = {Algorithms for multi-armed bandit problems},
  journal = {arXiv preprint arXiv:1402.6028},
  year    = {2014},
}

% Conference paper, Advances in Neural Information Processing Systems.
@inproceedings{chentanez2004intrinsically,
  author    = {Chentanez, Nuttapong and Barto, Andrew G and Singh, Satinder P},
  title     = {Intrinsically motivated reinforcement learning},
  booktitle = {Advances in neural information processing systems},
  pages     = {1281--1288},
  year      = {2004},
}


% Journal article, with volume/issue/pages supplied so article styles can render
% a complete reference.
% NOTE(review): volume, number and pages were added from memory of the published
% record -- confirm against the publisher page.
@article{abbeel2010autonomous,
  author    = {Abbeel, Pieter and Coates, Adam and Ng, Andrew Y},
  title     = {Autonomous helicopter aerobatics through apprenticeship learning},
  journal   = {The International Journal of Robotics Research},
  volume    = {29},
  number    = {13},
  pages     = {1608--1639},
  year      = {2010},
  publisher = {SAGE Publications},
}

% ACM Transactions on Graphics is a journal (the entry carries volume and issue),
% so this is an article entry with the venue in journal and ACM as publisher.
% The pages field holds the article number.
@article{lee2010learning,
  author    = {Lee, Seong Jae and Popovi{\'c}, Zoran},
  title     = {Learning behavior styles with inverse reinforcement learning},
  journal   = {ACM Transactions on Graphics},
  volume    = {29},
  number    = {4},
  pages     = {122},
  year      = {2010},
  publisher = {ACM},
}

% Conference paper, IEEE/RSJ International Conference on Intelligent Robots and Systems.
@inproceedings{abbeel2008apprenticeship,
  author       = {Abbeel, Pieter and Dolgov, Dmitri and Ng, Andrew Y and Thrun, Sebastian},
  title        = {Apprenticeship learning for motion planning with application to parking lot navigation},
  booktitle    = {2008 IEEE/RSJ International Conference on Intelligent Robots and Systems},
  pages        = {1083--1090},
  year         = {2008},
  organization = {IEEE},
}

% Journal article. Journal name given in title case, like the other journal
% names in this file.
@article{muelling2014learning,
  author    = {Muelling, Katharina and Boularias, Abdeslam and Mohler, Betty and Sch{\"o}lkopf, Bernhard and Peters, Jan},
  title     = {Learning strategies in table tennis using inverse reinforcement learning},
  journal   = {Biological Cybernetics},
  volume    = {108},
  number    = {5},
  pages     = {603--619},
  year      = {2014},
  publisher = {Springer},
}

% Conference paper, International Conference on Machine Learning.
@inproceedings{abbeel2004apprenticeship,
  author       = {Abbeel, Pieter and Ng, Andrew Y},
  title        = {Apprenticeship learning via inverse reinforcement learning},
  booktitle    = {Proceedings of the twenty-first international conference on Machine learning},
  pages        = {1},
  year         = {2004},
  organization = {ACM},
}

% Book.
@book{ainslie2001breakdown,
  author    = {Ainslie, George},
  title     = {Breakdown of will},
  publisher = {Cambridge University Press},
  year      = {2001},
}

% arXiv preprint (identifier carried in the journal field).
@article{amin2016towards,
  author  = {Amin, Kareem and Singh, Satinder},
  title   = {Towards Resolving Unidentifiability in Inverse Reinforcement Learning},
  journal = {arXiv preprint arXiv:1601.06569},
  year    = {2016},
}

% Miscellaneous entry: only author, title and year are recorded.
@misc{patterson2015can,
  author = {Patterson, Richard W},
  title  = {Can Behavioral Tools Improve Online Student Outcomes? Experimental Evidence from a Massive Open Online Course},
  year   = {2015},
}


% Journal article, with volume and issue supplied so article styles can render
% a complete reference.
% NOTE(review): volume and number were added from memory of the published
% record -- confirm against the journal.
@article{laibson1997golden,
  author    = {Laibson, David},
  title     = {Golden eggs and hyperbolic discounting},
  journal   = {The Quarterly Journal of Economics},
  volume    = {112},
  number    = {2},
  pages     = {443--477},
  year      = {1997},
  publisher = {JSTOR},
}


% Journal article. The acronym "DVD" is braced so sentence-casing styles do not
% lowercase it.
@article{milkman2009highbrow,
  author    = {Milkman, Katherine L and Rogers, Todd and Bazerman, Max H},
  title     = {Highbrow films gather dust: Time-inconsistent preferences and online {DVD} rentals},
  journal   = {Management Science},
  volume    = {55},
  number    = {6},
  pages     = {1047--1059},
  year      = {2009},
  publisher = {INFORMS},
}

% Conference paper. The proceedings title is spelled out in full rather than
% abbreviated; the indexing site is not recorded as the sponsoring organization.
@inproceedings{doshi2011comparison,
  author    = {Doshi-Velez, Finale and Ghahramani, Zoubin},
  title     = {A Comparison of Human and Agent Reinforcement Learning in Partially Observable Domains},
  booktitle = {Proceedings of the 33rd Annual Meeting of the Cognitive Science Society},
  year      = {2011},
}


% Online book. The umlaut is written as a BibTeX special character ({\"u}) so
% that sorting and label generation treat it as a single letter.
@misc{dippl,
  title = {The Design and Implementation of Probabilistic Programming Languages},
  author = {Goodman, Noah D and Stuhlm{\"u}ller, Andreas},
  year = {2014},
  howpublished = {\url{http://dippl.org}},
  note = {Accessed: 2016-1-26},
  url = {http://dippl.org}
}

% Book.
@book{chater2003rational,
  author    = {Chater, Nick and Oaksford, Mike},
  title     = {Rational models of cognition},
  publisher = {Wiley Online Library},
  year      = {2003},
}


% arXiv preprint (identifier carried in the journal field). The acronym and
% "Bayesian" are braced so sentence-casing styles keep their capitals.
@article{gonzalez2015glasses,
  author  = {Gonz{\'a}lez, Javier and Osborne, Michael and Lawrence, Neil D},
  title   = {{GLASSES}: Relieving The Myopia Of {Bayesian} Optimisation},
  journal = {arXiv preprint arXiv:1510.06299},
  year    = {2015},
}

% Book.
@book{rubinstein2012lecture,
  author    = {Rubinstein, Ariel},
  title     = {Lecture notes in microeconomic theory: the economic agent},
  publisher = {Princeton University Press},
  year      = {2012},
}

% Textbook, so it is a book entry: the publisher and its city go in
% publisher/address rather than in a journal field.
@book{russell1995modern,
  author    = {Russell, Stuart and Norvig, Peter},
  title     = {Artificial Intelligence: A Modern Approach},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs},
  year      = {1995},
}

% Journal article (survey), Journal of Econometrics.
@article{aguirregabiria2010dynamic,
  author    = {Aguirregabiria, Victor and Mira, Pedro},
  title     = {Dynamic Discrete Choice Structural Models: A Survey},
  journal   = {Journal of Econometrics},
  volume    = {156},
  number    = {1},
  pages     = {38--67},
  year      = {2010},
  publisher = {Elsevier},
}

% Conference paper. "Bayesian" is braced so sentence-casing styles keep its capital.
@inproceedings{ullman2009help,
  author    = {Ullman, Tomer and Baker, Chris and Macindoe, Owen and Evans, Owain and Goodman, Noah and Tenenbaum, Joshua B},
  title     = {Help or hinder: {Bayesian} models of social goal inference},
  booktitle = {Advances in neural information processing systems},
  pages     = {1874--1882},
  year      = {2009},
}

% Conference paper. The title carries no final period: bibliography styles add
% their own terminal punctuation.
@inproceedings{ng2000algorithms,
  author    = {Ng, Andrew Y and Russell, Stuart J},
  title     = {Algorithms for inverse reinforcement learning},
  booktitle = {ICML},
  pages     = {663--670},
  year      = {2000},
}

% Workshop paper: the venue is given in booktitle, so the entry type is
% inproceedings (an article entry would need a journal field). The umlaut is
% written as a BibTeX special character ({\"u}) for correct sorting.
@inproceedings{evans2015learning,
  author = {Evans, Owain and Stuhlm{\"u}ller, Andreas and Goodman, Noah D.},
  booktitle = {{NIPS} 2015 Workshop on Bounded Optimality},
  title = {Learning the Preferences of Bounded Agents},
  year = 2015,
  url = {http://stuhlmueller.org/papers/preferences-nipsworkshop2015.pdf}
}

% Journal article, Trends in Cognitive Sciences.
@article{botvinick2012planning,
  author    = {Botvinick, Matthew and Toussaint, Marc},
  title     = {Planning as Inference},
  journal   = {Trends in Cognitive Sciences},
  volume    = {16},
  number    = {10},
  pages     = {485--488},
  year      = {2012},
  publisher = {Elsevier},
}

% Journal article. Journal name given in title case, like the other journal
% names in this file.
@article{kaelbling1998planning,
  author    = {Kaelbling, Leslie Pack and Littman, Michael L and Cassandra, Anthony R},
  title     = {Planning and acting in partially observable stochastic domains},
  journal   = {Artificial Intelligence},
  volume    = {101},
  number    = {1},
  pages     = {99--134},
  year      = {1998},
  publisher = {Elsevier},
}

% Journal article (survey). Journal name in title case; volume supplied so
% article styles can render a complete reference.
% NOTE(review): the volume was added from memory of the published record --
% confirm against the journal.
@article{kaelbling1996reinforcement,
  author  = {Kaelbling, Leslie Pack and Littman, Michael L and Moore, Andrew W},
  title   = {Reinforcement learning: A survey},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {4},
  pages   = {237--285},
  year    = {1996},
}


% Conference paper, INTERSPEECH. The organization name matches the association
% name used in the proceedings title.
@inproceedings{kim2014inverse,
  author       = {Kim, Dongho and Breslin, Catherine and Tsiakoulis, Pirros and Gasic, Milica and Henderson, Matthew and Young, Steve},
  title        = {Inverse Reinforcement Learning for Micro-Turn Management},
  booktitle    = {Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH},
  pages        = {328--332},
  year         = {2014},
  organization = {International Speech Communication Association},
}

% Conference paper. "Bayesian" and the "AAAI" acronym are braced so case-changing
% styles keep their capitals.
@inproceedings{zheng2014robust,
  author    = {Zheng, Jiangchuan and Liu, Siyuan and Ni, Lionel M},
  title     = {Robust {Bayesian} Inverse Reinforcement Learning with Sparse Behavior Noise},
  booktitle = {Twenty-Eighth {AAAI} Conference on Artificial Intelligence},
  year      = {2014},
}

% Book.
@book{luce2005individual,
  author    = {Luce, R Duncan},
  title     = {Individual choice behavior: A theoretical analysis},
  publisher = {Courier Corporation},
  year      = {2005},
}

% (A second, identical definition of the aguirregabiria2010dynamic entry used to
% sit here. It is dropped because BibTeX and Biber reject repeated citation
% keys; the definition earlier in this file is the one in use.)

% Working paper, so a techreport entry: the issuing group goes in institution,
% and the paper number (previously split across volume and pages) in number.
% NOTE(review): the "and others" placeholder was dropped on the understanding
% that this paper has a single author -- confirm against the paper.
@techreport{darden2010smoking,
  author      = {Darden, Michael},
  title       = {Smoking, Expectations, and Health: a Dynamic Stochastic Model of Lifetime Smoking Behavior},
  institution = {Health Econometrics and Data Group, University of York},
  type        = {Working Paper},
  number      = {10/28},
  year        = {2010},
}


% Conference paper. The place name "East Africa" is braced so sentence-casing
% styles keep its capitals.
% NOTE(review): booktitle "Meeting Abstract" looks like an export placeholder --
% confirm the actual venue.
@inproceedings{ermon2014learning,
  author    = {Ermon, Stefano and Xue, Yexiang and Toth, Russell and Dilkina, Bistra and Bernstein, Richard and Damoulas, Theodoros and Clark, Patrick and DeGloria, Steve and Mude, Andrew and Barrett, Christopher and others},
  title     = {Learning Large-Scale Dynamic Discrete Choice Models of Spatio-Temporal Preferences with Application to Migratory Pastoralism in {East Africa}},
  booktitle = {Meeting Abstract},
  year      = {2014},
}


% Journal article, Operations Research.
@article{ryzhov2012knowledge,
  author    = {Ryzhov, Ilya O and Powell, Warren B and Frazier, Peter I},
  title     = {The knowledge gradient algorithm for a general class of online learning problems},
  journal   = {Operations Research},
  volume    = {60},
  number    = {1},
  pages     = {180--195},
  year      = {2012},
  publisher = {INFORMS},
}

% Conference paper. The second author is entered in "Last, First" form with Yu
% as the surname; "Bayes" is braced so sentence-casing styles keep its capital.
@inproceedings{zhang2013forgetful,
  author    = {Zhang, Shunan and Yu, Angela J},
  title     = {Forgetful {Bayes} and myopic planning: Human learning and decision-making in a bandit setting},
  booktitle = {Advances in neural information processing systems},
  pages     = {2607--2615},
  year      = {2013},
}

% Journal article, The Review of Economic Studies.
@article{strotz1955myopia,
  author    = {Strotz, Robert Henry},
  title     = {Myopia and inconsistency in dynamic utility maximization},
  journal   = {The Review of Economic Studies},
  volume    = {23},
  number    = {3},
  pages     = {165--180},
  year      = {1955},
  publisher = {JSTOR},
}

% Journal article, Theoretical Computer Science.
@article{lattimore2014general,
  author    = {Lattimore, Tor and Hutter, Marcus},
  title     = {General time consistent discounting},
  journal   = {Theoretical Computer Science},
  volume    = {519},
  pages     = {140--154},
  year      = {2014},
  publisher = {Elsevier},
}

% Book.
@book{dehaene2011number,
  author    = {Dehaene, Stanislas},
  title     = {The number sense: How the mind creates mathematics},
  publisher = {OUP USA},
  year      = {2011},
}


% Book.
@book{kahneman2011thinking,
  author    = {Kahneman, Daniel},
  title     = {Thinking, fast and slow},
  publisher = {Macmillan},
  year      = {2011},
}

% Journal article. The title carries no final period (styles add their own
% punctuation); journal name given in title case like the others in this file.
@article{kahneman1984choices,
  author    = {Kahneman, Daniel and Tversky, Amos},
  title     = {Choices, values, and frames},
  journal   = {American Psychologist},
  volume    = {39},
  number    = {4},
  pages     = {341},
  year      = {1984},
  publisher = {American Psychological Association},
}

% Journal article, with volume and issue supplied so article styles can render
% a complete reference.
% NOTE(review): volume and number were added from memory of the published
% record -- confirm against the journal.
@article{kahneman1979prospect,
  author    = {Kahneman, Daniel and Tversky, Amos},
  title     = {Prospect theory: An analysis of decision under risk},
  journal   = {Econometrica: Journal of the Econometric Society},
  volume    = {47},
  number    = {2},
  pages     = {263--291},
  year      = {1979},
  publisher = {JSTOR},
}

% Journal article, Science.
@article{gershman2015computational,
  author    = {Gershman, Samuel J and Horvitz, Eric J and Tenenbaum, Joshua B},
  title     = {Computational rationality: A converging paradigm for intelligence in brains, minds, and machines},
  journal   = {Science},
  volume    = {349},
  number    = {6245},
  pages     = {273--278},
  year      = {2015},
  publisher = {American Association for the Advancement of Science},
}

% Conference paper. "Markov" is braced so sentence-casing styles keep its capital.
@inproceedings{madani1999undecidability,
  author    = {Madani, Omid and Hanks, Steve and Condon, Anne},
  title     = {On the undecidability of probabilistic planning and infinite-horizon partially observable {Markov} decision problems},
  booktitle = {AAAI/IAAI},
  pages     = {541--548},
  year      = {1999},
}

% Conference paper, AAAI.
@inproceedings{cassandra1994acting,
  author    = {Cassandra, Anthony R and Kaelbling, Leslie Pack and Littman, Michael L},
  title     = {Acting optimally in partially observable stochastic domains},
  booktitle = {AAAI},
  volume    = {94},
  pages     = {1023--1028},
  year      = {1994},
}

% Journal article. Uses the journal field like the rest of this file (classic
% BibTeX styles ignore journaltitle), stores the DOI without a resolver prefix,
% and writes the umlaut as a BibTeX special character ({\"u}).
@article{stuhlmueller2013reasoning,
  author = {Stuhlm{\"u}ller, Andreas and Goodman, Noah D.},
  journal = {Cognitive Systems Research},
  title = {Reasoning about Reasoning by Nested Conditioning: Modeling Theory of Mind with Probabilistic Programs},
  year = 2013,
  issn = {1389-0417},
  doi = {10.1016/j.cogsys.2013.07.003},
  url = {http://stuhlmueller.org/papers/nested-conditioning-cogsys2013.pdf}
}

% Journal article, SIAM Journal on Control and Optimization.
@article{frazier2008knowledge,
  author    = {Frazier, Peter I and Powell, Warren B and Dayanik, Savas},
  title     = {A knowledge-gradient policy for sequential information collection},
  journal   = {SIAM Journal on Control and Optimization},
  volume    = {47},
  number    = {5},
  pages     = {2410--2439},
  year      = {2008},
  publisher = {SIAM},
}

% Journal article. "Markov" is braced so sentence-casing styles keep its
% capital; journal name given in title case like the others in this file.
@article{papadimitriou1987complexity,
  author    = {Papadimitriou, Christos H and Tsitsiklis, John N},
  title     = {The complexity of {Markov} decision processes},
  journal   = {Mathematics of Operations Research},
  volume    = {12},
  number    = {3},
  pages     = {441--450},
  year      = {1987},
  publisher = {INFORMS},
}