dc.contributor.author | Doshi-Velez, Finale P. | |
dc.contributor.author | Wingate, David | |
dc.contributor.author | Roy, Nicholas | |
dc.contributor.author | Tenenbaum, Joshua B. | |
dc.date.accessioned | 2011-09-28T19:38:37Z | |
dc.date.available | 2011-09-28T19:38:37Z | |
dc.date.issued | 2010-12 | |
dc.identifier.isbn | 9781617823800 | |
dc.identifier.uri | http://hdl.handle.net/1721.1/66107 | |
dc.description.abstract | We consider reinforcement learning in partially observable domains where the agent can query an expert for demonstrations. Our nonparametric Bayesian approach combines model knowledge, inferred from expert information and independent exploration, with policy knowledge inferred from expert trajectories. We introduce priors that bias the agent towards models with both simple representations and simple policies, resulting in improved policy and model learning. | en_US |
dc.language.iso | en_US | |
dc.publisher | Neural Information Processing Systems Foundation | en_US |
dc.relation.isversionof | http://media.nips.cc/Conferences/2010/2010-NIPS-Conference-Program.pdf | en_US |
dc.rights | Creative Commons Attribution-Noncommercial-Share Alike 3.0 | en_US |
dc.rights.uri | http://creativecommons.org/licenses/by-nc-sa/3.0/ | en_US |
dc.source | MIT web domain | en_US |
dc.title | Nonparametric Bayesian Policy Priors for Reinforcement Learning | en_US |
dc.type | Article | en_US |
dc.identifier.citation | Doshi-Velez, Finale, David Wingate, Nicholas Roy, and Joshua Tenenbaum. "Nonparametric Bayesian Policy Priors for Reinforcement Learning." Proceedings of the 24th Annual Conference on Neural Information Processing Systems, NIPS 2010, December 6-9, 2010, Vancouver, British Columbia. | en_US |
dc.contributor.department | Massachusetts Institute of Technology. Department of Aeronautics and Astronautics | en_US |
dc.contributor.department | Massachusetts Institute of Technology. Department of Brain and Cognitive Sciences | en_US |
dc.contributor.department | Massachusetts Institute of Technology. Laboratory for Information and Decision Systems | en_US |
dc.contributor.approver | Roy, Nicholas | |
dc.contributor.mitauthor | Roy, Nicholas | |
dc.contributor.mitauthor | Doshi-Velez, Finale P. | |
dc.contributor.mitauthor | Wingate, David | |
dc.contributor.mitauthor | Tenenbaum, Joshua B. | |
dc.relation.journal | Proceedings of the 24th Annual Conference on Neural Information Processing Systems, (NIPS 2010) | en_US |
dc.eprint.version | Author's final manuscript | en_US |
dc.type.uri | http://purl.org/eprint/type/ConferencePaper | en_US |
dspace.orderedauthors | Doshi-Velez, Finale; Wingate, David; Roy, Nicholas; Tenenbaum, Joshua | en_US |
dc.identifier.orcid | https://orcid.org/0000-0002-1925-2035 | |
dc.identifier.orcid | https://orcid.org/0000-0002-8293-0492 | |
mit.license | OPEN_ACCESS_POLICY | en_US |
mit.metadata.status | Complete | |