{"id":3121,"date":"2022-08-21T20:28:01","date_gmt":"2022-08-21T20:28:01","guid":{"rendered":"https:\/\/lucylabs.gatech.edu\/ml4t\/?page_id=3121"},"modified":"2022-10-22T06:25:38","modified_gmt":"2022-10-22T06:25:38","slug":"project-7","status":"publish","type":"page","link":"https:\/\/lucylabs.gatech.edu\/ml4t\/fall2022\/project-7\/","title":{"rendered":"Project 7"},"content":{"rendered":"<p>[et_pb_section fb_built=&#8221;1&#8243; admin_label=&#8221;Section&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_row admin_label=&#8221;Project Title&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; header_font=&#8221;|700||on|||||&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h1 style=\"text-align: center;\">Project 7: Q-Learning Robot<\/h1>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row use_custom_gutter=&#8221;on&#8221; gutter_width=&#8221;1&#8243; admin_label=&#8221;row&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; background_size=&#8221;initial&#8221; background_position=&#8221;top_left&#8221; background_repeat=&#8221;repeat&#8221; width=&#8221;100%&#8221; custom_padding=&#8221;0px||0px||false|false&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_divider color=&#8221;#eeeeee&#8221; divider_position=&#8221;center&#8221; divider_weight=&#8221;3px&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; width=&#8221;25%&#8221; 
custom_padding=&#8221;30px||30px||true|false&#8221; global_colors_info=&#8221;{}&#8221;][\/et_pb_divider][et_pb_blurb title=&#8221;Table of Contents&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x68;||divi||400&#8243; icon_color=&#8221;rgba(0,0,0,0.05)&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;100px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_level=&#8221;h2&#8243; header_font_size=&#8221;26px&#8221; height=&#8221;38px&#8221; icon_font_size=&#8221;100px&#8221; global_colors_info=&#8221;{}&#8221;][\/et_pb_blurb][et_pb_blurb title=&#8221;Overview&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#overview&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb 
title=&#8221;About the Project&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#about&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb title=&#8221;Your Implementation&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; 
link_option_url=&#8221;#implementation&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb title=&#8221;Contents of Report&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#report&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; 
image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb title=&#8221;Testing Recommendations&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#testing&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb title=&#8221;Submission Requirements&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; 
image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#submission&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb title=&#8221;Grading Information&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#grading&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; 
font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb title=&#8221;Development Guidelines&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#guidelines&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_blurb title=&#8221;Optional Resources&#8221; use_icon=&#8221;on&#8221; font_icon=&#8221;&#x24;||divi||400&#8243; icon_color=&#8221;#000000&#8243; image_icon_background_color=&#8221;#FFFFFF&#8221; icon_placement=&#8221;left&#8221; image_icon_width=&#8221;16px&#8221; content_max_width=&#8221;100%&#8221; 
_builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; header_font_size=&#8221;16px&#8221; header_line_height=&#8221;2em&#8221; image_icon_custom_padding=&#8221;8px|8px|8px|8px|false|false&#8221; custom_margin=&#8221;|||118px|false|false&#8221; custom_padding=&#8221;10px|||0px|false|false&#8221; link_option_url=&#8221;#optional&#8221; border_radii_image=&#8221;on|100%|100%|100%|100%&#8221; border_width_all_image=&#8221;2px&#8221; border_color_all_image=&#8221;#000000&#8243; icon_font_size=&#8221;16px&#8221; use_circle=&#8221;on&#8221; use_circle_border=&#8221;on&#8221; circle_border_color=&#8221;#b856c7&#8243; circle_color=&#8221;#FFFFFF&#8221; global_colors_info=&#8221;{}&#8221; font_icon__hover_enabled=&#8221;on|hover&#8221; font_icon__hover=&#8221;%22||divi||400&#8243; custom_padding__hover=&#8221;|||10px|false|false&#8221; custom_padding__hover_enabled=&#8221;on|hover&#8221; image_icon_background_color__sticky_enabled=&#8221;#7EBEC5&#8243; image_icon_background_color__sticky=&#8221;#7EBEC5&#8243;][\/et_pb_blurb][et_pb_divider color=&#8221;#eeeeee&#8221; divider_position=&#8221;center&#8221; divider_weight=&#8221;3px&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; width=&#8221;25%&#8221; custom_padding=&#8221;30px||30px||true|false&#8221; global_colors_info=&#8221;{}&#8221;][\/et_pb_divider][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;Revisions&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text admin_label=&#8221;Text&#8221; _builder_version=&#8221;4.17.4&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>Revisions<\/h2>\n<p><span>This assignment is subject to change up until 3 weeks prior to the due date. 
We do not anticipate changes; any changes will be logged in this section.<\/span><\/p>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;1 Overview&#8221; module_id=&#8221;overview&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>1 Overview<\/h2>\n<p><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW139945243 BCX4\"><span class=\"NormalTextRun SCXW139945243 BCX4\">In this assignment, you <\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">will implement a <\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">Reinforcement Learning algorithm called Q-learning<\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">, which is a model-free RL algorithm. You will also ex<\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">t<\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">end your Q-learner implementation by adding a Dyna, model-based, <\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">component. <\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">You will submit the code for the project in <\/span><span class=\"NormalTextRun SpellingErrorV2 SCXW139945243 BCX4\">Gradescope<\/span><span class=\"NormalTextRun SCXW139945243 BCX4\"> SUBMISSION. 
<\/span><span class=\"NormalTextRun SCXW139945243 BCX4\">There is no report associated with this assignment.<\/span><\/span><span class=\"EOP SCXW139945243 BCX4\" data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;1.1 Learning Objectives&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>1.1 Learning Objectives<\/h3>\n<p><span data-contrast=\"auto\">The specific learning objectives for this assignment are focused on the following areas:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">Q-Learning (Model-free RL algorithm)<\/span><\/b><span data-contrast=\"auto\">: Develop a learner to \u201creinforce\u201d an understanding of the Q-learning model-free algorithm.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">Dyna-Q (Model-based addition to Q-learner)<\/span><\/b><span data-contrast=\"auto\">:\u00a0 Develop a Dyna-Q implementation to \u201creinforce\u201d an understanding of the Dyna-Q model-based addition to the Q-learner implementation.<\/span><span 
data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;2 About The Project&#8221; module_id=&#8221;about&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>2 About The Project<\/h2>\n<p><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW255662496 BCX4\"><span class=\"NormalTextRun SCXW255662496 BCX4\">I<\/span><span class=\"NormalTextRun SCXW255662496 BCX4\">n this project, you will implement the Q-Learning and Dyna-Q solutions to the reinforcement learning problem. You will apply them to a navigation problem in this project. In a later project, you will apply them to trading. The reason for working with the navigation problem first is that, as you will see, navigation is an easy problem to work with and understand. Note that your Q-Learning code really shouldn\u2019t care which problem it is solving. 
The difference is that you need to wrap the learner in a different code that frames the problem for the learner as necessary.<\/span><\/span><span class=\"EOP SCXW255662496 BCX4\" data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;3 Your Implementation&#8221; module_id=&#8221;implementation&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>3 Your Implementation<\/h2>\n<p><span data-contrast=\"auto\">For this project, we have created testqlearner.py that automates the testing of your Q-Learner in the navigation problem.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Overall, your tasks for this project include:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">Code a Q-Learner<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" 
aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">Code the Dyna-Q feature of Q-Learning<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">Test\/debug the Q-Learner in navigation problems<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<ul><\/ul>\n<p><span data-contrast=\"auto\">You must write your own code for this project. You are NOT allowed to use other people\u2019s code or packages to implement the Q-learner. For this assignment, we will test only your code (there is no report component).<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Before the deadline, make sure to pre-validate your submission using Gradescope TESTING. Once you are satisfied with the results in testing, submit the code to Gradescope SUBMISSION. <\/span><b><span data-contrast=\"auto\">Only code submitted to Gradescope SUBMISSION will be graded. 
If you submit your code to Gradescope TESTING and have not also submitted your code to Gradescope SUBMISSION, you will receive a zero (0).<\/span><\/b><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;3.1 Getting Started&#8221; _builder_version=&#8221;4.18.0&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>3.1 Getting Started<\/h3>\n<p><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW154703597 BCX4\"><span class=\"NormalTextRun SCXW154703597 BCX4\">To make it easier to get started on the project and focus on the concepts involved, you will be given a starter framework. This framework assumes you have already set up the <\/span><\/span><a class=\"Hyperlink SCXW154703597 BCX4\" href=\"https:\/\/lucylabs.gatech.edu\/ml4t\/fall2022\/local-environment\/\" target=\"_blank\" rel=\"noopener noreferrer\"><span data-contrast=\"none\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW154703597 BCX4\"><span class=\"NormalTextRun SCXW154703597 BCX4\" data-ccp-charstyle=\"Hyperlink\">local environment<\/span><\/span><\/a><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW154703597 BCX4\"><span class=\"NormalTextRun SCXW154703597 BCX4\"> and <\/span><\/span><a class=\"Hyperlink SCXW154703597 BCX4\" href=\"https:\/\/lucylabs.gatech.edu\/ml4t\/fall2022\/software-setup\/\" target=\"_blank\" rel=\"noopener noreferrer\"><span data-contrast=\"none\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW154703597 BCX4\"><span class=\"NormalTextRun SCXW154703597 BCX4\" data-ccp-charstyle=\"Hyperlink\">ML4T Software<\/span><\/span><\/a><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW154703597 BCX4\"><span class=\"NormalTextRun SCXW154703597 BCX4\">. 
<\/span><span class=\"NormalTextRun SCXW154703597 BCX4\">The framework for Project <\/span><span class=\"NormalTextRun SCXW154703597 BCX4\">7<\/span><span class=\"NormalTextRun SCXW154703597 BCX4\"> can be obtained from:\u202f<\/span><\/span><a href=\"https:\/\/www.dropbox.com\/s\/cvbjadfqznqbpwd\/qlearning_robot_2022Fall.zip?dl=1\" target=\"_blank\" rel=\"noopener\">QLearning_Robot_2022Fall.zip<\/a><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW154703597 BCX4\"><span class=\"NormalTextRun SCXW154703597 BCX4\">.\u202f<\/span><\/span><span class=\"EOP SCXW154703597 BCX4\" data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_image src=&#8221;http:\/\/lucylabs.gatech.edu\/ml4t\/wp-content\/uploads\/2021\/09\/Screen-Shot-2021-09-26-at-12.15.06-AM.png&#8221; title_text=&#8221;Screen Shot 2021-09-26 at 12.15.06 AM&#8221; admin_label=&#8221;Image&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;f2b74940-54e3-4aec-b2e5-e58531b6bfc5&#8243; global_colors_info=&#8221;{}&#8221;][\/et_pb_image][et_pb_text admin_label=&#8221;3.1 Cont.&#8221; _builder_version=&#8221;4.18.0&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<p>E<span data-contrast=\"auto\">xtract its contents into the base directory (e.g., ML4T_2022Fall). This will add a new folder called \u201cqlearning_robot\u201d to the course directory structure:\u202f\u202f<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">The framework for Project 7 can be obtained in the qlearning_robot folder alone. 
Within the qlearning_robot folder are several files:\u00a0<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">QLearner.py<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">testqlearner.py\u00a0<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><span data-contrast=\"auto\">grade_robot_qlearning.py<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><i><span data-contrast=\"auto\">Note: Example navigation problems are provided in the qlearning_robot\/testworlds directory<\/span><\/i><span data-contrast=\"auto\">.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Your q-learning class will be implemented in the QLearner.py file. 
The testqlearner.py file contains a simple testing scaffold that you can use to test your learners, which is useful for debugging. It must also be modified to run the experiments. The grade_robot_qlearning.py file is a local pre-validation script that mirrors the script used in the Gradescope TESTING environment.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;3.2 Task &#038; Requirements&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>3.2 Task &amp; Requirements<\/h3>\n<p><span data-contrast=\"auto\">You will implement the following files:\u00a0\u00a0<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">QLearner.py \u2013 Contains the code for the Q-Learner (and Dyna-Q) implementation\u00a0<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><span data-contrast=\"auto\">All your learner code must be placed into the file above. No other code files will be accepted. 
The testqlearner.py file that is used to conduct your experiments must also reside in the qlearning_robot and is run using the following command:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/b70af129908f713fefc0c720d51ef269.js\"><\/script>[\/et_pb_code][et_pb_text admin_label=&#8221;3.3 QLearner&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>3.3 Implement Q-Learner (95 points)<\/h3>\n<p><span data-contrast=\"auto\">Your QLearner class should be implemented in the file QLearner.py. It should implement EXACTLY the API defined below. DO NOT import any modules besides those allowed below. Your class should implement the following methods:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><b><span data-contrast=\"auto\">The constructor QLearner()<\/span><\/b><span data-contrast=\"auto\"> should reserve space for keeping track of Q[s, a] for the number of states and actions. It should initialize Q[] with all zeros. 
Details on the input arguments to the constructor:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">num_states<\/span><\/b><span data-contrast=\"auto\"> integer, the number of states to consider<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">num_actions<\/span><\/b><span data-contrast=\"auto\"> integer, the number of actions available.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">alpha<\/span><\/b><span data-contrast=\"auto\"> float, the learning rate used in the update rule. Should range between 0.0 and 1.0 with 0.2 as a typical value.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"4\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">gamma<\/span><\/b><span data-contrast=\"auto\"> float, the discount rate used in the update rule. 
Should range between 0.0 and 1.0 with 0.9 as a typical value.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"5\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">rar<\/span><\/b><span data-contrast=\"auto\"> float, random action rate: the probability of selecting a random action at each step. Should range between 0.0 (no random actions) and 1.0 (always random action) with 0.5 as a typical value.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">radr<\/span><\/b><span data-contrast=\"auto\"> float, random action decay rate, after each update, rar = rar * radr. Ranges between 0.0 (immediate decay to 0) and 1.0 (no decay). Typically, 0.99.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">dyna<\/span><\/b><span data-contrast=\"auto\"> integer, number of dyna updates for each regular update. 
When Dyna is used, 200 is a typical value.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">verbose<\/span><\/b><span data-contrast=\"auto\"> boolean, if True, your class is allowed to print debugging statements, if False, all printing is prohibited. This is useful when debugging in a local environment.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><b><span data-contrast=\"auto\">query(s_prime, r)<\/span><\/b><span data-contrast=\"auto\"> is the core method of the Q-Learner. It should keep track of the last state s and the last action a, then use the new information s_prime and r to update the Q table. The learning instance, or experience tuple is &lt;s, a, s_prime, r&gt;. query() should return an integer, which is the next action to take. Note that it should choose a random action with probability rar, and that it should update rar according to the decay rate radr at each step. 
Details on the arguments:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"4\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">s_prime<\/span><\/b><span data-contrast=\"auto\"> integer, the new state.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">r<\/span><\/b><span data-contrast=\"auto\"> float, a real-valued immediate reward.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><b><span data-contrast=\"auto\">querysetstate(s)<\/span><\/b><span data-contrast=\"auto\"> A special version of the query method that sets the state to s, and returns an integer action according to the same rules as query() (including choosing a random action sometimes), but it does not execute an update to the Q-table. 
It also does not update rar.\u00a0<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><b><span data-contrast=\"auto\">s<\/span><\/b><span data-contrast=\"auto\"> integer, a state<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><span data-contrast=\"auto\">There are two main uses for this method: 1) To set the initial state, and 2) when using a learned policy, but not updating it.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Here\u2019s an example of the API in use:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/758383712c7e33afe9a5080586a7b4a5.js\"><\/script>[\/et_pb_code][et_pb_text admin_label=&#8221;3.4 Implement Dyna&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>3.4 Implement Dyna (5 points)<\/h3>\n<p><span data-contrast=\"auto\">Add additional components to your QLearner class so that multiple \u201challucinated\u201d experience tuples are used to update the Q-table for each \u201creal\u201d experience. 
The addition of this component should speed convergence in terms of the number of calls to query(), <\/span><b><span data-contrast=\"auto\">not necessarily running time<\/span><\/b><span data-contrast=\"auto\">.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Note that it is not important that you implement Dyna exactly as described in the lecture. The key requirement is that your code should somehow hallucinate additional experiences. The precise method you use for discovering those experiences is flexible. We will test your code on several test worlds with 50 epochs and with dyna = 200. Our expectation is that with Dyna, the solution should be much better after 50 epochs than without.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;3.5 Implement author&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>3.5 Implement author() Method (up to 20 point penalty)\u00a0<\/h3>\n<p><span data-contrast=\"auto\">You should implement a method called author() that returns your Georgia Tech user ID as a string. This is the ID you use to log into Canvas. It is not your 9 digit student number. 
Here is an example of how you might implement author() within a learner object:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/83809a84f99e7f683360bf663e7990e4.js\"><\/script>[\/et_pb_code][et_pb_text admin_label=&#8221;3.6 Navigation Problem Test Cases&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3><span>3.6 Navigation Problem Test Cases<\/span><\/h3>\n<p><b><i><span data-contrast=\"auto\">Note<\/span><\/i><\/b><i><span data-contrast=\"auto\">: Understanding how testing will be performed may facilitate coding. So, we are placing this section here rather than in the \u201ctesting\u201d section below. The standard testing text will be retained in the testing section below.<\/span><\/i><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">We will test your Q-Learner with a navigation problem as follows. Note that your Q-Learner does not need to be coded specially for this task. In fact, the code doesn\u2019t need to know anything about it. The code necessary to test your learner with this navigation task is implemented in testqlearner.py for you. The navigation task takes place in a 10 x 10 grid world. 
The particular environment is expressed in a CSV file of integers, where the value in each position is interpreted as follows:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">0: blank space.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">1: an obstacle.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><span data-contrast=\"auto\">2: the starting location for the robot.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"4\" data-aria-level=\"1\"><span data-contrast=\"auto\">3: the goal location.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" 
data-aria-posinset=\"5\" data-aria-level=\"1\"><span data-contrast=\"auto\">5: quicksand.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><span data-contrast=\"auto\">An example navigation problem (world01.csv) is shown below. Following python conventions, [0,0] is upper left, or northwest corner, [9,9] lower right or southeast corner. Rows are north\/south, columns are east\/west.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/5f66785f9f1715970ad9353dfe26183b.js\"><\/script>[\/et_pb_code][et_pb_text admin_label=&#8221;3.6 Cont.&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<p><span data-contrast=\"auto\">In this example the robot starts at the bottom center and must navigate to the top left. Note that a wall of obstacles blocks its path, and there is some quicksand along the left side. The objective is for the robot to learn how to navigate from the starting location to the goal with the highest total reward. 
We define the reward for each step as:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">-1 if the robot moves to an empty or blank space, or attempts to move into a wall<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">-100 if the robot moves to a quicksand space<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><span data-contrast=\"auto\">1 if the robot moves to the goal space<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><span data-contrast=\"auto\">Overall, we will assess the performance of a policy as the median reward it incurs to travel from the start to the goal (higher reward is better). We assess a learner in terms of the reward it converges to over a given number of training epochs (trips from start to goal). Important note: the problem includes random actions. 
So, for example, if your learner responds with a \u201cmove north\u201d action, there is some probability that the robot will actually move in a different direction. For this reason, the \u201cwise\u201d learner develops policies that keep the robot well away from quicksand. We map this problem to a reinforcement learning problem as follows:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">State: The state is the location of the robot, it is computed (discretized) as: row location * 10 + column location.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">Actions: There are 4 possible actions, 0: move north, 1: move east, 2: move south, 3: move west.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><span data-contrast=\"auto\">R: The reward is as described above.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" 
data-aria-posinset=\"4\" data-aria-level=\"1\"><span data-contrast=\"auto\">T: The transition matrix can be inferred from the CSV map and the actions.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p><span data-contrast=\"auto\">Note that R and T are not known by or available to the learner. The code in testqlearner.py will test your code as follows (pseudo code):<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/429bc5bbee31b50e59f7d018d85c1945.js\"><\/script>[\/et_pb_code][et_pb_text admin_label=&#8221;3.6 Cont.&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<p><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW31642540 BCX4\"><span class=\"NormalTextRun SCXW31642540 BCX4\">A few things to note about this code: The learner always receives a reward of -1.0 (or -100.0) until it reaches the goal when it receives a reward of +1.0. 
As soon as the robot reaches the goal, it is immediately returned to the starting location.<\/span><\/span><span class=\"EOP SCXW31642540 BCX4\" data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;3.6.1 Additional Example Solutions&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h4><span>3.6.1 Additional Example Solutions<\/span><\/h4>\n<p><span data-contrast=\"auto\">Here are example solutions. Note that these examples were created before we added \u201cquicksand\u201d to the project. In the future, we will be updating the examples to reflect this change. In the meantime, you may find these useful:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><a href=\"http:\/\/lucylabs.gatech.edu\/ml4t\/mc3_p2_examples\/\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">mc3_p2_examples<\/span><\/a><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><a href=\"http:\/\/lucylabs.gatech.edu\/ml4t\/mc3_p2_dyna_examples\/\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">mc3_p2_dyna_examples<\/span><\/a><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;3.7 Technical Requirements&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>3.7 Technical 
Requirements<\/h3>\n<p><span data-contrast=\"auto\">The following technical requirements apply to this assignment:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ol>\n<li data-leveltext=\"%1.\" data-font=\"\" data-listid=\"1\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">The value of Dyna cannot be \u201ccreatively\u201d decreased (i.e., one cannot ignore the first few iterations (e.g., ignore the first 5 Dyna steps) or take only the nth iteration (e.g., every 10 Dyna cycles) to perform the Dyna steps).<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ol>\n<ol>\n<li data-leveltext=\"%1.\" data-font=\"&quot;Palatino Linotype&quot;, &quot;Palatino Linotype_MSFontService&quot;, sans-serif\" data-listid=\"1\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span data-contrast=\"auto\">Each test (500 epochs, see rubric below) should complete in less than 2 seconds.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ol>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;3.8 Hints and Resources&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>3.8 Hints and Resources<\/h3>\n<p><span data-contrast=\"auto\">This paper by Kaelbling, Littman, and Moore, is a good resource for RL in general: <\/span><a href=\"https:\/\/arxiv.org\/pdf\/cs\/9605103.pdf\" target=\"_blank\" rel=\"noopener\"><span 
data-contrast=\"none\">https:\/\/arxiv.org\/pdf\/cs\/9605103.pdf<\/span><\/a><span data-contrast=\"none\">.<\/span><span data-contrast=\"auto\"> See Section 4.2 for details on Q-Learning.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">There is also a chapter in the Mitchell book on Q-Learning.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">For implementing Dyna, you may find the following resources useful:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><a href=\"http:\/\/incompleteideas.net\/sutton\/book\/RLbook2018.pdf\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">http:\/\/incompleteideas.net\/sutton\/book\/RLbook2018.pdf<\/span><\/a><span data-contrast=\"auto\"> (Section 8.2)<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><a href=\"http:\/\/www-anw.cs.umass.edu\/~barto\/courses\/cs687\/Chapter%209.pdf\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">http:\/\/www-anw.cs.umass.edu\/~barto\/courses\/cs687\/Chapter%209.pdf<\/span><\/a><span 
data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><a href=\"https:\/\/arxiv.org\/pdf\/1712.01275.pdf\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">https:\/\/arxiv.org\/pdf\/1712.01275.pdf<\/span><\/a><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<ul><\/ul>\n<p><span data-contrast=\"auto\">If after submitting the project for grading you are not entirely satisfied with the implementation, you are encouraged to continue to improve the Q-learner as it can play a role in a future project (project 8).<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;4 Contents of Report&#8221; module_id=&#8221;report&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>4 Contents of Report<\/h2>\n<p><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW86313358 BCX4\"><span class=\"NormalTextRun SCXW86313358 BCX4\">There is no report associated with this assignment.<\/span><\/span><span class=\"EOP SCXW86313358 BCX4\" 
data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;5 Testing Recommendations&#8221; module_id=&#8221;testing&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>5 Testing Recommendations<\/h2>\n<p><span data-contrast=\"auto\">To test your code, we will invoke each of the functions. You are encouraged to perform any tests necessary to instill confidence that the code will run properly when submitted for grading and will produce the required results. You should confirm that testqlearner.py runs as expected from the qlearning_robot directory.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Additionally, we provide the grade_robot_qlearning.py file that can be used for lightweight testing. This local grading\/pre-validation script is the same script that will be run when the code is submitted to Gradescope TESTING. 
To run and test that the file will run from within the qlearning_robot directory, use the command:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/7b9fac28f0e0594a91606ff1192698cf.js\"><\/script>[\/et_pb_code][et_pb_text _builder_version=&#8221;4.18.0&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<p><span data-contrast=\"auto\">In addition to testing on your local machine, you are encouraged to submit your file to Gradescope TESTING, where some basic pre-validation tests will be performed against the code. No credit will be given for coding assignments that do not pass this pre-validation. <\/span><b><span data-contrast=\"auto\">Gradescope TESTING does not grade your assignment.<\/span><\/b><span data-contrast=\"auto\"> The Gradescope TESTING script is not a complete test suite and does not match the more stringent private grader that is used in Gradescope SUBMISSION. Thus, the maximum Gradescope TESTING score of 81, while instructional, does not represent the minimum score one can expect when the assignment is graded using the private grading script. You are encouraged to develop additional tests to ensure that all project requirements are met.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">You are allowed <\/span><b><span data-contrast=\"auto\">unlimited<\/span><\/b><span data-contrast=\"auto\"> resubmissions to Gradescope <\/span><b><span data-contrast=\"auto\">TESTING<\/span><\/b><span data-contrast=\"auto\">. 
Please refer to the <\/span><a href=\"https:\/\/lucylabs.gatech.edu\/ml4t\/fall2022\/gradescope\/\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">Gradescope Instructions<\/span><\/a><span data-contrast=\"auto\"> for more information.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;6 Submission Requirements&#8221; module_id=&#8221;submission&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>6 Submission Requirements<\/h2>\n<p><b><span data-contrast=\"auto\">This is an individual assignment<\/span><\/b><span data-contrast=\"auto\">. All work you submit should be your own. 
Make sure to cite any sources you reference and use quotes and in-line citations to mark any direct quotes.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span>Assignment due dates in your time zone can be found by looking at the<\/span><span> Project in the Assignment menu item in Canvas (ensure your Canvas time zone settings are set up properly).\u00a0<\/span> <span>This date <\/span><span>is 23:59 AOE <\/span><span>converted to <\/span><span>your time zone.\u00a0 <\/span><span>Late submissions are allowed for a penalty.\u00a0 The times and penalties are as follows:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:0,&quot;335551550&quot;:1,&quot;335551620&quot;:1,&quot;335559739&quot;:160,&quot;335559740&quot;:259}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"2\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span>-10% Late Penalty: +1 Hour late: submitted by 00:59 AOE (next day)<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:0,&quot;335551550&quot;:1,&quot;335551620&quot;:1,&quot;335559739&quot;:160,&quot;335559740&quot;:259}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"2\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><span>-25% Late Penalty: +12 Hours Late: submitted by 11:59 AOE (next day)<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:0,&quot;335551550&quot;:1,&quot;335551620&quot;:1,&quot;335559739&quot;:160,&quot;335559740&quot;:259}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"2\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><span>-50% Late Penalty: +24 Hours Late: submitted by 23:59 AOE (next day)<\/span><span 
data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:0,&quot;335551550&quot;:1,&quot;335551620&quot;:1,&quot;335559739&quot;:160,&quot;335559740&quot;:259}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"2\" aria-setsize=\"-1\" data-aria-posinset=\"3\" data-aria-level=\"1\"><span>-100% Late Penalty: &gt; 24 Hours Late: submitted after 23:59 AOE (next day)<\/span><span>\u00a0<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:0,&quot;335551550&quot;:1,&quot;335551620&quot;:1,&quot;335559739&quot;:160,&quot;335559740&quot;:259}\">\u00a0<\/span><\/li>\n<\/ul>\n<ul><\/ul>\n<p><span data-contrast=\"auto\">Assignments received after Monday at 23:59 AOE (even if only by a few seconds) are not accepted without advance agreement except in cases of medical or family emergencies. In the case of such an emergency, please contact the <\/span><a href=\"https:\/\/gatech-advocate.symplicity.com\/care_report\/index.php\/pid986879?\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">Dean of Students<\/span><\/a><span data-contrast=\"auto\">.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;6.1 Report Submission&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>6.1 Report Submission<\/h3>\n<p><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW124104899 BCX4\"><span class=\"NormalTextRun SCXW124104899 BCX4\">There is no report associated with this assignment.\u00a0<\/span><\/span><span class=\"EOP SCXW124104899 BCX4\" 
data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;6.2 Code Submission&#8221; _builder_version=&#8221;4.18.0&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>6.2 Code Submission<\/h3>\n<p><span data-contrast=\"auto\">This class uses Gradescope, a server-side auto-grader, to evaluate your code submission. No credit will be given for code that does not run in this environment and students are encouraged to leverage Gradescope TESTING prior to submitting an assignment for grading. <\/span><b><span data-contrast=\"auto\">Only code submitted to Gradescope SUBMISSION will be graded. If you submit your code to Gradescope TESTING and have not also submitted your code to Gradescope SUBMISSION, you will receive a zero (0).<\/span><\/b><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Please submit the following file to Gradescope <\/span><b><span data-contrast=\"auto\">SUBMISSION<\/span><\/b><span data-contrast=\"auto\">:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p style=\"padding-left: 80px;\"><b><span data-contrast=\"auto\">QLearner.py<\/span><\/b><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559685&quot;:720,&quot;335559737&quot;:720,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Do not submit any other files.<\/span><span 
data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><b><span data-contrast=\"auto\">Important: You are allowed a MAXIMUM of five (5) code submissions to Gradescope <\/span><\/b><b><span data-contrast=\"auto\">SUBMISSION<\/span><\/b><b><span data-contrast=\"auto\">.<\/span><\/b><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;7 Grading Information&#8221; module_id=&#8221;grading&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.18.0&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>7 Grading Information<\/h2>\n<p><span data-contrast=\"auto\">The submitted code (which is worth 100% of your grade) is run as a batch job after the project deadline. The code will be graded using a 100-point scale coinciding with a rubric designed to mirror the implementation details above. Deductions will be applied for unmet implementation requirements or code that fails to run.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">We do not provide an explicit set timeline for returning grades, except that all assignments and exams will be graded before the institute deadline (end of the term). 
As will be the case throughout the term, the grading team will work as quickly as possible to provide project feedback and grades.\u00a0<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Once grades are released, any grade-related matters must follow the <\/span><a href=\"https:\/\/lucylabs.gatech.edu\/ml4t\/fall2022\/assignment-follow-up\/\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">Assignment Follow-Up guidelines and process<\/span><\/a><span data-contrast=\"auto\"> alone. Regrading will only be undertaken in cases where there has been a genuine error or misunderstanding. Please note that requests will be denied if they are not submitted using the <\/span><span data-contrast=\"auto\">Fall 2022<\/span><span data-contrast=\"auto\">\u00a0form or do not fall within the timeframes specified on the <\/span><a href=\"https:\/\/lucylabs.gatech.edu\/ml4t\/fall2022\/assignment-follow-up\/\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">Assignment Follow-Up<\/span><\/a><span data-contrast=\"auto\"> page.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;7.1 Grading Rubric&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h3>7.1 Grading Rubric<\/h3>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;7.1.1 Report&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h4>7.1.1 Report [0 points]<\/h4>\n<p><span data-contrast=\"auto\" xml:lang=\"EN-US\" lang=\"EN-US\" class=\"TextRun SCXW123441216 BCX4\"><span 
class=\"NormalTextRun SCXW123441216 BCX4\">There is no report associated with this assignment<\/span><span class=\"NormalTextRun SCXW123441216 BCX4\">.<\/span><\/span><span class=\"EOP SCXW123441216 BCX4\" data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;7.1.2 Code&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h4>7.1.2 Code<\/h4>\n<p><span data-contrast=\"auto\">Code deductions will be applied if any of the following occur:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">If the author() method is not correctly implemented in the QLearner file: (-20 points)<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p>[\/et_pb_text][et_pb_text admin_label=&#8221;7.1.3 Auto-Grader&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h4>7.1.3 Auto-Grader (Private Grading Script) [100 points]<\/h4>\n<p><span data-contrast=\"auto\">For basic Q-Learning (dyna = 0) we will test your learner against 10 test worlds with 500 epochs in each world. One \u201cepoch\u201d means your robot reaches the goal one time, or after 10000 steps, whichever comes first. 
Your QLearner retains its state (Q-table), and then we allow it to navigate to the goal again, over and over, 500 times. Each test (500 epochs) should complete in less than 2 seconds. <\/span><b><i><span data-contrast=\"auto\">NOTE<\/span><\/i><\/b><i><span data-contrast=\"auto\">: an epoch where the robot fails to reach the goal will likely take much longer (in running time), than one that does reach the goal, and is a common reason for failing to complete test cases within the time limit.<\/span><\/i><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Benchmark: As a benchmark to compare your solution to, we will run our reference solution in the same world, with 500 epochs. We will take the median reward of our reference across all those 500 epochs.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Your score: For each world, we will take the median reward your solution finds across all 500 epochs.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">For a test to be successful, your learner should find a total reward &gt;= 1.5 x the benchmark. 
Note that since the reward for a single epoch is negative, your solution can be up to 50% worse than the reference solution and still pass.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">There are 10 test cases, each test case is worth 9.5 points.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">Here is how we will initialize your QLearner for these test cases:<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/a35c6b4312d477ed27421024d70da01d.js\"><\/script>[\/et_pb_code][et_pb_text admin_label=&#8221;7.1.3 cont.&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">For Dyna-Q, we will set dyna = 200. We will test your learner against world01.csv and world02.csv with 50 epochs. Scoring is similar to the non-dyna case: Each test should complete in less than 10 seconds. 
For the test to be successful, your learner should find a solution with total reward to the goal &gt;= 1.5 x the median reward of our reference solution across all 50 epochs. Note that since the reward for a single epoch is negative, your solution can be up to 50% worse than the reference solution and still pass. We will check this by taking the median of all 50 runs. Each test case is worth 2.5 points. We will initialize your learner with the following parameter values for these test cases:<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p>[\/et_pb_text][et_pb_code _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;]<script src=\"https:\/\/gist.github.com\/CS7646-ML4T\/238175b3b165a4d8acc57862a3a978b4.js\"><\/script>[\/et_pb_code][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;8 DEVELOPMENT GUIDELINES (ALLOWED &#038; PROHIBITED) &#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text module_id=&#8221;guidelines&#8221; _builder_version=&#8221;4.18.0&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; hover_enabled=&#8221;0&#8243; global_colors_info=&#8221;{}&#8221; sticky_enabled=&#8221;0&#8243;]<\/p>\n<h2>8 DEVELOPMENT GUIDELINES (ALLOWED &amp; PROHIBITED)<\/h2>\n<p><span data-contrast=\"auto\">See the <\/span><a href=\"https:\/\/lucylabs.gatech.edu\/ml4t\/fall2022\/project-guidelines-2\/\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"auto\">Course Development Recommendations, Guidelines, and Rules<\/span><\/a><span data-contrast=\"auto\"> for the complete list of requirements applicable 
to all course assignments. <\/span><b><span data-contrast=\"auto\">The Project Technical Requirements are grouped into three sections: Always Allowed, Prohibited with Some Exceptions, and Always Prohibited<\/span><\/b><span data-contrast=\"auto\">.<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">The following exemptions to the Course Development Recommendations, Guidelines, and Rules apply to this project:<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><span data-contrast=\"auto\">N\/A<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][et_pb_row admin_label=&#8221;9 Optional Resources&#8221; module_id=&#8221;optional&#8221; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_column type=&#8221;4_4&#8243; _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;default&#8221; global_colors_info=&#8221;{}&#8221;][et_pb_text _builder_version=&#8221;4.16&#8243; _module_preset=&#8221;d93d428f-8726-40a1-9b24-ea6370f64b6f&#8221; global_colors_info=&#8221;{}&#8221;]<\/p>\n<h2>9 Optional Resources<\/h2>\n<p><span data-contrast=\"auto\">Although the use of these or other resources is not required, some may find them useful in completing the project or in providing an in-depth discussion of the material.\u00a0<\/span><span 
data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<p><span data-contrast=\"auto\">Videos:\u00a0<\/span><span data-ccp-props=\"{&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/p>\n<ul>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"1\" data-aria-level=\"1\"><a href=\"https:\/\/deepmind.com\/learning-resources\/-introduction-reinforcement-learning-david-silver\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">Reinforcement Learning<\/span><\/a><span data-contrast=\"auto\"> (David Silver Video Lectures) &#8211; Videos and PowerPoint presentations.<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><a href=\"https:\/\/omscs.gatech.edu\/cs-7642-reinforcement-learning-course-videos\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">CS7642 Reinforcement Learning<\/span><\/a><span data-contrast=\"auto\"> (Georgia Tech OMSCS Course Videos)<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<li data-leveltext=\"\uf0d7\" data-font=\"Symbol\" data-listid=\"3\" aria-setsize=\"-1\" data-aria-posinset=\"2\" data-aria-level=\"1\"><a href=\"https:\/\/www.youtube.com\/playlist?list=PLoROMvodv4rOSOPzutgyCTapiGlY2Nd8u\" target=\"_blank\" rel=\"noopener\"><span data-contrast=\"none\">CS234 Reinforcement Learning<\/span><\/a><span 
data-contrast=\"auto\"> (Stanford University Course Videos)<\/span><span data-ccp-props=\"{&quot;134233279&quot;:true,&quot;201341983&quot;:1,&quot;335551550&quot;:6,&quot;335551620&quot;:6,&quot;335559739&quot;:170,&quot;335559740&quot;:340}\">\u00a0<\/span><\/li>\n<\/ul>\n<ul><\/ul>\n<p>[\/et_pb_text][\/et_pb_column][\/et_pb_row][\/et_pb_section]<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Project 7: Q-Learning RobotRevisions This assignment is subject to change up until 3 weeks prior to the due date. We do not anticipate changes; any changes will be logged in this section.1 Overview In this assignment, you implement a Reinforcement Learning algorithm called Q-learning, which is a model-free RL algorithm. You will also extend your [&hellip;]<\/p>\n","protected":false},"author":2,"featured_media":0,"parent":3071,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"_et_pb_use_builder":"on","_et_pb_old_content":"<!-- wp:divi\/placeholder \/-->","_et_gb_content_width":"","footnotes":""},"class_list":["post-3121","page","type-page","status-publish","hentry"],"jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/pages\/3121","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/comments?post=3121"}],"version-history":[{"count":6,"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/pages\/3121\/revisions"}],"predecessor-version":[{"id":3346,"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/pages\/3121\/revisions\/3346"}],"up":[{"embeddable":true,"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/pag
es\/3071"}],"wp:attachment":[{"href":"https:\/\/lucylabs.gatech.edu\/ml4t\/wp-json\/wp\/v2\/media?parent=3121"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}