{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "polish-Roberta-classification-simp.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"03083056d1e94a538c2d3a6e452a54e7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_f583b1167728452ba6a48f482de1b242",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_02d2b9682b614004a1e5b16b77d1b44a",
"IPY_MODEL_a8b9a66d854d441d85e14b47c1f7cd86",
"IPY_MODEL_04e56f18cdc74054806bd2c2e759655b"
]
}
},
"f583b1167728452ba6a48f482de1b242": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"02d2b9682b614004a1e5b16b77d1b44a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_31b5c73f04ab4a028a08a9dd27aaaef6",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_81a23c33f8d443719fc15cbff841ab3e"
}
},
"a8b9a66d854d441d85e14b47c1f7cd86": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_3ba8e94b076e4d65bc8fff6fdd36fd3a",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 57,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 57,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_263d73496f7f4a0b913dc9ec206965e7"
}
},
"04e56f18cdc74054806bd2c2e759655b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_955ddc78ae49407abc3d73f3dc2cc004",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 57/57 [00:06<00:00, 9.27ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_ca8dbf3b6f9f426291ec058ccf5afcf3"
}
},
"31b5c73f04ab4a028a08a9dd27aaaef6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"81a23c33f8d443719fc15cbff841ab3e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"3ba8e94b076e4d65bc8fff6fdd36fd3a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"263d73496f7f4a0b913dc9ec206965e7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"955ddc78ae49407abc3d73f3dc2cc004": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"ca8dbf3b6f9f426291ec058ccf5afcf3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"65c5531fb5ef49b0b14e9f717f7a4e99": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_6f1e5f95976e4c96a8c745330a3e3615",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_b360b556de414116914ce0f94d3c4f0f",
"IPY_MODEL_8f2347db18e84913a18cbdf5e18f0135",
"IPY_MODEL_c80576964fbf41f1be6aad1a7eedff07"
]
}
},
"6f1e5f95976e4c96a8c745330a3e3615": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"b360b556de414116914ce0f94d3c4f0f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_8eeae0a30e344c0e8a993a2fe9b25bea",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_4d4fe6a979d047ea821235f00ceea60e"
}
},
"8f2347db18e84913a18cbdf5e18f0135": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_dc2a38711a054435b64dcc1edb99645b",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 8,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 8,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_fe8667b44b6e4a95b110d72a5924f486"
}
},
"c80576964fbf41f1be6aad1a7eedff07": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_f0642e1b2a864aabbb187aea2e687e5b",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 8/8 [00:00<00:00, 9.52ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_26e56fc61c2a483fb26decc4d4d297d4"
}
},
"8eeae0a30e344c0e8a993a2fe9b25bea": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"4d4fe6a979d047ea821235f00ceea60e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"dc2a38711a054435b64dcc1edb99645b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"fe8667b44b6e4a95b110d72a5924f486": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"f0642e1b2a864aabbb187aea2e687e5b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"26e56fc61c2a483fb26decc4d4d297d4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8b8627f8ea894c828bef3eeea8799383": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_9810c3734016439b84b54aaa0873d9a1",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_04e078b606ee4f5eadbfebc1f891f69c",
"IPY_MODEL_93e096058a954a1fa6ad76b289a2f727",
"IPY_MODEL_9b8d70149121432fb14b2ff4643ef8a2"
]
}
},
"9810c3734016439b84b54aaa0873d9a1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"04e078b606ee4f5eadbfebc1f891f69c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_018b342b25674c20b1fc65763326603f",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_0b7bb7a8fb0e4d7d858614b044f56641"
}
},
"93e096058a954a1fa6ad76b289a2f727": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_41b6cf7ff23d42b08700e7a7673b8062",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 7,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 7,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_341de48240614b32bda8fafe2c3dc87a"
}
},
"9b8d70149121432fb14b2ff4643ef8a2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_2e97a973b749451788366e7dc0f3c104",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 7/7 [00:00<00:00, 8.37ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_01b1a11134164856b8840b8176c59163"
}
},
"018b342b25674c20b1fc65763326603f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"0b7bb7a8fb0e4d7d858614b044f56641": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"41b6cf7ff23d42b08700e7a7673b8062": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"341de48240614b32bda8fafe2c3dc87a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"2e97a973b749451788366e7dc0f3c104": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"01b1a11134164856b8840b8176c59163": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"2be036b84ac748e7b697b3d972fe4967": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_b4eaf387135443f298baffc0c2a95cc0",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_862018a61fb745689f034c09f84c69b6",
"IPY_MODEL_a3b037ddc080458bb357e72a33ff9ee3",
"IPY_MODEL_0af68e4132da4bbdbfc1b2cd4f1b2b62"
]
}
},
"b4eaf387135443f298baffc0c2a95cc0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"862018a61fb745689f034c09f84c69b6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_504fd6c0375243368455f31285dd4ce0",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_9bf60f74a05d495aad2f63d4d4032e4c"
}
},
"a3b037ddc080458bb357e72a33ff9ee3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_e9d794d4d0594682a8008e3abbc50ab8",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 57,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 57,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_9a97abb3e0824f7b9a72469ad35d066a"
}
},
"0af68e4132da4bbdbfc1b2cd4f1b2b62": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_70053402ff7e4b6dbafd30cb8584b92b",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 57/57 [00:07<00:00, 7.16ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_91e0b6babb82458caaa0d8076ac7aeb0"
}
},
"504fd6c0375243368455f31285dd4ce0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"9bf60f74a05d495aad2f63d4d4032e4c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"e9d794d4d0594682a8008e3abbc50ab8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"9a97abb3e0824f7b9a72469ad35d066a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"70053402ff7e4b6dbafd30cb8584b92b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"91e0b6babb82458caaa0d8076ac7aeb0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"90053ed6c533463fbf7d48fb983b9694": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_d66cdd223e6a4367ab491277280ab4bf",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_bd3240fab0aa449fa1027a38900a8cf2",
"IPY_MODEL_f3080f8e34ff458dba0869694e6dbe9f",
"IPY_MODEL_9fde0a63838b4fac893d0f445d6bd426"
]
}
},
"d66cdd223e6a4367ab491277280ab4bf": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"bd3240fab0aa449fa1027a38900a8cf2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_cf9c374f7c534f7ca096e16ebaa9316b",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_407cb21a4b0c4b18883ca4f6ad13c989"
}
},
"f3080f8e34ff458dba0869694e6dbe9f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_7f1e2139db334a95ab784e17035224a5",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 8,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 8,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_fd172f5929094f8ebe133ce12c591f17"
}
},
"9fde0a63838b4fac893d0f445d6bd426": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_fa477548d69847d580d79b5e82787c4c",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 8/8 [00:00<00:00, 8.17ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_cdb924bce4334e89b1b22fe77dc7cc88"
}
},
"cf9c374f7c534f7ca096e16ebaa9316b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"407cb21a4b0c4b18883ca4f6ad13c989": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"7f1e2139db334a95ab784e17035224a5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"fd172f5929094f8ebe133ce12c591f17": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"fa477548d69847d580d79b5e82787c4c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"cdb924bce4334e89b1b22fe77dc7cc88": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"88b0009e094e488b8010f38d86de5465": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_97e0858da4f64a4783bc2ae90c1e5f1b",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_71942ca586324397a3925a2ea49edce0",
"IPY_MODEL_ba93057c5c07481284265fae022d5e57",
"IPY_MODEL_46271c25e484489ea75b690416443ead"
]
}
},
"97e0858da4f64a4783bc2ae90c1e5f1b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"71942ca586324397a3925a2ea49edce0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_a3b73dece1184bcaa16ebe04c9e0821a",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_91f37911d5db4a0498d71b437be6ac37"
}
},
"ba93057c5c07481284265fae022d5e57": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_9a0de19ac7bc4243a7acedea10f402aa",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 7,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 7,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_e8f04dcdc1994b87bfda44b1fcc4b14b"
}
},
"46271c25e484489ea75b690416443ead": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_f51da76d11044098a31ff4eab4b77e50",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 7/7 [00:00<00:00, 8.16ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_5546130c0cae46f7a8f551d7057dbefb"
}
},
"a3b73dece1184bcaa16ebe04c9e0821a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"91f37911d5db4a0498d71b437be6ac37": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"9a0de19ac7bc4243a7acedea10f402aa": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"e8f04dcdc1994b87bfda44b1fcc4b14b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"f51da76d11044098a31ff4eab4b77e50": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"5546130c0cae46f7a8f551d7057dbefb": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"9a95dfae21114a1a93bb50310d710754": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_275b3764999748038ce567a2e1b1b61b",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_eb75972d26854d568772f51e2b5426f1",
"IPY_MODEL_ec5926d828874a10a80d06955600833e",
"IPY_MODEL_adb0f07da27a4ddca3072a791f3958f4"
]
}
},
"275b3764999748038ce567a2e1b1b61b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"eb75972d26854d568772f51e2b5426f1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_6f48c9c0c7934e63a3a63d9f7288f06f",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_9384b570a2054e8481537ea14c7c7cbd"
}
},
"ec5926d828874a10a80d06955600833e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_d834733cce544004a86dfb0aa233599e",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 56023,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 56023,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_59d0174058af4e12b058d0e0c95cfb23"
}
},
"adb0f07da27a4ddca3072a791f3958f4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_8c7237abcde143819d6ddf52d42f32a8",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 56023/56023 [00:18<00:00, 3184.22ex/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_f2c338fdddc148abb6193454f11dda2b"
}
},
"6f48c9c0c7934e63a3a63d9f7288f06f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"9384b570a2054e8481537ea14c7c7cbd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"d834733cce544004a86dfb0aa233599e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"59d0174058af4e12b058d0e0c95cfb23": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8c7237abcde143819d6ddf52d42f32a8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"f2c338fdddc148abb6193454f11dda2b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"a977ebda8d794f41bae26fab5f14084f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_e5051acb1a154d5b9a744d5c89dea9e0",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_accf803e6a4a4e8aa08f1726e0fbda1c",
"IPY_MODEL_8b2cbf1f7de64500acb8e4484e37e56a",
"IPY_MODEL_dc8bad987e904cfca8580fe7ed327c58"
]
}
},
"e5051acb1a154d5b9a744d5c89dea9e0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"accf803e6a4a4e8aa08f1726e0fbda1c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_b189b0e55a1344a9b109d098d49ffb82",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_2f6c3884f0604922a192371cdac1290e"
}
},
"8b2cbf1f7de64500acb8e4484e37e56a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_b231cbedbb474bf080e788f8352ade01",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 7009,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 7009,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_7badc04ce6574071947f2df0915b9542"
}
},
"dc8bad987e904cfca8580fe7ed327c58": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_46b9861a23bb42babd995f77620f21c8",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 7009/7009 [00:02<00:00, 3211.62ex/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_ab051e934c5e481bab6b41a2bbe69906"
}
},
"b189b0e55a1344a9b109d098d49ffb82": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"2f6c3884f0604922a192371cdac1290e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"b231cbedbb474bf080e788f8352ade01": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"7badc04ce6574071947f2df0915b9542": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"46b9861a23bb42babd995f77620f21c8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"ab051e934c5e481bab6b41a2bbe69906": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"49c090151d4040ed8527da7f65f0f07b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_e5f57195b58c45a5ae1245d36906ba36",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_7f819f33cd7d49fd80b71e90188d1580",
"IPY_MODEL_36f121566b244fe08e0aa433887cd47c",
"IPY_MODEL_9337dcb073014911abd193e771195c03"
]
}
},
"e5f57195b58c45a5ae1245d36906ba36": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"7f819f33cd7d49fd80b71e90188d1580": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_01620fb554914c38918870f685805890",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_7c19eb696edf4f20861d92b434f88167"
}
},
"36f121566b244fe08e0aa433887cd47c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_f35f004fac25428d801cfcd677b4acbe",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 6972,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 6972,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_d7976b33e48e4196b0440a89a86a5f52"
}
},
"9337dcb073014911abd193e771195c03": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_fb373c03d38841fa86f9b359252c7139",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 6972/6972 [00:02<00:00, 3165.35ex/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_2294fe7ca886491d9c5517ddb7eec86e"
}
},
"01620fb554914c38918870f685805890": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"7c19eb696edf4f20861d92b434f88167": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"f35f004fac25428d801cfcd677b4acbe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"d7976b33e48e4196b0440a89a86a5f52": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"fb373c03d38841fa86f9b359252c7139": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"2294fe7ca886491d9c5517ddb7eec86e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"90a506b640c54985945de67ee75b0b04": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_82fe7eca36cc42d69672983632f04ffd",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_a09cb4c9b74b459f9b5b380abe1d9e4e",
"IPY_MODEL_f71fa21be43646aaa21b3899d266764a",
"IPY_MODEL_e1cc66f6c1cd4c1b9ee370f552768dec"
]
}
},
"82fe7eca36cc42d69672983632f04ffd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"a09cb4c9b74b459f9b5b380abe1d9e4e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_6dd1aadc16794777a8102cc8a27bf639",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_aaaeec2e8ceb4acfa8daa90d2e901e16"
}
},
"f71fa21be43646aaa21b3899d266764a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_1b7e82f3b0da4ef299cb835a91110be1",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 6,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 6,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_d2a698d539ae4848896498b05a234224"
}
},
"e1cc66f6c1cd4c1b9ee370f552768dec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_732c9ac9574e4dd79ab845319f9e916c",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 6/6 [00:06<00:00, 1.05ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_b2e075103c2740c0ba5a7e5fb4fb07e7"
}
},
"6dd1aadc16794777a8102cc8a27bf639": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"aaaeec2e8ceb4acfa8daa90d2e901e16": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"1b7e82f3b0da4ef299cb835a91110be1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"d2a698d539ae4848896498b05a234224": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"732c9ac9574e4dd79ab845319f9e916c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"b2e075103c2740c0ba5a7e5fb4fb07e7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"dc86b6ae1b2c49089fdf66d3c4fdb092": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_c3bfc3f461fb414890981bb14a9cdf65",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_59751096baad41b4ab70d77f8bd424d4",
"IPY_MODEL_5693d90b0ef64334bb0dbbe876db4d29",
"IPY_MODEL_4b16edac5070467d85f860876453f27d"
]
}
},
"c3bfc3f461fb414890981bb14a9cdf65": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"59751096baad41b4ab70d77f8bd424d4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_9c0980093556435c9e1d818ebb5b4573",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_078aa55a428d489f98525190e8c802d9"
}
},
"5693d90b0ef64334bb0dbbe876db4d29": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_fd52e7c8c00e438eb9f1763dd215cb8a",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 1,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 1,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_84a59a95194542868f75c4be3639f750"
}
},
"4b16edac5070467d85f860876453f27d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_fedf2e50afec4a07b8f215b02dc01024",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 1/1 [00:00<00:00, 1.24ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_f3d66973388048eb9bc9cb3dfdf2b7f9"
}
},
"9c0980093556435c9e1d818ebb5b4573": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"078aa55a428d489f98525190e8c802d9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"fd52e7c8c00e438eb9f1763dd215cb8a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"84a59a95194542868f75c4be3639f750": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"fedf2e50afec4a07b8f215b02dc01024": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"f3d66973388048eb9bc9cb3dfdf2b7f9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"dce01c7c32ca497086441282121489c0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_c8445136cfd84ef29140f3fca960173e",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_85b6d179d2f94a2e8723c52cb910143f",
"IPY_MODEL_f2a1806e2f754e5b8b1033c6667e547a",
"IPY_MODEL_227376b21fb44ae3aa490f7eb683f622"
]
}
},
"c8445136cfd84ef29140f3fca960173e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"85b6d179d2f94a2e8723c52cb910143f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_5248261748eb4085875802c2f6cf920d",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_4e6b143070a642f0bf856ac3e07777be"
}
},
"f2a1806e2f754e5b8b1033c6667e547a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_9b64280311714e41a802f5a4b9816b85",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 1,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 1,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_86bd3fa51c4749619f04695e175f3ca9"
}
},
"227376b21fb44ae3aa490f7eb683f622": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_640de67654fa4ab1a4ea408f0630d74d",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 1/1 [00:00<00:00, 1.26ba/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_eb9930e1e30747758295405343e5ba77"
}
},
"5248261748eb4085875802c2f6cf920d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"4e6b143070a642f0bf856ac3e07777be": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"9b64280311714e41a802f5a4b9816b85": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"86bd3fa51c4749619f04695e175f3ca9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"640de67654fa4ab1a4ea408f0630d74d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"eb9930e1e30747758295405343e5ba77": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "AxPcZwwJeXwG"
},
"source": [
"**Transfer learning tutorial for NLP enthusiasts, part 3.**\n",
"\n",
"This notebook shows you how to train BERT-like model for sequence (sentence, set of sentences) classification.\n",
"\n",
"This one uses GPU so remember to connect to it."
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6mlOQUfneWhy",
"outputId": "a0470868-edec-4267-c62e-6abc0f07b272"
},
"source": [
"!pip install transformers datasets\n",
"\n",
"import torch, os\n",
"from transformers import RobertaModel, AutoModel, PreTrainedTokenizerFast\n",
"from transformers import AutoTokenizer\n",
"from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n",
"import pandas as pd\n",
"from datasets import Dataset, DatasetDict, load_dataset, load_metric\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"%matplotlib inline"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting transformers\n",
" Downloading transformers-4.10.2-py3-none-any.whl (2.8 MB)\n",
"\u001b[K |████████████████████████████████| 2.8 MB 5.3 MB/s \n",
"\u001b[?25hCollecting datasets\n",
" Downloading datasets-1.12.1-py3-none-any.whl (270 kB)\n",
"\u001b[K |████████████████████████████████| 270 kB 40.2 MB/s \n",
"\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (21.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n",
"Collecting pyyaml>=5.1\n",
" Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)\n",
"\u001b[K |████████████████████████████████| 636 kB 29.2 MB/s \n",
"\u001b[?25hCollecting tokenizers<0.11,>=0.10.1\n",
" Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
"\u001b[K |████████████████████████████████| 3.3 MB 28.3 MB/s \n",
"\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.8.1)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.2)\n",
"Collecting sacremoses\n",
" Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)\n",
"\u001b[K |████████████████████████████████| 895 kB 20.9 MB/s \n",
"\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n",
"Collecting huggingface-hub>=0.0.12\n",
" Downloading huggingface_hub-0.0.17-py3-none-any.whl (52 kB)\n",
"\u001b[K |████████████████████████████████| 52 kB 954 kB/s \n",
"\u001b[?25hRequirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub>=0.0.12->transformers) (3.7.4.3)\n",
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n",
"Collecting xxhash\n",
" Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n",
"\u001b[K |████████████████████████████████| 243 kB 33.8 MB/s \n",
"\u001b[?25hRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n",
"Collecting fsspec[http]>=2021.05.0\n",
" Downloading fsspec-2021.8.1-py3-none-any.whl (119 kB)\n",
"\u001b[K |████████████████████████████████| 119 kB 30.7 MB/s \n",
"\u001b[?25hRequirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n",
"Collecting aiohttp\n",
" Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)\n",
"\u001b[K |████████████████████████████████| 1.3 MB 32.1 MB/s \n",
"\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.1.5)\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.5.30)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Collecting async-timeout<4.0,>=3.0\n",
" Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.2.0)\n",
"Collecting multidict<7.0,>=4.5\n",
" Downloading multidict-5.1.0-cp37-cp37m-manylinux2014_x86_64.whl (142 kB)\n",
"\u001b[K |████████████████████████████████| 142 kB 47.8 MB/s \n",
"\u001b[?25hCollecting yarl<2.0,>=1.0\n",
" Downloading yarl-1.6.3-cp37-cp37m-manylinux2014_x86_64.whl (294 kB)\n",
"\u001b[K |████████████████████████████████| 294 kB 48.1 MB/s \n",
"\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.5.0)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2018.9)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Installing collected packages: multidict, yarl, async-timeout, fsspec, aiohttp, xxhash, tokenizers, sacremoses, pyyaml, huggingface-hub, transformers, datasets\n",
" Attempting uninstall: pyyaml\n",
" Found existing installation: PyYAML 3.13\n",
" Uninstalling PyYAML-3.13:\n",
" Successfully uninstalled PyYAML-3.13\n",
"Successfully installed aiohttp-3.7.4.post0 async-timeout-3.0.1 datasets-1.12.1 fsspec-2021.8.1 huggingface-hub-0.0.17 multidict-5.1.0 pyyaml-5.4.1 sacremoses-0.0.45 tokenizers-0.10.3 transformers-4.10.2 xxhash-2.0.2 yarl-1.6.3\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nTP_JAR1pikA",
"outputId": "1aa304b4-dcea-4a7a-bd40-73b6b0efe174"
},
"source": [
"#model\n",
"!wget https://github.com/sdadas/polish-roberta/releases/download/models-v2/roberta_base_transformers.zip\n",
"!mkdir roberta\n",
"!unzip roberta_base_transformers.zip -d roberta\n",
"#load a tokenizer model (here: polish roberta)\n",
"model_dir = \"./roberta\"\n",
"rtokenizer = AutoTokenizer.from_pretrained(model_dir)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2021-09-22 09:27:16-- https://github.com/sdadas/polish-roberta/releases/download/models-v2/roberta_base_transformers.zip\n",
"Resolving github.com (github.com)... 140.82.114.3\n",
"Connecting to github.com (github.com)|140.82.114.3|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://github-releases.githubusercontent.com/247501435/bea4e000-8a5d-11eb-86cc-793bd6e126a7?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20210922%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20210922T092641Z&X-Amz-Expires=300&X-Amz-Signature=8df9173e9d839a2e38f599e32bf1d7fd801058698aeeff6cdb674d0e8c8d6fe4&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=247501435&response-content-disposition=attachment%3B%20filename%3Droberta_base_transformers.zip&response-content-type=application%2Foctet-stream [following]\n",
"--2021-09-22 09:27:16-- https://github-releases.githubusercontent.com/247501435/bea4e000-8a5d-11eb-86cc-793bd6e126a7?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20210922%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20210922T092641Z&X-Amz-Expires=300&X-Amz-Signature=8df9173e9d839a2e38f599e32bf1d7fd801058698aeeff6cdb674d0e8c8d6fe4&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=247501435&response-content-disposition=attachment%3B%20filename%3Droberta_base_transformers.zip&response-content-type=application%2Foctet-stream\n",
"Resolving github-releases.githubusercontent.com (github-releases.githubusercontent.com)... 185.199.108.154, 185.199.109.154, 185.199.110.154, ...\n",
"Connecting to github-releases.githubusercontent.com (github-releases.githubusercontent.com)|185.199.108.154|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 294210036 (281M) [application/octet-stream]\n",
"Saving to: ‘roberta_base_transformers.zip’\n",
"\n",
"roberta_base_transf 100%[===================>] 280.58M 151MB/s in 1.9s \n",
"\n",
"2021-09-22 09:27:18 (151 MB/s) - ‘roberta_base_transformers.zip’ saved [294210036/294210036]\n",
"\n",
"Archive: roberta_base_transformers.zip\n",
" inflating: roberta/config.json \n",
" inflating: roberta/pytorch_model.bin \n",
" inflating: roberta/tokenizer.json \n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of the model checkpoint at ./roberta were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']\n",
"- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of RobertaModel were not initialized from the model checkpoint at ./roberta and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "4aFRXnJzcaX3",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6fef4d83-0d5f-4649-bba6-442b378b33df"
},
"source": [
"!wget -O dataset_clarin.zip https://clarin-pl.eu/dspace/bitstream/handle/11321/700/dataset_clarin.zip?sequence=1&isAllowed=y\n",
"!mkdir data\n",
"!unzip dataset_clarin.zip -d data"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2021-09-22 09:28:47-- https://clarin-pl.eu/dspace/bitstream/handle/11321/700/dataset_clarin.zip?sequence=1\n",
"Resolving clarin-pl.eu (clarin-pl.eu)... 156.17.135.38\n",
"Connecting to clarin-pl.eu (clarin-pl.eu)|156.17.135.38|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 15917355 (15M) [application/zip]\n",
"Saving to: ‘dataset_clarin.zip’\n",
"\n",
"dataset_clarin.zip 100%[===================>] 15.18M 10.2MB/s in 1.5s \n",
"\n",
"2021-09-22 09:28:49 (10.2 MB/s) - ‘dataset_clarin.zip’ saved [15917355/15917355]\n",
"\n",
"Archive: dataset_clarin.zip\n",
" creating: data/dataset/\n",
" inflating: data/dataset/all2.sentence.dev.txt \n",
" inflating: data/dataset/all2.sentence.test.txt \n",
" inflating: data/dataset/all2.sentence.train.txt \n",
" inflating: data/dataset/all4.text.dev.txt \n",
" inflating: data/dataset/all4.text.test.txt \n",
" inflating: data/dataset/all4.text.train.txt \n",
" inflating: data/dataset/hotels.sentence.dev.txt \n",
" inflating: data/dataset/hotels.sentence.test.txt \n",
" inflating: data/dataset/hotels.sentence.train.txt \n",
" inflating: data/dataset/hotels.text.dev.txt \n",
" inflating: data/dataset/hotels.text.test.txt \n",
" inflating: data/dataset/hotels.text.train.txt \n",
" inflating: data/dataset/medicine.sentence.dev.txt \n",
" inflating: data/dataset/medicine.sentence.test.txt \n",
" inflating: data/dataset/medicine.sentence.train.txt \n",
" inflating: data/dataset/medicine.text.dev.txt \n",
" inflating: data/dataset/medicine.text.test.txt \n",
" inflating: data/dataset/medicine.text.train.txt \n",
" inflating: data/dataset/medicine_hotels_reviews.text.dev.txt \n",
" inflating: data/dataset/medicine_hotels_reviews.text.train.txt \n",
" inflating: data/dataset/products.text.dev.txt \n",
" inflating: data/dataset/products.text.test.txt \n",
" inflating: data/dataset/products.text.train.txt \n",
" inflating: data/dataset/products_hotels_reviews.text.dev.txt \n",
" inflating: data/dataset/products_hotels_reviews.text.train.txt \n",
" inflating: data/dataset/products_medicine_hotels.text.dev.txt \n",
" inflating: data/dataset/products_medicine_hotels.text.train.txt \n",
" inflating: data/dataset/products_medicine_reviews.text.dev.txt \n",
" inflating: data/dataset/products_medicine_reviews.text.train.txt \n",
" inflating: data/dataset/reviews.text.dev.txt \n",
" inflating: data/dataset/reviews.text.test.txt \n",
" inflating: data/dataset/reviews.text.train.txt \n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yttZFbhK4pRQ",
"outputId": "494b2e39-818e-4ce0-f218-d66e7bd7c240"
},
"source": [
"!wc -l data/dataset/*sentence.train.*"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" 28581 data/dataset/all2.sentence.train.txt\n",
" 12434 data/dataset/hotels.sentence.train.txt\n",
" 16200 data/dataset/medicine.sentence.train.txt\n",
" 57215 total\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "je4Be1D7pa-h",
"outputId": "f4c403a7-4f6a-4ea4-c975-f111971caf15"
},
"source": [
"!head data/dataset/medicine.sentence.dev.txt"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Według niej komosa ryżowa to bardzo duża dawka białka , które jest bardzo podobne do białka zwierzęcego . __label__z_zero\n",
"Sadze ze na Pani Malgorzacie tak po prostu mozna polegac . __label__z_plus_m\n",
"Niezdecydowanym - polecam ! __label__z_plus_m\n",
"Widocznie jestem człowiekiem małej wiary , ale tej drogi leczenia nie polecam , szkoda czasu i pieniędzy . __label__z_minus_m\n",
"moja skóra zmieniła się diametrialnie na lepszą . __label__z_plus_m\n",
"Po badaniach lekarz przepisał lek clostilbegyt - lek na wywołanie owulacji bez oceny czy mam owulację czy nie . __label__z_minus_m\n",
"WSPANIAŁE EFEKTY I BARDZO POCHLEBNE OPINIE . __label__z_plus_m\n",
"Dr Szymula pokierował mnie sam na różne badania wykluczając choroby które mogły by spowodować tak fatalne samopoczucie . __label__z_plus_m\n",
"Należy pamiętać , że przecież najważniejsza w pierwszych tygodniach ciąży jest obserwacja rozwoju płodu , czy płód rozwija się zgodnie z przyjętymi etapami , a o tym sama Pani powinna wiedzieć . __label__z_zero\n",
"Po kilku zdaniach dot . dolegliwości Pani dr miała już własną opinie ale \" o matce \" . __label__z_minus_m\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7TauFRh12ej5",
"outputId": "27d9a63e-61cf-4ad2-8715-7a462e31dd96"
},
"source": [
"!head data/dataset/medicine.text.dev.txt"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Po wakacjach we Włoszech zaobserwował em u siebie dziwne znamię . Oczywiście przeczytał em internet ( cały ) i zdiagnozował em u siebie czerniaka . Aby potwierdzić , wybrał em się do lekarza . Pani Doktor fantastyczna . Po pierwsze primo przesympatyczna ( dla mnie to ważna bo dzięki temu nie boje się pytać ) po drugie primo mega profesjonalna i merytoryczna . Wnikliwie zbadała problem , zebrała szczegółowy wywiad , uważnie słucha , drąży i dopytuje , notuje to , co mówi pacjent . Nie wzdycha , nie fuka , nie deprecjonuje . Jest bardzo empatyczna , ze zrozumieniem przyjęła moje diagnozy internetowe . Zaplanowali śmy strategie leczenia , omówili śmy różne scenariusze ( najłagodniejszy i ten HARD ) . . . . Kolejna wizyta za 12 tygodni . Wrócę na stówę . Serdecznie dziękuję Pani Doktor . Obyśmy dożyli takich lekarzy w państwowej Służbie Zdrowia . . . . __label__meta_plus_m\n",
"Trafiła m do Pani doktor w sprawie podejrzanej insulinooporności . Pani doktor spojrzała jedynie na jeden z wielu wyników , powiedziała , że płaskiej krzywej cukrowej , którą mam , się nie leczy i była gotowa zakończyć wizytę . Dopiero mój upór i dalszy opis objawów \" zachęcił \" Panią doktor do spojrzenia na pozostałe wyniki . Sposób rozmowy z pacjentem również pozostawia wiele do życzenia . Pani doktor \" doradza \" tak , jakby właśnie karciła kilkuletnie dziecko . Takiego podejścia spodziewała by m się podczas wizyty na NFZ a nie prywatnej . Podsumowując , wyszła m z wizyty z poczuciem , że skoro jeszcze nie mam cukrzycy , a przyszła m po to , żeby w przypadku potwierdzenia insulinooporności dowiedzieć się jak mam postępować dalej by nie doprowadzić do rozwinięcia się cukrzycy ( mój tato jest cukrzykiem więc mogę mieć predyspozycje genetyczne ) , nie jestem interesującym przypadkiem dla Pani doktor . Zdecydowanie odradzam ! ! ! __label__meta_minus_m\n",
"Zgadzam się w 100 % z wcześniejszymi opiniami pacjentów dotyczącymi osoby i pracy Pani Urszuli . Wizyty u Pani Psycholog w pewien sposób postawiły mnie na nogi , pozwoliły spojrzeć na problem \" systemowo \" , dały nadzieję i przekonanie że każdy ma prawo do własnego szczęscia . Pani Psycholog obudziła we mnie wolę \" walki \" o własne JA jak i motywacją do działania . Cenię sobie wysokie zaangażowanie Pani Urszuli jak i indywidualne przygotowanie do danego problemu pacjenta . Współpraca z Panią Psycholog przebiega w bardzo miłej , ciepłej atmosferze . Pani Urszula jest według mnie świetnym specjalistą ( wykonującym swój zawód z powołania ) . __label__meta_plus_m\n",
"Badaczka analizuje interakcje pomiędzy endokanabinoidami i endowaniloidami . Kanabinoidy powstające wewnątrz naszego organizmu , nazywamy endogennymi ( endokanabinoidami ) . Jednym z nich jest analog THC ( tetrahydrokanabidiolu ) - anandamid . Układ endokanabinoidowy to właśnie wewnętrzny układ naszego organizmu . Wyniki pracy dr hab . Starowicz - Bubak wskażą nowe kierunki w terapii bólu neuropatycznego . Uczona sprawdza , na jakie układy , receptory czy cząsteczki można działać , aby leczyć i uśmierzać ból przewlekły . \" W wyniku naszego projektu nie powstanie nowy zastrzyk czy tabletka , którą po zakończeniu realizacji projektu będzie można kupić w aptece . Natomiast wytypujemy cząsteczki , które najlepiej potwierdzą postawioną przez nas hipotezę i zbadamy ich właściwości farmakologiczne ” – tłumaczy dr Starowicz - Bubak . Praca jej zespołu może dać wskazówki firmom farmaceutycznym co do sposobów projektowania leków . Na etapie badań podstawowych nie chodzi jeszcze o licencje czy patenty . __label__meta_zero\n",
"Ludzki lekarz , pomimo mnóstwa pracy , setek pacjentek , w szpitalu znajduje czas i chęci , żeby każdej pomóc . Prowadził moją ciąże bliżniaczą i w połowie roku 2013 , pomógł moim synom przyjść na świat wykonując zabieg cesarskiego cięcia . Kierował mnie na wszystkie badania , zauważył w porę problem ze skracającą sie szyjką macicy i zapobiegł przedwczesnemu porodowi . Zawsze mogła m sie z doktorem skontaktowac w pilnej sprawie , jak nie odbierał telefonu , to pisała m smsa i odpisywał . Chodziłąm na wizyty w ramach nfz do szpitala , dwa razy była m na kleszczowej , z powodu długiego terminu oczekiwania na wizytę w bielańskim . Gdyby więcej było by takich lekarzy , z powołania , wykonujących swoją pracę w sposób odpowiedzialny i życzliwych pacjentowi , nasza służba zdrowia była by super . A jest jak jest , wszyscy wiemy . __label__meta_plus_m\n",
"Leczę się z powodu niepłodności od kilkunastu lat . Przechodziła m już dwie kliniki niepłodności , wielu lekarzy i kilkukrotne in vitro . Do Pani Doktor trafiła m przez przypadek . Pierwszy raz spotkała m się z rozpoczęciem wizyty od tak szczegółowego wywiadu . Następnie seria badań . I tu moje zaskoczenie . Interpretacja wyników i okazało się , że występuje u mnie kilka nieprawidłowości o których nikt wcześniej nie raczył mnie poinformować . Propozycja leczenia i działanie . Każda wizyta u Pani doktor trwa minimum pół godziny . Jest to czas wypełniony badaniami , rozmową , planowaniem kolejnych działań itp . . Jest to lekarz z prawdziwego zdarzenia . Co prawda nie udało mi się zajść w upragnioną ciążę , ale wiem że lekarz to nie cudotwórca . Po wielu latach doświadczeń z lekarzami , nie oczekują już cudu , tylko podejścia do mnie jak do człowieka , a nie maszynki do zarabiania pieniędzy . Tak właśnie oceniam wizyty u p . Doktor Roczniok . Polecam ją z czystym sumieniem . __label__meta_plus_m\n",
"Aby rozwijać i komercjalizować unikalny polski wynalazek uczeni powołali w 2008 r . spółkę BRASTER , obecnie notowaną już na NewConnect . Wówczas była spółką z o . o . utworzoną przez grupę pięciu naukowców . Obecnie tester został wzbogacony w oprogramowanie z możliwością interaktywnej komunikacji z użytkownikiem . Sekwencje obrazów zapisywane są w postaci cyfrowej , którą następnie można wielokrotnie odtwarzać i prezentować . Eksperci opracowali algorytm postępowania w formie drzewa decyzyjnego który ułatwia interpretację obrazów cieplnych . „ We współpracy z WAT - Wydziałem Nowych Technologii mogli śmy zintensyfikować badania nad technologią hermetyzacji termotropowych ciekłych kryształów . Były to badania idące w kierunku pozyskania dużych powierzchni folii termograficznych , pozbawionych defektów dyskwalifikujących je z zastosowań medycznych . Pomysł poparty konkretnymi działaniami , w tym wsparciem finansowym , przekształcił się w opatentowaną , unikatową w skali światowej , technologię wytwarzania emulsji ciekłokrystalicznej i matryc filmu termo optycznego ” – mówi dr Jaremek . __label__meta_zero\n",
"Nie mam żadnych zarzutów co do samej pracy Pani doktor . Badanie wykonane delikatnie , została m poinformowana o jego wynikach i o dalszym leczeniu , dodatkowo otrzymała m szereg skierowań i Pani Julia nie wypisała pochopnie recepty - to na wielki plus . Jednak samo podejście do pacjenta mogło by być zwyczajnie milsze , atmosfera w gabinecie była dosyć napięta i odniosła m wrażenie jakby Pani doktor była poirytowana . Być może trafiła m na gorszy dzień , dlatego nie skreślam Pani Julii zbyt pochopnie i prawdopodobnie wrócę , gdyż jest bardzo rzeczowym lekarzem : ) __label__meta_plus_s\n",
"Cieżko mi ocenić wizytę . Czuła m się mocno ignorowana podczas zadawania pytań nt metody badania , zasadności zaleceń oraz pozostałych pytań . Poza tym Pani Aleksandra gotowała obiad w trakcie wizyty , była dość znudzona rozmową ze mną . . . Usłyszała m , że jestem zainfekowana wszystkim , mam podejrzenia nowotworowe plus całą masę innych schorzeń więc wyszła m kompletnie załamana . . . Zero podejścia . Nie oceniam całkowicie źle , ponieważ , zalecane suplementy są ziołowe i mają uzasadnienie zdrowotne natomiast osobowość tej Pani zupełnie do mnie nie przemawia . __label__meta_minus_m\n",
"Troche zawiedziona : / Nie znamy dokladnej miary maluszka ani teminu porodu bo sprzet \" dziwnie \" pokazuje . Do tego niedawno dowiedzialam sie ze powinna m miec karte ciazy , ktorej Pan doktor mi nie wystawil do dzis ( 17tc ) , a po spytaniu byl zdziwiony , ze jej nie mam bo myslal ze juz mi wystawil : / __label__meta_minus_m\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "cQduJo3vpNuz"
},
"source": [
"data = pd.read_csv('data/dataset/all2.sentence.dev.txt', delimiter='\\t', header=None)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "HuT-yb001ZBl",
"outputId": "a9b98453-e1c0-42de-d688-de67395319ca"
},
"source": [
"data.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 0 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Sprawdzane będzie funkcjonowanie tej zastawki... \n",
" \n",
" \n",
" 1 \n",
" Obce sobie osoby muszą spać razem na sofach ( ... \n",
" \n",
" \n",
" 2 \n",
" Nie jest ona łatwa , zwykle podejrzenie złośli... \n",
" \n",
" \n",
" 3 \n",
" Bardzo miła i fachowa obsługa . __label__z_plus_m \n",
" \n",
" \n",
" 4 \n",
" Jest to osoba , która potrafi w trakcie proces... \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0\n",
"0 Sprawdzane będzie funkcjonowanie tej zastawki...\n",
"1 Obce sobie osoby muszą spać razem na sofach ( ...\n",
"2 Nie jest ona łatwa , zwykle podejrzenie złośli...\n",
"3 Bardzo miła i fachowa obsługa . __label__z_plus_m\n",
"4 Jest to osoba , która potrafi w trakcie proces..."
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "2s_LC7FuuycZ"
},
"source": [
"data['text'] = data[0].str.rsplit(' ', 1).str[0]\n",
"data['text_label'] = data[0].str.rsplit(' ', 1).str[-1]"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "3fCHKLdj1UKM",
"outputId": "17f01c60-dbed-435c-93c4-b721b2616bf3"
},
"source": [
"data.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 0 \n",
" text \n",
" text_label \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Sprawdzane będzie funkcjonowanie tej zastawki... \n",
" Sprawdzane będzie funkcjonowanie tej zastawki... \n",
" __label__z_zero \n",
" \n",
" \n",
" 1 \n",
" Obce sobie osoby muszą spać razem na sofach ( ... \n",
" Obce sobie osoby muszą spać razem na sofach ( ... \n",
" __label__z_minus_m \n",
" \n",
" \n",
" 2 \n",
" Nie jest ona łatwa , zwykle podejrzenie złośli... \n",
" Nie jest ona łatwa , zwykle podejrzenie złośli... \n",
" __label__z_zero \n",
" \n",
" \n",
" 3 \n",
" Bardzo miła i fachowa obsługa . __label__z_plus_m \n",
" Bardzo miła i fachowa obsługa . \n",
" __label__z_plus_m \n",
" \n",
" \n",
" 4 \n",
" Jest to osoba , która potrafi w trakcie proces... \n",
" Jest to osoba , która potrafi w trakcie proces... \n",
" __label__z_plus_m \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 ... text_label\n",
"0 Sprawdzane będzie funkcjonowanie tej zastawki... ... __label__z_zero\n",
"1 Obce sobie osoby muszą spać razem na sofach ( ... ... __label__z_minus_m\n",
"2 Nie jest ona łatwa , zwykle podejrzenie złośli... ... __label__z_zero\n",
"3 Bardzo miła i fachowa obsługa . __label__z_plus_m ... __label__z_plus_m\n",
"4 Jest to osoba , która potrafi w trakcie proces... ... __label__z_plus_m\n",
"\n",
"[5 rows x 3 columns]"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eAW2WBbs4ius",
"outputId": "5a4c6e62-7c4c-4541-d253-0fb3e8fdcb9c"
},
"source": [
"len(data)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"3128"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GstwUu8z3epu",
"outputId": "8129ec35-9f63-4353-a1e1-7c205b020515"
},
"source": [
"data['text_label'].unique()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['__label__z_zero', '__label__z_minus_m', '__label__z_plus_m',\n",
" '__label__z_plus_s', '__label__z_amb', '__label__z_minus_s'],\n",
" dtype=object)"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ce5Czfi24Drm"
},
"source": [
"Label meanings: `minus_m` -- strong negative; `minus_s` -- weak negative; `zero` -- neutral; `amb` -- ambiguous; `plus_s` -- weak positive; `plus_m` -- strong positive."
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WsSFn1_F3f1T",
"outputId": "07f055d4-4b96-41dd-eca0-9f91fafc341b"
},
"source": [
"# Keep only the three main sentiment classes. .copy() makes `selected` an independent\n",
"# frame, so adding columns to it later does not raise SettingWithCopyWarning.\n",
"selected = data[data['text_label'].isin(['__label__z_zero', '__label__z_minus_m', '__label__z_plus_m'])].copy()\n",
"len(selected)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"3060"
]
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wV7wa0Z28baq",
"outputId": "47a9050a-e01f-4f25-c33c-32dd587a4202"
},
"source": [
"# Numeric targets: strong negative -> 0.0, neutral -> 0.5, strong positive -> 1.0\n",
"mapping = {'__label__z_minus_m':0., '__label__z_zero':0.5, '__label__z_plus_m':1.}\n",
"# assign() returns a new frame instead of writing into a slice of `data`,\n",
"# which is what raises SettingWithCopyWarning.\n",
"selected = selected.assign(label=selected['text_label'].map(mapping))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "E4xt6VCP8bFi",
"outputId": "6cc37646-6782-4d1f-a88e-ba57ef56ab1f"
},
"source": [
"selected.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 0 \n",
" text \n",
" text_label \n",
" label \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Sprawdzane będzie funkcjonowanie tej zastawki... \n",
" Sprawdzane będzie funkcjonowanie tej zastawki... \n",
" __label__z_zero \n",
" 0.5 \n",
" \n",
" \n",
" 1 \n",
" Obce sobie osoby muszą spać razem na sofach ( ... \n",
" Obce sobie osoby muszą spać razem na sofach ( ... \n",
" __label__z_minus_m \n",
" 0.0 \n",
" \n",
" \n",
" 2 \n",
" Nie jest ona łatwa , zwykle podejrzenie złośli... \n",
" Nie jest ona łatwa , zwykle podejrzenie złośli... \n",
" __label__z_zero \n",
" 0.5 \n",
" \n",
" \n",
" 3 \n",
" Bardzo miła i fachowa obsługa . __label__z_plus_m \n",
" Bardzo miła i fachowa obsługa . \n",
" __label__z_plus_m \n",
" 1.0 \n",
" \n",
" \n",
" 4 \n",
" Jest to osoba , która potrafi w trakcie proces... \n",
" Jest to osoba , która potrafi w trakcie proces... \n",
" __label__z_plus_m \n",
" 1.0 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 ... label\n",
"0 Sprawdzane będzie funkcjonowanie tej zastawki... ... 0.5\n",
"1 Obce sobie osoby muszą spać razem na sofach ( ... ... 0.0\n",
"2 Nie jest ona łatwa , zwykle podejrzenie złośli... ... 0.5\n",
"3 Bardzo miła i fachowa obsługa . __label__z_plus_m ... 1.0\n",
"4 Jest to osoba , która potrafi w trakcie proces... ... 1.0\n",
"\n",
"[5 rows x 4 columns]"
]
},
"metadata": {},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "r7GWcr39-V7z"
},
"source": [
"#let's write a nice function then\n",
"def prepare(df):\n",
"    \"\"\"Parse raw labelled lines and keep the three main sentiment classes.\n",
"\n",
"    Column 0 of `df` must hold strings of the form '<sentence> <tag>', where the\n",
"    tag is the last space-separated token (e.g. '__label__z_plus_m').\n",
"\n",
"    Side effect: the columns 'text', 'text_label' and 'label' are added to `df`\n",
"    itself; later cells read them (e.g. data_train['text_label']).\n",
"\n",
"    Returns an independent copy of the rows tagged minus_m, zero or plus_m, with\n",
"    'label' equal to 0.0, 0.5 and 1.0 respectively. Because it is a copy, it can be\n",
"    modified (columns dropped, values assigned) without SettingWithCopyWarning.\n",
"    \"\"\"\n",
"    # Split once at the last space; `n` is passed by keyword because pandas 2.x\n",
"    # no longer accepts it positionally.\n",
"    parts = df[0].str.rsplit(' ', n=1)\n",
"    df['text'] = parts.str[0]\n",
"    df['text_label'] = parts.str[-1]\n",
"    # minus_s / plus_s get a numeric label in `df` as well; tags without an entry\n",
"    # (e.g. amb) become NaN. None of those rows are part of the returned frame.\n",
"    mapping = {'__label__z_minus_m':0., '__label__z_minus_s':0.25, '__label__z_zero':0.5, '__label__z_plus_s':0.75, '__label__z_plus_m':1.}\n",
"    df['label'] = df['text_label'].map(mapping)\n",
"    kept = ['__label__z_zero', '__label__z_minus_m', '__label__z_plus_m']\n",
"    return df[df['text_label'].isin(kept)].copy()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "EBwMy6b9-UwN",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "3adbfbe7-c087-45ea-a5c7-6e6c4bf9bfa4"
},
"source": [
"# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same three frames\n",
"# (each file keeps its own 0..n-1 index, exactly as the chained append calls did).\n",
"# NOTE(review): this yields fewer rows than the files have lines (compare with the\n",
"# line-by-line read in the next cell) -- presumably read_csv's default quote handling\n",
"# merges lines that contain double-quote characters; confirm before relying on it.\n",
"data_train = pd.concat([\n",
"    pd.read_csv('data/dataset/all2.sentence.train.txt', delimiter='\\t', header=None),\n",
"    pd.read_csv('data/dataset/hotels.sentence.train.txt', delimiter='\\t', header=None),\n",
"    pd.read_csv('data/dataset/medicine.sentence.train.txt', delimiter='\\t', header=None),\n",
"])\n",
"len(data_train)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"50712"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PuodbATXCSwY",
"outputId": "5f446353-a50c-4048-cd28-f83b86d68cc2"
},
"source": [
"#let's try to read with open function\n",
"# Each file is read inside a `with` block so its handle is closed right away.\n",
"text_train = []\n",
"for train_path in ['data/dataset/all2.sentence.train.txt',\n",
"                   'data/dataset/hotels.sentence.train.txt',\n",
"                   'data/dataset/medicine.sentence.train.txt']:\n",
"    with open(train_path, encoding=\"utf8\") as train_file:\n",
"        text_train += train_file.read().split('\\n')\n",
"data_train = pd.DataFrame(text_train)\n",
"data_train.head(), len(data_train)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"( 0\n",
" 0 W szpitalu w Lubinie niestety zaniża jedynie d...\n",
" 1 „ Z danych publikowanych na świecie wynika , ż...\n",
" 2 jak dobrze to on a jak coś nie tak to na czyje...\n",
" 3 Oceniam pobyt w pokoju dwuosobowym ze wspólną ...\n",
" 4 Posiłki były świetne - duży wybór , wszystko d..., 57218)"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "ju9Zo7RB537F",
"outputId": "d6e8b08a-ff7e-4d69-89c6-b8e8b6882172"
},
"source": [
"data_train.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 0 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" W szpitalu w Lubinie niestety zaniża jedynie d... \n",
" \n",
" \n",
" 1 \n",
" „ Z danych publikowanych na świecie wynika , ż... \n",
" \n",
" \n",
" 2 \n",
" jak dobrze to on a jak coś nie tak to na czyje... \n",
" \n",
" \n",
" 3 \n",
" Oceniam pobyt w pokoju dwuosobowym ze wspólną ... \n",
" \n",
" \n",
" 4 \n",
" Posiłki były świetne - duży wybór , wszystko d... \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0\n",
"0 W szpitalu w Lubinie niestety zaniża jedynie d...\n",
"1 „ Z danych publikowanych na świecie wynika , ż...\n",
"2 jak dobrze to on a jak coś nie tak to na czyje...\n",
"3 Oceniam pobyt w pokoju dwuosobowym ze wspólną ...\n",
"4 Posiłki były świetne - duży wybór , wszystko d..."
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_mxzPHsmCScq",
"outputId": "6dd0164c-1f62-431a-db7e-9bd0d65f662f"
},
"source": [
"data_train_sel = prepare(data_train)\n",
"len(data_train_sel)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"56023"
]
},
"metadata": {},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Tf_Dh-d71NlU",
"outputId": "7b5a1dbb-8042-4575-c099-134ac4bf7b1c"
},
"source": [
"data_train['text_label'].value_counts()\n",
"# the three main classes (minus_m, zero, plus_m) are more or less balanced;\n",
"# amb, minus_s and plus_s are rare"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"__label__z_minus_m 21602\n",
"__label__z_zero 17874\n",
"__label__z_plus_m 16547\n",
"__label__z_amb 945\n",
"__label__z_minus_s 148\n",
"__label__z_plus_s 102\n",
"Name: text_label, dtype: int64"
]
},
"metadata": {},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "D55T5vNiDqkS",
"outputId": "3f603acc-cf31-4bb1-aef9-45bab6f24f2a"
},
"source": [
"data_train_sel.tail()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 0 \n",
" text \n",
" text_label \n",
" label \n",
" \n",
" \n",
" \n",
" \n",
" 57213 \n",
" Jak ktoś wie proszę o wpis w tym miejscu . __l... \n",
" Jak ktoś wie proszę o wpis w tym miejscu . \n",
" __label__z_zero \n",
" 0.5 \n",
" \n",
" \n",
" 57214 \n",
" Czuję sie teraz jak zdrowy człowiek , chociaż ... \n",
" Czuję sie teraz jak zdrowy człowiek , chociaż ... \n",
" __label__z_plus_m \n",
" 1.0 \n",
" \n",
" \n",
" 57215 \n",
" Z wyrazami szacunku dla wszystkich forumowiczó... \n",
" Z wyrazami szacunku dla wszystkich forumowiczó... \n",
" __label__z_zero \n",
" 0.5 \n",
" \n",
" \n",
" 57216 \n",
" Stracone pieniadze . __label__z_minus_m \n",
" Stracone pieniadze . \n",
" __label__z_minus_m \n",
" 0.0 \n",
" \n",
" \n",
" 57217 \n",
" Prowadziła mi ciążę i wizyty u niej były błysk... \n",
" Prowadziła mi ciążę i wizyty u niej były błysk... \n",
" __label__z_minus_m \n",
" 0.0 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 ... label\n",
"57213 Jak ktoś wie proszę o wpis w tym miejscu . __l... ... 0.5\n",
"57214 Czuję sie teraz jak zdrowy człowiek , chociaż ... ... 1.0\n",
"57215 Z wyrazami szacunku dla wszystkich forumowiczó... ... 0.5\n",
"57216 Stracone pieniadze . __label__z_minus_m ... 0.0\n",
"57217 Prowadziła mi ciążę i wizyty u niej były błysk... ... 0.0\n",
"\n",
"[5 rows x 4 columns]"
]
},
"metadata": {},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "EHA7bTZ3pVf1",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "db3982a6-5dc3-4851-aefe-bda87cece295"
},
"source": [
"#dev files\n",
"# Each file is read inside a `with` block so its handle is closed right away.\n",
"text_dev = []\n",
"for dev_path in ['data/dataset/all2.sentence.dev.txt',\n",
"                 'data/dataset/hotels.sentence.dev.txt',\n",
"                 'data/dataset/medicine.sentence.dev.txt']:\n",
"    with open(dev_path, encoding=\"utf8\") as dev_file:\n",
"        text_dev += dev_file.read().split('\\n')\n",
"data_dev = pd.DataFrame(text_dev)\n",
"data_dev_sel = prepare(data_dev)\n",
"data_dev_sel.head(), len(data_dev_sel), data_dev_sel['label'].value_counts()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"( 0 ... label\n",
" 0 \" Sprawdzane będzie funkcjonowanie tej zastawk... ... 0.5\n",
" 1 Obce sobie osoby muszą spać razem na sofach ( ... ... 0.0\n",
" 2 Nie jest ona łatwa , zwykle podejrzenie złośli... ... 0.5\n",
" 3 Bardzo miła i fachowa obsługa . __label__z_plus_m ... 1.0\n",
" 4 Jest to osoba , która potrafi w trakcie proces... ... 1.0\n",
" \n",
" [5 rows x 4 columns], 7009, 0.0 2686\n",
" 0.5 2191\n",
" 1.0 2132\n",
" Name: label, dtype: int64)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f9td_vc3svtW",
"outputId": "6f3964e5-bf21-4569-a817-ed410113ee0b"
},
"source": [
"#test files\n",
"# Each file is read inside a `with` block so its handle is closed right away.\n",
"text_test = []\n",
"for test_path in ['data/dataset/all2.sentence.test.txt',\n",
"                  'data/dataset/hotels.sentence.test.txt',\n",
"                  'data/dataset/medicine.sentence.test.txt']:\n",
"    with open(test_path, encoding=\"utf8\") as test_file:\n",
"        text_test += test_file.read().split('\\n')\n",
"data_test = pd.DataFrame(text_test)\n",
"data_test_sel = prepare(data_test)\n",
"data_test_sel.head(), len(data_test_sel), data_test_sel['label'].value_counts()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"( 0 ... label\n",
" 0 Pani nie miała nic do powiedzenia , gdy pytała... ... 0.0\n",
" 1 Zero powiązania gdzie przecież jeżeli ma ktoś ... ... 0.0\n",
" 2 Niestety bylo juz za pozno . __label__z_minus_m ... 0.0\n",
" 3 To jest skandal i niedopuszczalne , żeby lekar... ... 0.0\n",
" 4 z calą pewnością nie byl to zabieg warty 5 000... ... 0.0\n",
" \n",
" [5 rows x 4 columns], 6972, 0.0 2670\n",
" 0.5 2215\n",
" 1.0 2087\n",
" Name: label, dtype: int64)"
]
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 289
},
"id": "WfdNmVKzsyXO",
"outputId": "eda9d035-c43a-44b2-a65d-8b798ed5a642"
},
"source": [
"# Only 'text' and 'label' are needed from here on. Reassign instead of using\n",
"# inplace=True: an in-place drop on a frame that is a slice of another frame raises\n",
"# SettingWithCopyWarning.\n",
"data_train_sel = data_train_sel.drop(columns=[0, 'text_label'])\n",
"data_dev_sel = data_dev_sel.drop(columns=[0, 'text_label'])\n",
"data_test_sel = data_test_sel.drop(columns=[0, 'text_label'])\n",
"data_train_sel.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:4174: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" errors=errors,\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" text \n",
" label \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" W szpitalu w Lubinie niestety zaniża jedynie d... \n",
" 0.0 \n",
" \n",
" \n",
" 1 \n",
" „ Z danych publikowanych na świecie wynika , ż... \n",
" 0.5 \n",
" \n",
" \n",
" 2 \n",
" jak dobrze to on a jak coś nie tak to na czyje... \n",
" 0.0 \n",
" \n",
" \n",
" 3 \n",
" Oceniam pobyt w pokoju dwuosobowym ze wspólną ... \n",
" 0.5 \n",
" \n",
" \n",
" 4 \n",
" Posiłki były świetne - duży wybór , wszystko d... \n",
" 1.0 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" text label\n",
"0 W szpitalu w Lubinie niestety zaniża jedynie d... 0.0\n",
"1 „ Z danych publikowanych na świecie wynika , ż... 0.5\n",
"2 jak dobrze to on a jak coś nie tak to na czyje... 0.0\n",
"3 Oceniam pobyt w pokoju dwuosobowym ze wspólną ... 0.5\n",
"4 Posiłki były świetne - duży wybór , wszystko d... 1.0"
]
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Ix2uSqNZvjCv",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "986585d5-525a-4547-f5c2-5eaa241f8ea8"
},
"source": [
"data_train_sel['text'].str.len()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 97\n",
"1 77\n",
"2 92\n",
"3 69\n",
"4 115\n",
" ... \n",
"57213 42\n",
"57214 110\n",
"57215 54\n",
"57216 20\n",
"57217 87\n",
"Name: text, Length: 56023, dtype: int64"
]
},
"metadata": {},
"execution_count": 26
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 279
},
"id": "S5GUcArDwke0",
"outputId": "58ece8b1-989b-4cfb-c073-9451798cd14b"
},
"source": [
"# Histogram of training-sentence lengths, measured in characters\n",
"plt.hist(\n",
"    data_train_sel['text'].str.len(),\n",
"    range=(0,500),\n",
"    bins=30,\n",
")\n",
"plt.xlabel('Length in chars')\n",
"plt.ylabel('Count');"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYOUlEQVR4nO3df7BfdZ3f8edLEPy5JoFsyiawwZquxbYiZgF/dEdhDL9cQ1tEHGeJljbtlO5Kd2d3Q9cpXVk62On4g7XLNiNZg2NlwR8lChVjQNtty4+gyG+aiDAkBRIN4KKzavDdP76fS76Ge3Nu4J7vzb33+Zj5zvec9/mccz7ncsPrnt+pKiRJ2pcXTXcHJEkHPsNCktTJsJAkdTIsJEmdDAtJUqeDp7sDfTj88MNr6dKl090NSZpRbr/99u9X1cLxps3KsFi6dCmbN2+e7m5I0oyS5OGJpnkYSpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktRpVt7BPVMtXXPdpNo9dOkZPfdEkn6RexaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnq1FtYJPm1JHcMfX6Y5IIkC5JsTLKlfc9v7ZPksiRbk9yZ5LihZa1q7bckWdVXnyVJ4+stLKrqgao6tqqOBd4I/Bj4ErAG2FRVy4BNbRzgNGBZ+6wGLgdIsgC4CDgBOB64aCxgJEmjMarDUCcD362qh4GVwPpWXw+c2YZXAlfWwM3AvCRHAKcAG6tqV1U9AWwETh1RvyVJjO5xH+cAn2vDi6rq0Tb8GLCoDS8GHhmaZ1urTVT/BUlWM9gj4aijjpqyjk+FyT7GQ5IOVL3vWSQ5BHgXcM3e06qqgJqK9VTV2qpaXlXLFy5cOBWLlCQ1ozgMdRrwrap6vI0/3g4v0b53tPp24Mih+Za02kR1SdKIjCIs3sueQ1AAG4CxK5pWAdcO1c9tV0WdCDzVDlfdAKxIMr+d2F7RapKkEen1nEWSlwPvAP7FUPlS4Ook5wEPA2e3+vXA6cBWBldOfQCgqnYluRi4rbX7cFXt6rPfkqRf1GtYVNWPgMP2qv2AwdVRe7ct4PwJlrMOWNdHHyVJ3byDW5LUybCQJHUyLCRJnQwLSVInw0KS1GlUj/vQFJrs40MeuvSMnnsiaa5wz0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1KnXsEgyL8nnk9yf5L4kb0qyIMnGJFva9/zWNkkuS7I1yZ1JjhtazqrWfkuSVX32WZL0XH3vWXwC+GpVvRZ4PXAfsAbYVFXLgE1tHOA0YFn7rAYuB0iyALgIOAE4HrhoLGAkSaPRW1gkeRXwG8AVAFX106p6ElgJrG/N1gNntuGVwJU1cDMwL8kRwCnAxqraVVVPABuBU/vqtyTpufrcszga2An8RZJvJ/lUkpcDi6rq0dbmMWBRG14MPDI0/7ZWm6j+C5KsTrI5yeadO3dO8aZI0tzWZ1gcDBwHXF5VbwB+xJ5DTgBUVQE1FSurqrVVtbyqli9cuHAqFilJavoMi23Atqq6pY1/nkF4PN4OL9G+d7Tp24Ejh+Zf0moT1SVJI9JbWFTVY8AjSX6tlU4G7gU2AGNXNK0Crm3DG4Bz21VRJwJPtcNVNwArksxvJ7ZXtJokaUQO7nn5vw18NskhwIPABxgE1NVJzgMeBs5uba8HTge2Aj9ubamqXUkuBm5r7T5cVbt67rckaUivYVFVdwDLx5l08jhtCzh/guWsA9ZNbe8kSZPlHdySpE6GhSSpk2EhSepkWEiSOhkWkqROfV86q2m0dM11k2r30KVn9NwTSTOdexaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkTt7B/QJM9g5pSZrp3LOQJHUyLCRJnT
wMpf06nOZDB6W5qdc9iyQPJbkryR1JNrfagiQbk2xp3/NbPUkuS7I1yZ1JjhtazqrWfkuSVX32WZL0XKM4DPX2qjq2qpa38TXApqpaBmxq4wCnAcvaZzVwOQzCBbgIOAE4HrhoLGAkSaMxHecsVgLr2/B64Myh+pU1cDMwL8kRwCnAxqraVVVPABuBU0fdaUmay/oOiwK+luT2JKtbbVFVPdqGHwMWteHFwCND825rtYnqvyDJ6iSbk2zeuXPnVG6DJM15fZ/gfmtVbU/yy8DGJPcPT6yqSlJTsaKqWgusBVi+fPmULFOSNNDrnkVVbW/fO4AvMTjn8Hg7vET73tGabweOHJp9SatNVJckjUhvYZHk5UleOTYMrADuBjYAY1c0rQKubcMbgHPbVVEnAk+1w1U3ACuSzG8ntle0miRpRPo8DLUI+FKSsfX816r6apLbgKuTnAc8DJzd2l8PnA5sBX4MfACgqnYluRi4rbX7cFXt6rHfkqS99BYWVfUg8Ppx6j8ATh6nXsD5EyxrHbBuqvsoSZocH/chSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSeo0qbBI8pbJ1CRJs9Nk9yz+dJI1SdIstM/3WSR5E/BmYGGS3x2a9EvAQX12TJJ04Oh6+dEhwCtau1cO1X8InNVXpyRJB5Z9hkVVfRP4ZpJPV9XDI+qTJOkAM9nXqh6aZC2wdHieqjqpj05Jkg4skw2La4A/Bz4FPLM/K0hyELAZ2F5V70xyNHAVcBhwO/BbVfXTJIcCVwJvBH4AvKeqHmrLuBA4r637d6rqhv3pg6bO0jXXTardQ5ee0XNPJI3SZK+G2l1Vl1fVrVV1+9hnkvN+ELhvaPwjwMeq6jXAEwxCgPb9RKt/rLUjyTHAOcDrgFOBP2sBJEkakcmGxZeT/KskRyRZMPbpminJEuAMBnskJAlwEvD51mQ9cGYbXtnGadNPbu1XAldV1U+q6nvAVuD4SfZbkjQFJnsYalX7/v2hWgGv7pjv48AfsOdKqsOAJ6tqdxvfBixuw4uBRwCqaneSp1r7xcDNQ8scnudZSVYDqwGOOuqo7i2SJE3apMKiqo7e3wUneSewo6puT/K2/Z1/f1XVWmAtwPLly6vv9UnSXDKpsEhy7nj1qrpyH7O9BXhXktOBlzC4ke8TwLwkB7e9iyXA9tZ+O3AksC3JwcCrGJzoHquPGZ5HkjQCkz1n8etDn38I/HvgXfuaoaourKolVbWUwQnqG6vqfcBN7LmhbxVwbRvewJ7DXWe19tXq5yQ5tF1JtQy4dZL9liRNgckehvrt4fEk8xhc/vp8/CFwVZI/Ab4NXNHqVwCfSbIV2MUgYKiqe5JcDdwL7AbOr6r9unxXkvTCTPYE995+BEz6PEZVfQP4Rht+kHGuZqqqvwHePcH8lwCXPI9+SpKmwGTPWXyZwdVPMHiA4N8Fru6rU5KkA8tk9yz+09DwbuDhqtrWQ38kSQegSZ3gbg8UvJ/B/RLzgZ/22SlJ0oFlsm/KO5vBFUjvBs4GbkniI8olaY6Y7GGoPwJ+vap2ACRZCHydPY/tkCTNYpO9z+JFY0HR/GA/5pUkzXCT3bP4apIbgM+18fcA1/fTJUnSgabrHdyvARZV1e8n+cfAW9uk/wN8tu/OSZIODF17Fh8HLgSoqi8CXwRI8vfbtN/stXeSpANC13mHRVV1197FVlvaS48kSQecrrCYt49pL53KjkiSDlxdYbE5yT/fu5jknzF4f7YkaQ7oOmdxAfClJO9jTzgsBw4B/lGfHZMkHTj2GRZV9Tjw5iRvB/5eK19XVTf23jNJ0gFjsu+zuInBS4skSXOQd2FLkjoZFpKkToaFJKmTYSFJ6tRbWCR5SZJbk3wnyT1J/rjVj05yS5KtSf4yySGtfmgb39qmLx1a1oWt/kCSU/rqsyRpfJN96uzz8RPgpKp6OsmLgb9K8t
+B3wU+VlVXJflz4Dzg8vb9RFW9Jsk5wEeA9yQ5BjgHeB3wK8DXk/ydqnqmx77rBVq65rpJtXvo0jN67omkqdDbnkUNPN1GX9w+BZzEnpcmrQfObMMr2zht+slJ0upXVdVPqup7wFbg+L76LUl6rl7PWSQ5KMkdwA5gI/Bd4Mmq2t2abAMWt+HFwCMAbfpTwGHD9XHmGV7X6iSbk2zeuXNnH5sjSXNWr2FRVc9U1bHAEgZ7A6/tcV1rq2p5VS1fuHBhX6uRpDlpJFdDVdWTDO4AfxMwL8nYuZIlwPY2vB04EqBNfxWD17c+Wx9nHknSCPR5NdTCJPPa8EuBdwD3MQiNs1qzVcC1bXhDG6dNv7GqqtXPaVdLHQ0sA27tq9+SpOfq82qoI4D1SQ5iEEpXV9VXktwLXJXkT4BvA1e09lcAn0myFdjF4AooquqeJFcD9wK7gfO9EkqSRqu3sKiqO4E3jFN/kHGuZqqqvwHePcGyLgEumeo+SpImxzu4JUmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR16vMObqmT772QZgb3LCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUqfewiLJkUluSnJvknuSfLDVFyTZmGRL+57f6klyWZKtSe5MctzQsla19luSrOqrz5Kk8fW5Z7Eb+L2qOgY4ETg/yTHAGmBTVS0DNrVxgNOAZe2zGrgcBuECXAScABwPXDQWMJKk0egtLKrq0ar6Vhv+a+A+YDGwEljfmq0HzmzDK4Era+BmYF6SI4BTgI1VtauqngA2Aqf21W9J0nON5JxFkqXAG4BbgEVV9Wib9BiwqA0vBh4Zmm1bq01U33sdq5NsTrJ5586dU9p/SZrreg+LJK8AvgBcUFU/HJ5WVQXUVKynqtZW1fKqWr5w4cKpWKQkqen1fRZJXswgKD5bVV9s5ceTHFFVj7bDTDtafTtw5NDsS1ptO/C2verf6LPfOvD43gtpevV5NVSAK4D7quqjQ5M2AGNXNK0Crh2qn9uuijoReKodrroBWJFkfjuxvaLVJEkj0ueexVuA3wLuSnJHq/1b4FLg6iTnAQ8DZ7dp1wOnA1uBHwMfAKiqXUkuBm5r7T5cVbt67LckaS+9hUVV/RWQCSafPE77As6fYFnrgHVT1ztJ0v7wDm5JUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ16fTaUNGo+Q0rqh3sWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6eVPeOCZ7Y5ckzRXuWUiSOvUWFknWJdmR5O6h2oIkG5Nsad/zWz1JLkuyNcmdSY4bmmdVa78lyaq++itJmlifh6E+DXwSuHKotgbYVFWXJlnTxv8QOA1Y1j4nAJcDJyRZAFwELAcKuD3Jhqp6osd+aw7wGVLS/ultz6Kq/gewa6/ySmB9G14PnDlUv7IGbgbmJTkCOAXYWFW7WkBsBE7tq8+SpPGN+pzFoqp6tA0/Bixqw4uBR4babWu1ierPkWR1ks1JNu/cuXNqey1Jc9y0neCuqmJwaGmqlre2qpZX1fKFCxdO1WIlSYw+LB5vh5do3ztafTtw5FC7Ja02UV2SNEKjDosNwNgVTauAa4fq57arok4EnmqHq24AViSZ366cWtFqkqQR6u1qqCSfA94GHJ5kG4Ormi4Frk5yHvAwcHZrfj1wOrAV+DHwAYCq2pXkYuC21u7DVbX3SXOpN/tzg6ZXTmk26y0squq9E0w6eZy2BZw/wXLWAeumsGuSpP3kHdySpE6GhSSpk2EhSepkWEiSOhkWkqROvs9CmiI+nFCzmXsWkqROhoUkqZNhIUnqZFhIkjp5glsaMU+EayYyLKQDlKGiA4mHoSRJnQwLSVInD0NJM5yHqzQK7llIkjq5ZyHNEfvz1r/JcE9lbnHPQpLUyT0LSc+L7yefW2ZMWCQ5Ff
gEcBDwqaq6dJq7JGmSPAQ2882IsEhyEPCfgXcA24Dbkmyoqnunt2eSpsNUh89Um41hNiPCAjge2FpVDwIkuQpYCRgWkg440xlmfQXVTAmLxcAjQ+PbgBOGGyRZDaxuo08neeAFrO9w4PsvYP6ZZq5tL7jNc8Wc2+Z85AVt869ONGGmhEWnqloLrJ2KZSXZXFXLp2JZM8Fc215wm+cKt3nqzJRLZ7cDRw6NL2k1SdIIzJSwuA1YluToJIcA5wAbprlPkjRnzIjDUFW1O8m/Bm5gcOnsuqq6p8dVTsnhrBlkrm0vuM1zhds8RVJVfSxXkjSLzJTDUJKkaWRYSJI6GRZDkpya5IEkW5Osme7+TJUk65LsSHL3UG1Bko1JtrTv+a2eJJe1n8GdSY6bvp4/f0mOTHJTknuT3JPkg60+a7c7yUuS3JrkO22b/7jVj05yS9u2v2wXiZDk0Da+tU1fOp39f76SHJTk20m+0sZn+/Y+lOSuJHck2dxqvf9eGxbN0CNFTgOOAd6b5Jjp7dWU+TRw6l61NcCmqloGbGrjMNj+Ze2zGrh8RH2caruB36uqY4ATgfPbf8/ZvN0/AU6qqtcDxwKnJjkR+Ajwsap6DfAEcF5rfx7wRKt/rLWbiT4I3Dc0Ptu3F+DtVXXs0P0U/f9eV5WfwUn+NwE3DI1fCFw43f2awu1bCtw9NP4AcEQbPgJ4oA3/F+C947WbyR/gWgbPFpsT2w28DPgWgycdfB84uNWf/T1ncHXhm9rwwa1dprvv+7mdS9r/HE8CvgJkNm9v6/tDwOF71Xr/vXbPYo/xHimyeJr6MgqLqurRNvwYsKgNz7qfQzvc8AbgFmb5drdDMncAO4CNwHeBJ6tqd2syvF3PbnOb/hRw2Gh7/IJ9HPgD4Odt/DBm9/YCFPC1JLe3xxzBCH6vZ8R9FupXVVWSWXkNdZJXAF8ALqiqHyZ5dtps3O6qegY4Nsk84EvAa6e5S71J8k5gR1XdnuRt092fEXprVW1P8svAxiT3D0/s6/faPYs95tojRR5PcgRA+97R6rPm55DkxQyC4rNV9cVWnvXbDVBVTwI3MTgMMy/J2B+Gw9v17Da36a8CfjDirr4QbwHeleQh4CoGh6I+wezdXgCqanv73sHgD4LjGcHvtWGxx1x7pMgGYFUbXsXgmP5Y/dx2FcWJwFNDu7czRga7EFcA91XVR4cmzdrtTrKw7VGQ5KUMztHcxyA0zmrN9t7msZ/FWcCN1Q5szwRVdWFVLamqpQz+vd5YVe9jlm4vQJKXJ3nl2DCwAribUfxeT/fJmgPpA5wO/F8Gx3n/aLr7M4Xb9TngUeBnDI5ZnsfgWO0mYAvwdWBBaxsGV4V9F7gLWD7d/X+e2/xWBsd27wTuaJ/TZ/N2A/8A+Hbb5ruBf9fqrwZuBbYC1wCHtvpL2vjWNv3V070NL2Db3wZ8ZbZvb9u277TPPWP/nxrF77WP+5AkdfIwlCSpk2EhSepkWEiSOhkWkqROhoUkqZNhoVkrydM9L/+CJC/bn/UleVem4InGST6d5KzultLUMCyk5+8CBg/sm7Sq2lBVl/bUn0lpN2j5b1/7xV8YzSlJ/naSr7aHsP3PJK9t9U+35/7/7yQPjv3VnuRFSf4syf3tPQHXJzkrye8AvwLclOSmoeVfksH7JG5Osmic9b8/ySf3tc5x5jm3vYvgO0k+MzTpN8bp7yuSbEryrfbOg5WtvjSDd7VcyeCGvSPb+u9u7f7NlPyANXtN9x2Jfvz09QGeHqe2CVjWhk9g8MgHGLzz4xoGf0AdA2xt9bOA61v9bzF4P8JZbdpDDD0qmsEd47/Zhv8j8KFx1v9+4JP7Wude7V/H4KkCh7fxBR39PRj4pTZ8OIO7lcPgEfU/B05s094IbBxaz7zp/u/l58D++NRZzRntCbRvBq4ZevrsoUNN/ltV/Ry4d2iv4K3ANa3+2PBexDh+yuCdCgC3M3g2U5fx1jnspLb+7wNU1a6OeQP8hy
S/wSAcFrPncdUPV9XNbfhB4NVJ/hS4DvjaJPqqOcyw0FzyIgbvOjh2guk/GRrOBG325WdVNfb8nGeY3L+vF7LO8eZ9H7AQeGNV/aw9kfUlbdqPxhpX1RNJXg+cAvxL4Gzgn+7n+jWHeM5Cc0ZV/RD4XpJ3w7Mnel/fMdv/Av5JO3exiMED68b8NfDKXjq7x43Au5McBoN3LXe0fxWDdzz8LMnbgV8dr1GSw4EXVdUXgA8BM+6d4xot9yw0m70sybah8Y8y+Mv78iQfAl7M4D0I39nHMr4AnAzcy+CNY99i8IY1gLXAV5P8v6p6+1R3HqCq7klyCfDNJM8weKrs+/cxy2eBLye5C9gM3D9Bu8XAXwxdFXXhFHVZs5RPnZU6JHlFVT3d/rq/FXhLVT023f2SRsk9C6nbV9pLhQ4BLjYoNBe5ZyFJ6uQJbklSJ8NCktTJsJAkdTIsJEmdDAtJUqf/DyJYCrd8P8oxAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "t6IuPP3owuZQ"
},
"source": [
"# One Dataset per split, bundled into a DatasetDict keyed by split name.\n",
"# The frames keep their filtered pandas index, which from_pandas stores as an\n",
"# extra '__index_level_0__' column.\n",
"ds_train, ds_dev, ds_test = (\n",
"    Dataset.from_pandas(frame)\n",
"    for frame in (data_train_sel, data_dev_sel, data_test_sel)\n",
")\n",
"dd = DatasetDict({\n",
"    'train': ds_train,\n",
"    'dev': ds_dev,\n",
"    'test': ds_test,\n",
"})"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "OIAosmGqw5RA"
},
"source": [
"# In both functions `dd` is the batch handed over by Dataset.map, not the global\n",
"# DatasetDict of the same name.\n",
"def preprocess_function(dd):\n",
"    \"\"\"Tokenize dd['text'], truncating/padding every sequence to exactly 128 tokens.\"\"\"\n",
"    texts = dd[\"text\"]\n",
"    return rtokenizer(texts, max_length=128, padding='max_length', truncation=True)\n",
"\n",
"def preprocess_function_nopad(dd):\n",
"    \"\"\"Tokenize dd['text'] with the tokenizer's default settings (no padding requested).\"\"\"\n",
"    texts = dd[\"text\"]\n",
"    return rtokenizer(texts)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 209,
"referenced_widgets": [
"03083056d1e94a538c2d3a6e452a54e7",
"f583b1167728452ba6a48f482de1b242",
"02d2b9682b614004a1e5b16b77d1b44a",
"a8b9a66d854d441d85e14b47c1f7cd86",
"04e56f18cdc74054806bd2c2e759655b",
"31b5c73f04ab4a028a08a9dd27aaaef6",
"81a23c33f8d443719fc15cbff841ab3e",
"3ba8e94b076e4d65bc8fff6fdd36fd3a",
"263d73496f7f4a0b913dc9ec206965e7",
"955ddc78ae49407abc3d73f3dc2cc004",
"ca8dbf3b6f9f426291ec058ccf5afcf3",
"65c5531fb5ef49b0b14e9f717f7a4e99",
"6f1e5f95976e4c96a8c745330a3e3615",
"b360b556de414116914ce0f94d3c4f0f",
"8f2347db18e84913a18cbdf5e18f0135",
"c80576964fbf41f1be6aad1a7eedff07",
"8eeae0a30e344c0e8a993a2fe9b25bea",
"4d4fe6a979d047ea821235f00ceea60e",
"dc2a38711a054435b64dcc1edb99645b",
"fe8667b44b6e4a95b110d72a5924f486",
"f0642e1b2a864aabbb187aea2e687e5b",
"26e56fc61c2a483fb26decc4d4d297d4",
"8b8627f8ea894c828bef3eeea8799383",
"9810c3734016439b84b54aaa0873d9a1",
"04e078b606ee4f5eadbfebc1f891f69c",
"93e096058a954a1fa6ad76b289a2f727",
"9b8d70149121432fb14b2ff4643ef8a2",
"018b342b25674c20b1fc65763326603f",
"0b7bb7a8fb0e4d7d858614b044f56641",
"41b6cf7ff23d42b08700e7a7673b8062",
"341de48240614b32bda8fafe2c3dc87a",
"2e97a973b749451788366e7dc0f3c104",
"01b1a11134164856b8840b8176c59163",
"2be036b84ac748e7b697b3d972fe4967",
"b4eaf387135443f298baffc0c2a95cc0",
"862018a61fb745689f034c09f84c69b6",
"a3b037ddc080458bb357e72a33ff9ee3",
"0af68e4132da4bbdbfc1b2cd4f1b2b62",
"504fd6c0375243368455f31285dd4ce0",
"9bf60f74a05d495aad2f63d4d4032e4c",
"e9d794d4d0594682a8008e3abbc50ab8",
"9a97abb3e0824f7b9a72469ad35d066a",
"70053402ff7e4b6dbafd30cb8584b92b",
"91e0b6babb82458caaa0d8076ac7aeb0",
"90053ed6c533463fbf7d48fb983b9694",
"d66cdd223e6a4367ab491277280ab4bf",
"bd3240fab0aa449fa1027a38900a8cf2",
"f3080f8e34ff458dba0869694e6dbe9f",
"9fde0a63838b4fac893d0f445d6bd426",
"cf9c374f7c534f7ca096e16ebaa9316b",
"407cb21a4b0c4b18883ca4f6ad13c989",
"7f1e2139db334a95ab784e17035224a5",
"fd172f5929094f8ebe133ce12c591f17",
"fa477548d69847d580d79b5e82787c4c",
"cdb924bce4334e89b1b22fe77dc7cc88",
"88b0009e094e488b8010f38d86de5465",
"97e0858da4f64a4783bc2ae90c1e5f1b",
"71942ca586324397a3925a2ea49edce0",
"ba93057c5c07481284265fae022d5e57",
"46271c25e484489ea75b690416443ead",
"a3b73dece1184bcaa16ebe04c9e0821a",
"91f37911d5db4a0498d71b437be6ac37",
"9a0de19ac7bc4243a7acedea10f402aa",
"e8f04dcdc1994b87bfda44b1fcc4b14b",
"f51da76d11044098a31ff4eab4b77e50",
"5546130c0cae46f7a8f551d7057dbefb"
]
},
"id": "h9F6odPXw9Rc",
"outputId": "198cc9fe-7291-4f94-895b-0e9ed68b29cd"
},
"source": [
"# Unpadded encoding: its raw token counts feed the token-length histogram further down.\n",
"encoded_dataset_nopad = dd.map(preprocess_function_nopad, batched=True)\n",
"# Fixed-length encoding: every example is padded/truncated to 128 tokens.\n",
"encoded_dataset = dd.map(preprocess_function, batched=True)"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "03083056d1e94a538c2d3a6e452a54e7",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/57 [00:00, ?ba/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "65c5531fb5ef49b0b14e9f717f7a4e99",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/8 [00:00, ?ba/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8b8627f8ea894c828bef3eeea8799383",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/7 [00:00, ?ba/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2be036b84ac748e7b697b3d972fe4967",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/57 [00:00, ?ba/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "90053ed6c533463fbf7d48fb983b9694",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/8 [00:00, ?ba/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "88b0009e094e488b8010f38d86de5465",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/7 [00:00, ?ba/s]"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GkHs5fvdw_OL",
"outputId": "4793d0a8-75e1-4b59-c6e3-06e6d6add3b1"
},
"source": [
"encoded_dataset['train']"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['__index_level_0__', 'attention_mask', 'input_ids', 'label', 'text'],\n",
" num_rows: 56023\n",
"})"
]
},
"metadata": {},
"execution_count": 33
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YEtS9JLM9GFS",
"outputId": "19d77cc0-afb7-4573-9b92-0c410af0c63c"
},
"source": [
"encoded_dataset['train'][0]"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'__index_level_0__': 0,\n",
" 'attention_mask': [1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0],\n",
" 'input_ids': [0,\n",
" 17,\n",
" 5536,\n",
" 6,\n",
" 12026,\n",
" 87,\n",
" 1085,\n",
" 29,\n",
" 5699,\n",
" 13,\n",
" 408,\n",
" 24292,\n",
" 974,\n",
" 7,\n",
" 33821,\n",
" 21,\n",
" 946,\n",
" 6593,\n",
" 180,\n",
" 12,\n",
" 5,\n",
" 2,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1],\n",
" 'label': 0.0,\n",
" 'text': 'W szpitalu w Lubinie niestety zaniża jedynie drastycznie poziom i denerwuje większość pacjentek .'}"
]
},
"metadata": {},
"execution_count": 34
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 282
},
"id": "WNfAx1QlxFhx",
"outputId": "65a16be1-1e15-44cc-d51d-4cf638c5e166"
},
"source": [
"# Histogram of per-example token counts in `encoded_dataset_nopad['train']`.\n",
"# Lengths outside the 0-200 range are not drawn (plt.hist ignores out-of-range values).\n",
"lens = [len(seq) for seq in encoded_dataset_nopad['train']['input_ids']]\n",
"plt.hist(lens, bins=30, range=(0, 200))\n",
"plt.ylabel('Count')\n",
"plt.xlabel('Length in tokens');"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEJCAYAAABVFBp5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYTElEQVR4nO3dfbRddX3n8ffHUFB8CkIWiwIziZpxBpmqGAEf6lJpebTGtog4rhotI9MlWhmnaihrDS6tMzB0asUqroxkBIfyIOKQChVTQO10BEkQeRRJeZBEHlJA0LFVQ7/zx/5dPIR7k5udnHPu5b5fa5119/nu3977t/c993zufjj7pKqQJKmPp427A5Kk2csQkST1ZohIknozRCRJvRkikqTeDBFJUm9DC5EkK5M8kOSmgdrpSb6X5IYkX04yf2DcSUnWJbktyWED9cNbbV2S5QP1RUmuafULkuw8rHWRJE1umHsinwcO36y2Gti/qn4N+D5wEkCS/YBjgRe3aT6TZF6SecCngSOA/YC3tbYApwGfqKoXAg8Dxw1xXSRJk9hpWDOuqm8mWbhZ7WsDT68Gjm7DS4Hzq+pnwJ1J1gEHtnHrquoOgCTnA0uT3Aq8Afh3rc3ZwEeAM7fWrz322KMWLly4tWaSpAFr1679h6pasHl9aCEyDb8PXNCG96YLlQnrWw3gns3qBwG7Az+qqk2TtN+ihQsXsmbNmr59lqQ5Kcndk9XHcmI9ycnAJuDcES3v+CRrkqzZuHHjKBYpSXPCyEMkyTuBNwJvr1/euGsDsO9As31abar6g8D8JDttVp9UVa2oqiVVtWTBgiftjUmSehppiCQ5HPgQ8Kaq+unAqFXAsUl2SbIIWAx8G7gWWNyuxNqZ7uT7qhY+V/HLcyrLgEtGtR6SpM4wL/E9D/gW8KIk65McB/wF8GxgdZLrk3wWoKpuBi4EbgG+CpxQVY+1cx7vBS4HbgUubG0BPgx8oJ2E3x04a1jrIkmaXObareCXLFlSnliXpG2TZG1VLdm87ifWJUm9GSKSpN4MEUlSb4aIJKm3cX5ifc5buPzSabW769SjhtwTSerHPRFJUm+GiCSpN0NEktSbISJJ6s0T60Mw3RPmw5ifJ+EljZJ7IpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3oYWIklWJnkgyU0DteclWZ3k9vZzt1ZPkjOSrEtyQ5IDBqZZ1trfnmTZQP3lSW5s05yRJMNaF0nS5Ia5J/J54PDNasuBK6pqMXBFew5wBLC4PY4HzoQudIBTgIOAA4FTJoKntXn3wHSbL0uSNGRDC5Gq+ibw0GblpcDZbfhs4M0D9XOqczUwP8lewGHA6qp6qKoeBlYDh7dxz6mqq6uqgHMG5iVJGpFRnxPZs6rubcP3AXu24b2BewbarW+1LdXXT1KXJI3Q2E6stz2IGsWykhyfZE2SNRs3bhzFIiVpThh1iNzfDkXRfj7Q6huAfQfa7dNqW6rvM0l9UlW1oqqWVNWSBQsWbPdKSJI6ow6RVcDEFVbLgEsG6u9oV2kdDDzSDntdDhyaZLd2Qv1Q4PI27tEkB7erst4xMC9J0ojsNKwZJzkPeB2wR5L1dFdZnQpcmOQ44G7gmNb8MuBIYB3wU+BdAFX1UJKPAde2dh+tqomT9e+huwLsGcBft4ckaYSGFiJV9bYpRh0ySdsCTphiPiuBlZPU1wD7b08fJUnbx0+sS5J6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3s
YSIkn+Y5Kbk9yU5LwkT0+yKMk1SdYluSDJzq3tLu35ujZ+4cB8Tmr125IcNo51kaS5bOQhkmRv4A+BJVW1PzAPOBY4DfhEVb0QeBg4rk1yHPBwq3+itSPJfm26FwOHA59JMm+U6yJJc924DmftBDwjyU7ArsC9wBuAi9r4s4E3t+Gl7Tlt/CFJ0urnV9XPqupOYB1w4Ij6L0miezMfqarakORPgR8A/wh8DVgL/KiqNrVm64G92/DewD1t2k1JHgF2b/WrB2Y9OM2ctXD5pdNqd9epRw25J5LmgnEcztqNbi9iEfCrwDPpDkcNc5nHJ1mTZM3GjRuHuShJmlPGcTjrN4A7q2pjVf0CuBh4NTC/Hd4C2AfY0IY3APsCtPHPBR4crE8yzRNU1YqqWlJVSxYsWLCj10eS5qxxhMgPgIOT7NrObRwC3AJcBRzd2iwDLmnDq9pz2vgrq6pa/dh29dYiYDHw7RGtgySJ8ZwTuSbJRcB1wCbgO8AK4FLg/CR/0mpntUnOAr6QZB3wEN0VWVTVzUkupAugTcAJVfXYSFdGkua4kYcIQFWdApyyWfkOJrm6qqr+CXjLFPP5OPDxHd5BSdK0+Il1SVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJv0wqRJK+eTk2SNLdMd0/kU9OsSZLmkC1+x3qSVwKvAhYk+cDAqOcA84bZMUnSzLfFEAF2Bp7V2j17oP4ocPSwOiVJmh22GCJV9Q3gG0k+X1V3j6hPkqRZYmt7IhN2SbICWDg4TVW9YRidkiTNDtMNkS8CnwU+Bzw2vO5IkmaT6YbIpqo6c6g9kSTNOtO9xPevkrwnyV5JnjfxGGrPJEkz3nT3RJa1nx8cqBXw/B3bHUnSbDKtPZGqWjTJo3eAJJmf5KIk30tya5JXtr2b1Ulubz93a22T5Iwk65LckOSAgfksa+1vT7Js6iVKkoZhWnsiSd4xWb2qzum53E8CX62qo5PsDOwK/DFwRVWdmmQ5sBz4MHAEsLg9DgLOBA5qh9NOAZbQ7RWtTbKqqh7u2SdJ0jaa7uGsVwwMPx04BLgO2OYQSfJc4LXAOwGq6ufAz5MsBV7Xmp0NfJ0uRJYC51RVAVe3vZi9WtvVVfVQm+9q4HDgvG3tkySpn2mFSFW9b/B5kvnA+T2XuQjYCPzPJC8B1gLvB/asqntbm/uAPdvw3sA9A9Ovb7Wp6pKkEel7K/j/RxcGfewEHACcWVUva/NaPtig7XVUz/k/SZLjk6xJsmbjxo07araSNOdN91bwf5VkVXtcCtwGfLnnMtcD66vqmvb8IrpQub8dpqL9fKCN3wDsOzD9Pq02Vf1JqmpFVS2pqiULFizo2W1J0uame07kTweGNwF3V9X6PgusqvuS3JPkRVV1G935lVvaYxlwavt5SZtkFfDeJOfTnVh/pKruTXI58F8mruICDgVO6tMnSVI/0z0n8o0ke/LLE+y3b+dy3wec267MugN4F91e0YVJjgPuBo5pbS8DjgTWAT9tbamqh5J8DLi2tfvoxEl2SdJoTPcS32OA0+mumArwqSQfrKqL+iy0qq6nuzR3c4dM0raAE6aYz0pgZZ8+SJK233QPZ50MvKKqHgBIsgD4G7rzGZKkOWq6V2c9bSJAmge3YVpJ0lPUdPdEvtpOZE98kO+tdOcqJElz2Na+Y/2FdB8C/GCS3wFe00Z9Czh32J2TJM1sW9sT+XPaZbNVdTFwMUCSf9vG/dZQeydJmtG2dl5jz6q6cfNiqy0cSo8kSbPG1kJk/hbGPWNHdkSSNPtsLUTWJHn35sUk/57uxomSpDlsa+dETgS+nOTt/DI0lgA7A789zI5Jkma+LYZIVd0PvCrJ64H9W/nSqrpy6D2TJM1407131lXAVUPuiyRplvFT55Kk3gwRSVJvhogkqTdDRJLUmyEiSe
rNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSeptWnfx1VPPwuWXTqvdXaceNeSeSJrN3BORJPVmiEiSehtbiCSZl+Q7Sb7Sni9Kck2SdUkuSLJzq+/Snq9r4xcOzOOkVr8tyWHjWRNJmrvGuSfyfuDWgeenAZ+oqhcCDwPHtfpxwMOt/onWjiT7AccCLwYOBz6TZN6I+i5JYkwhkmQf4Cjgc+15gDcAF7UmZwNvbsNL23Pa+ENa+6XA+VX1s6q6E1gHHDiaNZAkwfj2RP4c+BDwz+357sCPqmpTe74e2LsN7w3cA9DGP9LaP16fZBpJ0giMPESSvBF4oKrWjnCZxydZk2TNxo0bR7VYSXrKG8eeyKuBNyW5Czif7jDWJ4H5SSY+t7IPsKENbwD2BWjjnws8OFifZJonqKoVVbWkqpYsWLBgx66NJM1hIw+RqjqpqvapqoV0J8avrKq3A1cBR7dmy4BL2vCq9pw2/sqqqlY/tl29tQhYDHx7RKshSWJmfWL9w8D5Sf4E+A5wVqufBXwhyTrgIbrgoapuTnIhcAuwCTihqh4bfbclae4aa4hU1deBr7fhO5jk6qqq+ifgLVNM/3Hg48ProSRpS/zEuiSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvc2kz4loBvIbECVtiXsikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktTbyEMkyb5JrkpyS5Kbk7y/1Z+XZHWS29vP3Vo9Sc5Isi7JDUkOGJjXstb+9iTLRr0ukjTXjWNPZBPwn6pqP+Bg4IQk+wHLgSuqajFwRXsOcASwuD2OB86ELnSAU4CDgAOBUyaCR5I0GiMPkaq6t6qua8M/Bm4F9gaWAme3ZmcDb27DS4FzqnM1MD/JXsBhwOqqeqiqHgZWA4ePcFUkac7baZwLT7IQeBlwDbBnVd3bRt0H7NmG9wbuGZhsfatNVdcYLFx+6bTa3XXqUUPuiaRRGtuJ9STPAr4EnFhVjw6Oq6oCagcu6/gka5Ks2bhx446arSTNeWMJkSS/Qhcg51bVxa18fztMRfv5QKtvAPYdmHyfVpuq/iRVtaKqllTVkgULFuy4FZGkOW4cV2cFOAu4tar+bGDUKmDiCqtlwCUD9Xe0q7QOBh5ph70uBw5Nsls7oX5oq0mSRmQc50ReDfwecGOS61vtj4FTgQuTHAfcDRzTxl0GHAmsA34KvAugqh5K8jHg2tbuo1X10GhWQZIEYwiRqvo/QKYYfcgk7Qs4YYp5rQRW7rjeSZK2hZ9YlyT1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTexvrNhpp7/AZE6anFPRFJUm+GiCSpN0NEktSb50S2wXSP50vSXGGIaEbyBLw0O3g4S5LUmyEiSerNEJEk9WaISJJ688S6ZrVtuWLOk/DSjueeiCSpN0NEktSbh7M0Z/jZE2nHc09EktTbrN8TSXI48ElgHvC5qjp1zF3SLOceizR9szpEkswDPg38JrAeuDbJqqq6Zbw901xg2EizPESAA4F1VXUHQJLzgaWAIaIZY0ffuNNQ0kwy20Nkb+CegefrgYPG1BdpJMZ5N+npBtiO3ktzr2/mmu0hMi1JjgeOb09/kuS2nrPaA/iHHdOrHcp+bRv7tW0e71dO27Ez3s75PWl77ej+9TTjf489/cvJirM9RDYA+w4836fVnqCqVgArtndhSdZU1ZLtnc+OZr+2jf3aNvZr28y1fs32S3yvBRYnWZRkZ+BYYNWY+yRJc8as3hOpqk1J3gtcTneJ78qqunnM3ZKkOWNWhwhAVV0GXDaixW33IbEhsV/bxn5tG/u1beZUv1JVw5ivJGkOmO3nRCRJY2
SITEOSw5PclmRdkuVj7Me+Sa5KckuSm5O8v9U/kmRDkuvb48gx9e+uJDe2PqxpteclWZ3k9vZztxH250UD2+T6JI8mOXFc2yvJyiQPJLlpoDbp9knnjPaauyHJASPu1+lJvteW/eUk81t9YZJ/HNh2nx1xv6b83SU5qW2v25IcNuJ+XTDQp7uSXN/qo9xeU70/DPc1VlU+tvCgO2H/98DzgZ2B7wL7jakvewEHtOFnA98H9gM+AvzRDNhWdwF7bFb7b8DyNrwcOG2Mv8f76K51H8v2Al4LHADctLXtAxwJ/DUQ4GDgmhH361BgpzZ82kC/Fg62G8P2mvR31/4OvgvsAixqf7PzRtWvzcb/d+A/j2F7TfX+MNTXmHsiW/f4rVWq6ufAxK1VRq6q7q2q69rwj4Fb6T61P5MtBc5uw2cDbx5TPw4B/r6q7h7T8qmqbwIPbVaeavssBc6pztXA/CR7japfVfW1qtrUnl5N9xmskZpie01lKXB+Vf2squ4E1tH97Y60X0kCHAOcN4xlb8kW3h+G+hozRLZuslurjP2NO8lC4GXANa303rZLunKUh4w2U8DXkqxNd5cAgD2r6t42fB+w53i6xrE88Q97JmwvmHr7zKTX3e/T/cc6YVGS7yT5RpJfH0N/JvvdzZTt9evA/VV1+0Bt5Ntrs/eHob7GDJFZKMmzgC8BJ1bVo8CZwAuAlwL30u1Oj8NrquoA4AjghCSvHRxZ3T70yC8HTPdB1DcBX2ylmbK9nmBc22dLkpwMbALObaV7gX9RVS8DPgD8ZZLnjLBLM/J3N+BtPPGflZFvr0neHx43jNeYIbJ107q1yqgk+RW6F8i5VXUxQFXdX1WPVdU/A/+DIe3Gb01VbWg/HwC+3Ppx/8Qucvv5wBi6dgRwXVXd3/o3I7ZXM9X2GfvrLsk7gTcCb29vPrTDRQ+24bV05x7+1aj6tIXf3UzYXjsBvwNcMFEb9faa7P2BIb/GDJGtmzG3VmnHW88Cbq2qPxuoDx7H/G3gps2nHUHfnpnk2RPDdCdmb6LbVstas2XAJaPuG5v9dzgTtteAqbbPKuAd7Qqag4FHBg5JDF26L3v7EPCmqvrpQH1Buu/xIcnzgcXAHSPs11S/u1XAsUl2SbKo9evbo+pX8xvA96pq/URhlNtrqvcHhv0aG8VVA7P9QXcVw/fp/os4eYz9eA3drugNwPXtcSTwBeDGVl8F7DWGvj2f7uqY7wI3T2wnYHfgCuB24G+A5424X88EHgSeO1Aby/aiC7J7gV/QHX8+bqrtQ3fFzKfba+5GYMmI+7WO7nj5xOvss63t77bf7/XAdcBvjbhfU/7ugJPb9roNOGKU/Wr1zwN/sFnbUW6vqd4fhvoa8xPrkqTePJwlSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRzRlJfjLk+Z+YZNdtWV6SN2Ub7gydZH6S90yj3euSfGW685X6MkSkHedEYNetthpQVauq6tRtmGQ+sNUQkUbFENGcluQFSb7abhr5t0n+dat/vn3Xwv9NckeSo1v9aUk+k+67NlYnuSzJ0Un+EPhV4KokVw3M/+NJvpvk6iRPuvlkkncm+YstLXMzpwIvSPfdFKe3TxufnuSmdN/l8tZJlvGKdgPAFyR5ebsR4Noklw/cDuPrSU5L8u0k35+4UWCSF7fa9e2mh4u3f6vrqcQQ0Vy3AnhfVb0c+CPgMwPj9qL7FPAb6d68obs30kK672n4PeCVAFV1BvBD4PVV9frW9pnA1VX1EuCbwLun0Z/JljloOd0t7V9aVR9s/Xkp8BK6226cPnhrkCSvAj5Ld9vvHwCfAo5u67sS+PjAvHeqqgPp9qhOabU/AD5ZVS8FltB9Qlt63E7j7oA0Lu1up68CvtjddgjovtRowv+u7kZ/twzsRbwG+GKr3ze41zGJnwMT5yXWAr85jW5NtswteQ1wXlU9RnejvW8ArwAeBf4NXUgeWlU/TLI/sD+wuq3vPLrbd0yYuG
HfWrqgBPgWcHKSfYCL64m3OJcMEc1pTwN+1P7LnszPBoYzRZst+UX98r5CjzG9v7ftXeage4Gn032vxA/b/G6uqlduZdmP97Wq/jLJNcBRwGVJ/kNVXbmd/dJTiIezNGdV910LdyZ5Czz+ndMv2cpkfwf8bjs3sifwuoFxP6b7WtJh2nwZfwu8Ncm8JAvovrp14u61P6J78/+vSV5Hd2PCBUleCd1tw5O8eEsLa3eevaMdrrsE+LUduTKa/QwRzSW7Jlk/8PgA8HbguCQTdx/e2lcff4nuvMAtwP+iuzPrI23cCuCrWznEtV2q+26Kv2sn0k+n+96WG+junnwl8KGqum+g/f1051c+TbdHcjRwWlvf6+kO523JMcBNSa6nOxR2zg5eJc1y3sVX2kZJnlVVP0myO91//a8efOOW5hLPiUjb7itJ5gM7Ax8zQDSXuSciSerNcyKSpN4MEUlSb4aIJKk3Q0SS1JshIknqzRCRJPX2/wFnjdv4BnL41AAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "4v4V2xq6xLYE"
},
"source": [
"# Full train/dev splits, plus fixed 1000-example subsets (shuffle seed 42).\n",
"full_train_dataset = encoded_dataset[\"train\"]\n",
"full_eval_dataset = encoded_dataset[\"dev\"]\n",
"small_train_dataset = full_train_dataset.shuffle(seed=42).select(range(1000))\n",
"small_eval_dataset = full_eval_dataset.shuffle(seed=42).select(range(1000))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WfpFlyxJFpwK",
"outputId": "11058853-1d32-49f0-de93-dc13642f87ac"
},
"source": [
"# Load the checkpoint from `model_dir` with a single-output head\n",
"# (num_labels=1, so the model emits one score per input).\n",
"classfmodel = AutoModelForSequenceClassification.from_pretrained(\n",
"    model_dir,\n",
"    num_labels=1,\n",
")"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of the model checkpoint at ./roberta were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']\n",
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./roberta and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VEfYpjj1xUqV",
"outputId": "6d2e7bee-07f5-43a4-d060-8e09083b4632"
},
"source": [
"# Smoke test: score one sentence with the freshly initialised (untrained) head.\n",
"# NOTE(review): `input` shadows the Python built-in; the name is kept in case later cells read it.\n",
"input = rtokenizer.encode(\n",
"    \"Moim zdaniem jest za wcześnie na ocenę , czekam aż to wszystko zadziała , ale jestem pozytywnie nastawiona . \"\n",
")\n",
"output = classfmodel(torch.tensor([input]))[0]\n",
"print(output)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"tensor([[-0.1010]], grad_fn=)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "TSv_ar_Yxap1"
},
"source": [
"# Evaluate and checkpoint once per epoch, then reload the checkpoint that\n",
"# scored the best `pp_accuracy` when training finishes.\n",
"training_args = TrainingArguments(\n",
"    output_dir=\"test-classf\",\n",
"    num_train_epochs=5,\n",
"    learning_rate=3e-5,\n",
"    weight_decay=0.01,\n",
"    per_device_train_batch_size=16,\n",
"    per_device_eval_batch_size=16,\n",
"    evaluation_strategy=\"epoch\",\n",
"    save_strategy=\"epoch\",\n",
"    load_best_model_at_end=True,\n",
"    metric_for_best_model=\"pp_accuracy\",\n",
")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ug8GxLe5xexu"
},
"source": [
"def compute_metrics(eval_pred):\n",
"    \"\"\"Score the model's raw outputs against the gold labels.\n",
"\n",
"    Args:\n",
"        eval_pred: ``(outputs, labels)`` pair; ``outputs`` holds one row per\n",
"            example and only element 0 of each row is used.\n",
"\n",
"    Returns:\n",
"        dict with two entries:\n",
"        ``pp_avg_distance`` - root-mean-square error between outputs and labels;\n",
"        ``pp_accuracy`` - fraction of examples whose output, snapped to\n",
"        0 (< 0.25), 1 (> 0.75) or 0.5 (anything else), equals the label exactly.\n",
"    \"\"\"\n",
"    outputs, labels = eval_pred\n",
"    predictions = np.asarray(outputs)[:, 0]\n",
"    labels = np.asarray(labels).reshape(-1)\n",
"    # Snap every raw score to 0, 0.5 or 1 before comparing with the labels.\n",
"    pred_qt = np.select([predictions < 0.25, predictions > 0.75], [0.0, 1.0], default=0.5)\n",
"    return {'pp_avg_distance': np.sqrt(np.mean((predictions - labels) ** 2)),\n",
"            'pp_accuracy': np.mean(pred_qt == labels)}"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 227
},
"id": "cunu4joYxpM3",
"outputId": "358b46f5-874e-4754-f7df-88d17d8eca8c"
},
"source": [
"# Baseline: build the trainer on the small subsets and evaluate the model\n",
"# once before any fine-tuning.\n",
"trainer = Trainer(\n",
"    model=classfmodel,\n",
"    args=training_args,\n",
"    compute_metrics=compute_metrics,\n",
"    train_dataset=small_train_dataset,\n",
"    eval_dataset=small_eval_dataset,\n",
")\n",
"trainer.evaluate()"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 00:13]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'eval_loss': 0.43715235590934753,\n",
" 'eval_pp_accuracy': 0.394,\n",
" 'eval_pp_avg_distance': 0.661175012588501,\n",
" 'eval_runtime': 13.9877,\n",
" 'eval_samples_per_second': 71.492,\n",
" 'eval_steps_per_second': 4.504}"
]
},
"metadata": {},
"execution_count": 42
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FVyh0sWHxtpI",
"outputId": "f3b12331-a671-4b1b-a69d-695ef08d565d"
},
"source": [
"# Report the first CUDA device; guard the call so a CPU-only runtime\n",
"# prints a notice instead of raising.\n",
"if torch.cuda.is_available():\n",
"    print(torch.cuda.get_device_name(0))\n",
"else:\n",
"    print('No CUDA device available - running on CPU')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Tesla K80\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 387
},
"id": "eqQ47Mudymb_",
"outputId": "1f8e8a13-6119-46ab-d649-fb05191ebe7b"
},
"source": [
"# Distribution of the raw model outputs on the small dev subset before fine-tuning.\n",
"output_before = trainer.predict(small_eval_dataset)\n",
"out = [row[0] for row in output_before.predictions]\n",
"plt.hist(out, bins=100, range=(-1, 1))\n",
"plt.ylabel('Count')\n",
"plt.xlabel('Output');"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 02:01]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAS10lEQVR4nO3dfZRcdX3H8fcXENCiJZE0jYAutKmKehrtioi28mB9gNZARYzVEhUbH9BTtfUY5A9tz/E09tRSrRWbKgKV8iBCCYJSCKDVI+jCQR5FAoSSNJAIClorEvz2j/ntj0uYzc5m585ssu/XOXvm3t99mG9+M9nP3jt3fjcyE0mSAHYadgGSpJnDUJAkVYaCJKkyFCRJlaEgSap2GXYB07HXXnvlyMjIsMuQpO3Ktdde+6PMnNdt2XYdCiMjI4yNjQ27DEnarkTE3RMt8/SRJKkyFCRJlaEgSaoMBUlSZShIkipDQZJUGQqSpMpQkCRVhoIkqdquv9EsDcrI8ovr9NoVRw6xEqldHilIkipDQZJUGQqSpMpQkCRVhoIkqTIUJEmVoSBJqgwFSVJlKEiSKkNBklQZCpKkylCQJFWGgiSpMhQkSZWhIEmqDAVJUmUoSJIqQ0GSVBkKkqTKUJAkVYaCJKnaZdgFSNubkeUXP25+7Yojh1SJ1H8eKUiSKkNBklQZCpKkqrVQiIh9I+LKiLglIm6OiL8o7XMj4rKIuL08zintERGfjog1EXFDRLyordokSd21eaSwGfjLzDwAOAg4ISIOAJYDqzNzIbC6zAO8FlhYfpYBp7RYmySpi9ZCITM3ZOZ1ZfqnwK3A3sBi4PSy2unAUWV6MXBGdlwN7BkRC9qqT5L0RAP5TCEiRoAXAtcA8zNzQ1l0LzC/TO8N3NPYbF1p23JfyyJiLCLGNm3a1FrNkjQbtR4KEbEH8BXg/Zn5UHNZZiaQU9lfZq7MzNHMHJ03b14fK5UktRoKEfEkOoFwZmaeX5rvGz8tVB43lvb1wL6NzfcpbZKkAWnz6qMAvgDcmpn/0Fi0ClhappcCFzbajytXIR0EPNg4zSRJGoA2h7l4GfBnwI0RcX1p+wiwAjg3Io4H7gaOLcsuAY4A1gA/B97WYm2SpC5aC4XM/BYQEyw+vMv6CZzQVj2SpMn5jWZJUmUoSJIqQ0GSVBkKkqTKUJAkVYaCJKkyFCRJlaEgSaoMBUlSZShIkipDQZJUGQqSpMpQkCRVhoIkqTIUJEmVoSBJqgwFSVJlKEiSKkNBklQZCpKkylCQJFWGgiSpMhQkSZWhIEmqDAVJUmUoSJIqQ0GSVBkKkqTKUJAkVYaCJKkyFCRJlaEgSap2GXYB0kw1svziYZcgDZxHCpKkylCQJFWthUJEnBoRGyPipkbbxyJifURcX36OaCw7MSLWRMRtEfHqtuqSJE2szSOF04DXdGk/OTMXlZ9LACLiAGAJ8LyyzWcjYucWa5MkddFaKGTmN4EHelx9MXB2Zj6cmXcBa4AD26pNktTdMD5TeG9E3FBOL80pbXsD9zTWWVfaniAilkXEWESMbdq0qe1aJWlWGXQonAL8FrAI2AB8cqo7yMyVmTmamaPz5s3rd32SNKsNNBQy877MfDQzfwX8K4+dIloP7NtYdZ/SJkkaoIGGQkQsaMweDYxfmbQKWBIRu0XEfsBC4LuDrE2S1OI3miPiLOAQYK+IWAd8FDgkIhYBCawF3gmQmTdHxLnALcBm4ITMfLSt2iRJ3bUWCpn5pi7NX9jK+h8HPt5WPZKkyfmNZklSZShIkipHSZWmqTma6toVRw6xEmn6PFKQJFWGgiSpMhQkSZWhIEmqegqFiHhZL22SpO1br0cK/9RjmyRpO7bVS1Ij4qXAwcC8iPhgY9HTAG+CI0k7mMm+p7ArsEdZ76mN9oeAY9oqSpI0HFsNhcz8BvCNiDgtM+8eUE2SpCHp9RvNu0XESmCkuU1mHt
ZGUZKk4eg1FL4MfA74POCQ1pK0g+o1FDZn5imtViJJGrpeL0m9KCLeExELImLu+E+rlUmSBq7XI4Wl5fFDjbYE9u9vOZKkYeopFDJzv7YLkSQNX0+hEBHHdWvPzDP6W44kaZh6PX304sb07sDhwHWAoSBJO5BeTx+9rzkfEXsCZ7dSkSRpaLZ16Oz/BfycQZJ2ML1+pnARnauNoDMQ3nOBc9sqSpI0HL1+pvD3jenNwN2Zua6FeiRJQ9TT6aMyMN4P6IyUOgf4ZZtFSZKGo9c7rx0LfBd4A3AscE1EOHS2JO1gej19dBLw4szcCBAR84DLgfPaKkySNHi9Xn2003ggFPdPYVtJ0nai1yOFr0fEpcBZZf6NwCXtlCRJGpbJ7tH828D8zPxQRPwJ8PKy6DvAmW0XJ0karMmOFP4ROBEgM88HzgeIiBeUZX/canWSpIGa7HOB+Zl545aNpW2klYokSUMzWSjsuZVlT+5nIZKk4ZssFMYi4s+3bIyIdwDXtlOSJGlYJvtM4f3ABRHxZh4LgVFgV+DoNguTJA3eVkMhM+8DDo6IQ4Hnl+aLM/OK1iuTJA1cr/dTuBK4cio7johTgT8CNmbm80vbXOAcOh9SrwWOzcwfR0QAnwKOAH4OvDUzr5vK80mSpq/XL69ti9OAz/D4u7MtB1Zn5oqIWF7mPwy8FlhYfl4CnFIepYEaWX7xsEuQhqq1oSoy85vAA1s0LwZOL9OnA0c12s/IjquBPSNiQVu1SZK6G/T4RfMzc0OZvheYX6b3Bu5prLeutD1BRCyLiLGIGNu0aVN7lUrSLDS0Qe0yM3nsbm5T2W5lZo5m5ui8efNaqEySZq9Bh8J946eFyuP4yKvrgX0b6+1T2iRJAzToUFgFLC3TS4ELG+3HRcdBwION00ySpAFp7eqjiDgLOATYKyLWAR8FVgDnRsTxwN107uIGnWG4jwDW0Lkk9W1t1SVJmlhroZCZb5pg0eFd1k3ghLZqkST1xrunSZIqQ0GSVBkKkqTKUJAkVYaCJKkyFCRJlaEgSaoMBUlSZShIkipDQZJUGQqSpMpQkCRVhoIkqTIUJEmVoSBJqgwFSVJlKEiSKkNBklQZCpKkylCQJFWGgiSpMhQkSZWhIEmqDAVJUmUoSJIqQ0GSVBkKkqTKUJAkVYaCJKkyFCRJlaEgSaoMBUlStcuwC5B2JCPLL67Ta1ccOcRKpG3jkYIkqTIUJEnVUE4fRcRa4KfAo8DmzByNiLnAOcAIsBY4NjN/PIz6JGm2GuaRwqGZuSgzR8v8cmB1Zi4EVpd5SdIAzaTTR4uB08v06cBRQ6xFkmalYYVCAv8ZEddGxLLSNj8zN5Tpe4H53TaMiGURMRYRY5s2bRpErZI0awzrktSXZ+b6iPgN4LKI+EFzYWZmRGS3DTNzJbASYHR0tOs6kqRtM5QjhcxcXx43AhcABwL3RcQCgPK4cRi1SdJsNvBQiIhfi4injk8DrwJuAlYBS8tqS4ELB12bJM12wzh9NB+4ICLGn//fM/PrEfE94NyIOB64Gzh2CLVJ0qw28FDIzDuB3+3Sfj9w+KDrkSQ9ZiZdkipJGjJDQZJUGQqSpMpQkCRVhoIkqTIUJEmVoSBJqgwFSVLlPZo16zXvqyzNdh4pSJIqQ0GSVBkKkqTKUJAkVYaCJKny6iOpJc2rmtauOHKIlUi980hBklQZCpKkylCQJFWGgiSpMhQkSZWhIEmqDAVJUmUoSJIqQ0GSVBkKkqTKUJAkVYaCJKlyQDzNSt6CU+rOIwVJUmUoSJIqQ0GSVPmZgjQA3nBH2wuPFCRJlaEgSaoMBUlS5WcKmjVm4ncTJqrJzx00LDPuSCEiXhMRt0XEmohYPux6JGk2mVFHChGxM/DPwB8C64DvRcSqzLxluJVpe7U9HR1IM8GMCgXgQGBNZt4JEBFnA4sBQ0GzlpezapBmWijsDdzTmF8HvKS5QkQsA5aV2Z9FxG3b+Fx7AT/axm3bNFPrgplb2w
5XV3xiau1TtMP1V8t2xLqeNdGCmRYKk8rMlcDK6e4nIsYyc7QPJfXVTK0LZm5t1jU11jU1s62umfZB83pg38b8PqVNkjQAMy0UvgcsjIj9ImJXYAmwasg1SdKsMaNOH2Xm5oh4L3ApsDNwambe3NLTTfsUVEtmal0wc2uzrqmxrqmZVXVFZraxX0nSdmimnT6SJA2RoSBJqnboUIiIN0TEzRHxq4iY8NKtiYbWKB94X1PazykffvejrrkRcVlE3F4e53RZ59CIuL7x84uIOKosOy0i7mosWzSousp6jzaee1WjfZj9tSgivlNe7xsi4o2NZX3tr8mGYomI3cq/f03pj5HGshNL+20R8erp1LENdX0wIm4p/bM6Ip7VWNb1NR1QXW+NiE2N539HY9nS8rrfHhFLB1zXyY2afhgRP2ksa7O/To2IjRFx0wTLIyI+Xeq+ISJe1Fg2/f7KzB32B3gu8GzgKmB0gnV2Bu4A9gd2Bb4PHFCWnQssKdOfA97dp7r+DlheppcDn5hk/bnAA8BTyvxpwDEt9FdPdQE/m6B9aP0F/A6wsEw/A9gA7Nnv/tra+6WxznuAz5XpJcA5ZfqAsv5uwH5lPzsPsK5DG++hd4/XtbXXdEB1vRX4TJdt5wJ3lsc5ZXrOoOraYv330bnwpdX+Kvv+A+BFwE0TLD8C+BoQwEHANf3srx36SCEzb83Myb7xXIfWyMxfAmcDiyMigMOA88p6pwNH9am0xWV/ve73GOBrmfnzPj3/RKZaVzXs/srMH2bm7WX6f4CNwLw+PX9T1/fLVuo9Dzi89M9i4OzMfDgz7wLWlP0NpK7MvLLxHrqazveA2tZLf03k1cBlmflAZv4YuAx4zZDqehNwVp+ee6sy85t0/gicyGLgjOy4GtgzIhbQp/7aoUOhR92G1tgbeDrwk8zcvEV7P8zPzA1l+l5g/iTrL+GJb8iPl0PHkyNitwHXtXtEjEXE1eOntJhB/RURB9L56++ORnO/+mui90vXdUp/PEinf3rZts26mo6n89fmuG6v6SDren15fc6LiPEvsM6I/iqn2fYDrmg0t9VfvZio9r7014z6nsK2iIjLgd/ssuikzLxw0PWM21pdzZnMzIiY8Lrg8hfAC+h8d2PciXR+Oe5K51rlDwN/M8C6npWZ6yNif+CKiLiRzi++bdbn/vo3YGlm/qo0b3N/7Ygi4i3AKPCKRvMTXtPMvKP7HvruIuCszHw4It5J5yjrsAE9dy+WAOdl5qONtmH2V6u2+1DIzFdOcxcTDa1xP53Dsl3KX3tTGnJja3VFxH0RsSAzN5RfYhu3sqtjgQsy85HGvsf/an44Ir4I/NUg68rM9eXxzoi4Cngh8BWG3F8R8TTgYjp/EFzd2Pc291cXvQzFMr7OuojYBfh1Ou+nNodx6WnfEfFKOkH7isx8eLx9gte0H7/kJq0rM+9vzH6ezmdI49sessW2V/Whpp7qalgCnNBsaLG/ejFR7X3pL08fTTC0RnY+ubmSzvl8gKVAv448VpX99bLfJ5zLLL8Yx8/jHwV0vUqhjboiYs746ZeI2At4GXDLsPurvHYX0DnXet4Wy/rZX70MxdKs9xjgitI/q4Al0bk6aT9gIfDdadQypboi4oXAvwCvy8yNjfaur+kA61rQmH0dcGuZvhR4ValvDvAqHn/E3Gpdpbbn0PnQ9juNtjb7qxergOPKVUgHAQ+WP3z6019tfYI+E36Ao+mcV3sYuA+4tLQ/A7iksd4RwA/pJP1Jjfb96fynXQN8GditT3U9HVgN3A5cDswt7aPA5xvrjdBJ/5222P4K4EY6v9y+BOwxqLqAg8tzf788Hj8T+gt4C/AIcH3jZ1Eb/dXt/ULndNTryvTu5d+/pvTH/o1tTyrb3Qa8ts/v98nqurz8Pxjvn1WTvaYDqutvgZvL818JPKex7dtLP64B3jbIusr8x4AVW2zXdn+dRefquUfo/P46HngX8K6yPOjcjOyO8vyjjW2n3V
8OcyFJqjx9JEmqDAVJUmUoSJIqQ0GSVBkKkqTKUJAmEBH7RMSFZcTJOyLiUzHJyK8R8ZFpPuchEXHwdPYhTYehIHVRvuh2PvAfmbmQziisewAfn2TTaYUCnW+kGgoaGkNB6u4w4BeZ+UWA7Ix78wHg7RHxnoj4zPiKEfHV8hf+CuDJ0Rlj/8yIGImIH5TpW8tgb08p26wt34YlIkYj4qro3HfhXcAHyj5+f7D/ZMlQkCbyPODaZkNmPgT8NxOMGZaZy4H/y8xFmfnm0vxs4LOZ+VzgITr3WugqM9fSuQ/FyWUf/zXtf4U0RYaC1K57MvPbZfpLwMuHWYw0GUNB6u4W4PeaDWUU1mcCP+Hx/3d238p+thxHZnx+c2MfW9teGihDQepuNfCUiDgOICJ2Bj5J59aedwKLImKnckOY5t3THomIJzXmnxkRLy3Tfwp8q0yv5bHQeX1j/Z8CT+3jv0OaEkNB6iI7I0UeDbwhIm6nM5rmL+hcXfRt4C46RxOfBq5rbLoSuCEizizztwEnRMStdIZgPqW0/zXwqYgYA5o3b7kIONoPmjUsjpIqtaRcTfTVzHz+kEuReuaRgiSp8khBklR5pCBJqgwFSVJlKEiSKkNBklQZCpKk6v8BZCnXxLOrylcAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "0QMTQqUC0KqS",
"outputId": "cc11c5c1-38bc-432b-a153-618387ffe6df"
},
"source": [
"# Fine-tune on the small training subset; per-epoch evaluation and checkpointing follow `training_args`.\n",
"trainer.train()"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running training *****\n",
" Num examples = 1000\n",
" Num Epochs = 5\n",
" Instantaneous batch size per device = 16\n",
" Total train batch size (w. parallel, distributed & accumulation) = 16\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 315\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [315/315 05:29, Epoch 5/5]\n",
"
\n",
" \n",
" \n",
" \n",
" Epoch \n",
" Training Loss \n",
" Validation Loss \n",
" Pp Avg Distance \n",
" Pp Accuracy \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" No log \n",
" 0.081177 \n",
" 0.284916 \n",
" 0.717000 \n",
" \n",
" \n",
" 2 \n",
" No log \n",
" 0.084855 \n",
" 0.291300 \n",
" 0.757000 \n",
" \n",
" \n",
" 3 \n",
" No log \n",
" 0.066228 \n",
" 0.257348 \n",
" 0.821000 \n",
" \n",
" \n",
" 4 \n",
" No log \n",
" 0.078725 \n",
" 0.280580 \n",
" 0.790000 \n",
" \n",
" \n",
" 5 \n",
" No log \n",
" 0.070066 \n",
" 0.264699 \n",
" 0.821000 \n",
" \n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 03:53]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to test-classf/checkpoint-63\n",
"Configuration saved in test-classf/checkpoint-63/config.json\n",
"Model weights saved in test-classf/checkpoint-63/pytorch_model.bin\n",
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n",
"Saving model checkpoint to test-classf/checkpoint-126\n",
"Configuration saved in test-classf/checkpoint-126/config.json\n",
"Model weights saved in test-classf/checkpoint-126/pytorch_model.bin\n",
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n",
"Saving model checkpoint to test-classf/checkpoint-189\n",
"Configuration saved in test-classf/checkpoint-189/config.json\n",
"Model weights saved in test-classf/checkpoint-189/pytorch_model.bin\n",
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n",
"Saving model checkpoint to test-classf/checkpoint-252\n",
"Configuration saved in test-classf/checkpoint-252/config.json\n",
"Model weights saved in test-classf/checkpoint-252/pytorch_model.bin\n",
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n",
"Saving model checkpoint to test-classf/checkpoint-315\n",
"Configuration saved in test-classf/checkpoint-315/config.json\n",
"Model weights saved in test-classf/checkpoint-315/pytorch_model.bin\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from test-classf/checkpoint-189 (score: 0.821).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=315, training_loss=0.05475908915201823, metrics={'train_runtime': 330.0469, 'train_samples_per_second': 15.149, 'train_steps_per_second': 0.954, 'total_flos': 328885866240000.0, 'train_loss': 0.05475908915201823, 'epoch': 5.0})"
]
},
"metadata": {},
"execution_count": 45
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 125
},
"id": "i7fYZAdR0R58",
"outputId": "0712b8d4-5b6f-4f4b-8f7d-1bbce8b68843"
},
"source": [
"# Predict on the small dev subset again, now with the fine-tuned model.\n",
"output_after = trainer.predict(small_eval_dataset)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 00:13]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "_Yvw9w085VNj"
},
"source": [
"def plothist(output, bins=30, value_range=(-1.1, 1.1)):\n",
"    \"\"\"Plot a histogram of the first output value of every prediction row.\n",
"\n",
"    Args:\n",
"        output: prediction result whose element 0 holds one row of model\n",
"            outputs per example; only element 0 of each row is plotted.\n",
"        bins: number of histogram bins.\n",
"        value_range: ``(low, high)`` limits of the histogram; values outside\n",
"            them are ignored by ``plt.hist``.\n",
"    \"\"\"\n",
"    out_tr = [item[0] for item in output[0]]\n",
"    plt.hist(out_tr, bins=bins, range=value_range)\n",
"    plt.ylabel('Count')\n",
"    plt.xlabel('Output')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 279
},
"id": "RKuf1hEI5bTo",
"outputId": "e86a94a6-d89c-4a9d-ee18-9d9cdf4572a5"
},
"source": [
"# Histogram of the model outputs after fine-tuning.\n",
"plothist(output_after)"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAASkklEQVR4nO3df7DldX3f8edLiJhELSi3lAKbCwnaqG3X5IY0/sgQTVrUFsREAnUiRpKVUTpN0km76kxN03FKUimNscFZlQJTgqhIxGhjEI00maDZNRQXgfAjS92ddXcDqdjEEHd594/73Q+Hu+fee2D3fL939zwfM2fO9/v5fr/nvO/37L2v/Xy+P06qCkmSAJ42dAGSpLXDUJAkNYaCJKkxFCRJjaEgSWqOHrqAg3H88cfX/Pz80GVI0mFly5Ytf1FVc+OWHdahMD8/z+bNm4cuQ5IOK0keXG6Zw0eSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktRMLRSSXJlkd5KtI23XJ7m9e2xLcnvXPp/kWyPL3j+tuiRJy5vmFc1XAe8DrtnfUFU/vX86yWXAN0bWv7+q1k+xHumQmN/4qYnW23bpa6ZciXToTS0UqurWJPPjliUJcB7wimm9vyTpyRvqmMLLgV1Vde9I26lJ/jTJF5K8fLkNk2xIsjnJ5j179ky/UkmaIUOFwgXAdSPzO4F1VfVi4JeA307y7HEbVtWmqlqoqoW5ubE3+ZMkPUW9h0KSo4HXAdfvb6uqR6vqoW56C3A/8Ly+a5OkWTdET+HHgburavv+hiRzSY7qpk8DTgceGKA2SZpp0zwl9Trgj4HnJ9me5KJu0fk8cegI4EeBO7pTVD8GXFxVD0+rNknSeNM8++iCZdrfNKbtBuCGadUiSZqMVzRLkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNVMLhSRXJtmdZOtI268k2ZHk9u7x6pFlb09yX5J7kvyzadUlSVreNHsKVwFnjWm/vKrWd49PAyR5AXA+8MJum99KctQUa5MkjTG1UKiqW4GHJ1z9HODDVfVoVf05cB9wxrRqkySNN8QxhUuS3NENLx3XtZ0EfG1kne1d2wGSbEiyOcnmPXv2TLtWSZopfYfCFcD3AuuBncBlT/YFqmpTVS1U1cLc3Nyhrk+SZlqvoVBVu6pqX1U9BnyAx4eIdgCnjKx6ctcmSepRr6GQ5MSR2XOB/Wcm3QScn+SYJKcCpwNf6rM2SRIcPa0XTnIdcCZwfJLtwLuAM5OsBwrYBrwFoKruTPIR4KvAXuBtVbVvWrVJksabWihU1QVjmj+0wvrvBt49rXokSavzimZJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkZmqhkOTKJLuTbB1p+89J7k5yR5Ibkxzbtc8n+VaS27vH+6dVlyRpedPsKVwFnLWk7WbgRVX1j4A/A94+suz+qlrfPS6eYl2SpGVMLRSq6lbg4SVtv19Ve7vZ24CTp/X+kqQnb8hjCm8G/ufI/KlJ/jTJF5K8fKiiJGmWHT3EmyZ5J7AXuLZr2gmsq6qHkvwg8DtJXlhVj4zZdgOwAWDdunV9lSxJM6H3nkKSNwH/HHhDVRVAVT1aVQ9101uA+4Hnjdu+qjZV1UJVLczNzfVUtSTNhl5DIclZwL8Fzq6qvx5pn0tyVDd9GnA68ECftUmSpjh8lOQ64Ezg+CTbgXexeLbRMcDNSQBu6840+lHgV5N8G3gMuLiqHh77wpKkqZlaKFTVBWOaP7TMujcAN0yrFknSZLyiWZLUGAqSpMZQkCQ1ho
IkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkZpBbZ0uzYH7jpyZab9ulr5lyJdLk7ClIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNp6RK0hoy9KnM9hQkSY2hIElqJgqFJC+dpE2SdHibtKfwmxO2PUGSK5PsTrJ1pO05SW5Ocm/3fFzXniTvTXJfkjuS/MCEtUmSDpEVDzQn+RHgJcBckl8aWfRs4KgJXv8q4H3ANSNtG4FbqurSJBu7+X8HvAo4vXv8MHBF9yxJ6slqPYWnA89kMTyeNfJ4BPip1V68qm4FHl7SfA5wdTd9NfDakfZratFtwLFJTpzkh5AkHRor9hSq6gvAF5JcVVUPHqL3PKGqdnbTXwdO6KZPAr42st72rm3nSBtJNgAbANatW3eISpIkweTXKRyTZBMwP7pNVb3iYN68qipJPcltNgGbABYWFp7UtpKklU0aCh8F3g98ENh3kO+5K8mJVbWzGx7a3bXvAE4ZWe/krk2S1JNJQ2FvVV1xiN7zJuBC4NLu+RMj7Zck+TCLB5i/MTLMJEnqwaSh8MkkbwVuBB7d31hVSw8iP0GS64AzgeOTbAfexWIYfCTJRcCDwHnd6p8GXg3cB/w18LOT/xiSpENh0lC4sHv+5ZG2Ak5baaOqumCZRa8cs24Bb5uwHknSFEwUClV16rQLkSQNb6JQSPLGce1Vdc24dknS4WnS4aMfGpl+BovDP1/miVcqS5IOc5MOH/2r0fkkxwIfnkpFkqTBPNVbZ/8V4HEGSTrCTHpM4ZMsnm0EizfC+37gI9MqSpI0jEmPKbxnZHov8GBVbZ9CPZKkAU00fNTdGO9uFu+Qehzwt9MsSpI0jEm/ee084EvA61m8AvmLSVa9dbYk6fAy6fDRO4EfqqrdAEnmgM8CH5tWYZKk/k0aCk/bHwidh3jqZy5J0syZ3/ipoUuYyKSh8HtJPgNc183/NIs3sJMkHUFW+47m72Pxm9J+OcnrgJd1i/4YuHbaxUmS+rVaT+G/Am8HqKqPAx8HSPIPu2X/YqrVSZJ6tdpxgROq6itLG7u2+alUJEkazGqhcOwKy77zUBYiSRreaqGwOcnPL21M8nPAlumUJEkaymrHFH4BuDHJG3g8BBaApwPnTrMwSVL/VgyFqtoFvCTJjwEv6po/VVWfm3plkqTeTfp9Cp8HPj/lWiRJA5v04rVDJsnzgetHmk4D/j2LB7V/HtjTtb+jqrxATpJ61HsoVNU9wHqAJEcBO4AbgZ8FLq+q96ywuSRpioa+f9Ergfur6sGB65AkMXwonM/j91MCuCTJHUmuTHLcuA2SbEiyOcnmPXv2jFtFkvQUDRYKSZ4OnA18tGu6AvheFoeWdgKXjduuqjZV1UJVLczNzfVSqyTNiiF7Cq8Cvtyd9kpV7aqqfVX1GPAB4IwBa5OkmTRkKFzAyNBRkhNHlp0LbO29Ikmacb2ffQSQ5LuBnwDeMtL860nWAwVsW7JMktSDQUKhqv4KeO6Stp8ZohZJ/Zv0W8i2XfqaKVeipYY++0iStIYYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJKaQS5ek3RkmvSiNK1d9hQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSM9i9j5JsA74J7AP2VtVCkucA1wPzwDbgvKr6y6FqlKRZM3RP4ceqan1VLXTzG4Fbqup04JZuXpLUk6FDYalzgKu76auB1w5YiyTNnCFDoYDfT7IlyYau7YSq2tlNfx04YelGSTYk2Zxk8549e/qqVZJmwpDfp/CyqtqR5O8CNye5e3RhVVWSWrpRVW0CNgEsLCwcsFyS9NQN1lOoqh3d827gRuAMYFeSEwG6591D1SdJs2iQUEjy3UmetX8a+KfAVuAm4MJutQuBTwxRnyTNqqGGj04Abkyyv4bfrqrfS/InwEeSXAQ8CJw3UH1Sbyb9Csttl75mypVIA4VCVT
0A/OMx7Q8Br+y/IkkSDHugWZJWZC+qf2vtOgVJ0oDsKUjSUzRpT+ZwYk9BktTYU5C0oiPxf8Nanj0FSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUuMVzdJhwjuGqg+GgjSjvH2FxnH4SJLU2FOQdNhzaO3QMRQkaYlZHlozFCTNjFn+Yz+p3o8pJDklyeeTfDXJnUn+ddf+K0l2JLm9e7y679okadYN0VPYC/ybqvpykmcBW5Lc3C27vKreM0BNkiQGCIWq2gns7Ka/meQu4KS+65AkHWjQU1KTzAMvBr7YNV2S5I4kVyY5brDCJGlGDXagOckzgRuAX6iqR5JcAfxHoLrny4A3j9luA7ABYN26df0VrCOaByClRYP0FJJ8B4uBcG1VfRygqnZV1b6qegz4AHDGuG2ralNVLVTVwtzcXH9FS9IM6L2nkCTAh4C7quq/jLSf2B1vADgX2Np3bdKRwF6PDsYQw0cvBX4G+EqS27u2dwAXJFnP4vDRNuAtA9QmSTNtiLOP/hDImEWf7rsWSdITeUM8SVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1ay4UkpyV5J4k9yXZOHQ9kjRL1lQoJDkK+G/Aq4AXABckecGwVUnS7FhToQCcAdxXVQ9U1d8CHwbOGbgmSZoZRw9dwBInAV8bmd8O/PDoCkk2ABu62f+X5J6eatvveOAven7Pw4H75UDukwO5Tw70lPZJfu2g3vN7lluw1kJhVVW1Cdg01Psn2VxVC0O9/1rlfjmQ++RA7pMDrbV9staGj3YAp4zMn9y1SZJ6sNZC4U+A05OcmuTpwPnATQPXJEkzY00NH1XV3iSXAJ8BjgKurKo7By5rqcGGrtY498uB3CcHcp8caE3tk1TV0DVIktaItTZ8JEkakKEgSWoMhVUkeX2SO5M8lmTZ08Zm7fYcSZ6T5OYk93bPxy2z3r4kt3ePI+6kgdU+9yTHJLm+W/7FJPP9V9m/CfbLm5LsGfm38XND1NmXJFcm2Z1k6zLLk+S93f66I8kP9F3jfobC6rYCrwNuXW6FGb09x0bglqo6Hbilmx/nW1W1vnuc3V950zfh534R8JdV9X3A5cDBXXJ0GHgSvw/Xj/zb+GCvRfbvKuCsFZa/Cji9e2wAruihprEMhVVU1V1VtdpV07N4e45zgKu76auB1w5Yy1Am+dxH99PHgFcmSY81DmEWfx9WVFW3Ag+vsMo5wDW16Dbg2CQn9lPdExkKh8a423OcNFAtfTmhqnZ2018HTlhmvWck2ZzktiRHWnBM8rm3dapqL/AN4Lm9VDecSX8ffrIbKvlYklPGLJ8la+ZvyJq6TmEoST4L/L0xi95ZVZ/ou561YqX9MjpTVZVkuXObv6eqdiQ5Dfhckq9U1f2HulYddj4JXFdVjyZ5C4u9qVcMXJMwFACoqh8/yJc4Im/PsdJ+SbIryYlVtbPr5u5e5jV2dM8PJPkD4MXAkRIKk3zu+9fZnuRo4O8AD/VT3mBW3S9VNboPPgj8eg91rWVr5m+Iw0eHxizenuMm4MJu+kLggB5VkuOSHNNNHw+8FPhqbxVO3ySf++h++ingc3XkXzG66n5ZMl5+NnBXj/WtRTcBb+zOQvonwDdGhmf7VVU+VngA57I4vvcosAv4TNf+94FPj6z3auDPWPxf8DuHrruH/fJcFs86uhf4LPCcrn0B+GA3/RLgK8D/7p4vGrruKeyHAz534FeBs7vpZwAfBe4DvgScNnTNa2S//Cfgzu7fxueBfzB0zVPeH9cBO4Fvd39PLgIuBi7ulofFM7bu735XFoaq1dtcSJIah48kSY2hIElqDAVJUmMoSJIaQ0GS1BgK0jKSnJzkE92dYO9P8hvdefcrbfOOg3zPM5O85GBeQzoYhoI0Rn
fTuo8Dv1OLd4J9HvBM4N2rbHpQoQCcyeL1HdIgDAVpvFcAf1NV/x2gqvYBvwi8Oclbk7xv/4pJfrf7H/6lwHd23w9wbZL5JHd303d1N377rm6bbd1V3iRZSPIH3XctXAz8YvcaL+/3R5YMBWk5LwS2jDZU1SPA/2GZe4ZV1UYe//6IN3TNzwd+q6q+H3gEeOtyb1hV24D3A5d3r/G/DvqnkJ4kQ0Garq9V1R910/8DeNmQxUirMRSk8b4K/OBoQ5JnA+uA/8sTf3eescLrLL2PzP75vSOvsdL2Uq8MBWm8W4DvSvJGaF8xeRmLX6v4ALA+ydO6L4c5Y2S7byf5jpH5dUl+pJv+l8AfdtPbeDx0fnJk/W8CzzqEP4f0pBgK0hi1eKfIc4HXJ7mXxTt+/g2LZxf9EfDnLPYm3gt8eWTTTcAdSa7t5u8B3pbkLuA4Hv/u3f8A/EaSzcC+ke0/CZzrgWYNxbukSlPSnU30u1X1ooFLkSZmT0GS1NhTkCQ19hQkSY2hIElqDAVJUmMoSJIaQ0GS1Px/CJ7Tt9lMB0YAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "bbNw3OZ95gAd",
"outputId": "1a9b5f32-8cd2-4f99-9f02-ab097eda55d8"
},
"source": [
"input = rtokenizer.encode(\"Moim zdaniem jest za wcześnie na ocenę , czekam aż to wszystko zadziała , ale jestem pozytywnie nastawiona . \");\n",
"output = classfmodel(torch.tensor([input]).to('cuda'))[0]\n",
"print(output)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"tensor([[0.8812]], device='cuda:0', grad_fn=)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "b9wD6CAM5wfj",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 125
},
"outputId": "2970278f-9d26-4d15-ebc0-708c62b7246f"
},
"source": [
"#let's see some measures of how good the predictions are\n",
"output_after = trainer.predict(full_eval_dataset)\n",
"truth = np.array(full_eval_dataset['label'])\n",
"pred = np.array([0 if out[0]<0.25 else 1 if out[0]>0.75 else 0.5 for out in output_after[0]])"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Prediction *****\n",
" Num examples = 7009\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 06:33]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "sRYKogPJ53qW",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"outputId": "4aedbc20-a732-4b6d-b2e6-023aaed1845d"
},
"source": [
"pd.crosstab(truth, pred, rownames=['truth'], colnames=['pred'])"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" pred \n",
" 0.0 \n",
" 0.5 \n",
" 1.0 \n",
" \n",
" \n",
" truth \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0.0 \n",
" 2358 \n",
" 267 \n",
" 61 \n",
" \n",
" \n",
" 0.5 \n",
" 351 \n",
" 1693 \n",
" 147 \n",
" \n",
" \n",
" 1.0 \n",
" 120 \n",
" 262 \n",
" 1750 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"pred 0.0 0.5 1.0\n",
"truth \n",
"0.0 2358 267 61\n",
"0.5 351 1693 147\n",
"1.0 120 262 1750"
]
},
"metadata": {},
"execution_count": 52
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "MuvZUR_755zy",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 125
},
"outputId": "b3cbd127-49c2-4a08-8d9b-66de633a43d7"
},
"source": [
"#on a test set\n",
"output_test = trainer.predict(encoded_dataset[\"test\"])"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Prediction *****\n",
" Num examples = 6972\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 09:04]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "oTS4SS2wE3yL"
},
"source": [
"#let's see some measures of how good the predictions are\n",
"truth = np.array(encoded_dataset[\"test\"]['label'])\n",
"pred = np.array([0 if out[0]<0.25 else 1 if out[0]>0.75 else 0.5 for out in output_test[0]])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "tmix5V7oE8bq"
},
"source": [
"ct = pd.crosstab(truth, pred, rownames=['truth'], colnames=['pred'])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "x9VR0AJlHqLs",
"outputId": "b06ea29d-c33b-4368-aedf-375c7d4d8446"
},
"source": [
"ct"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" pred \n",
" 0.0 \n",
" 0.5 \n",
" 1.0 \n",
" \n",
" \n",
" truth \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0.0 \n",
" 2364 \n",
" 267 \n",
" 39 \n",
" \n",
" \n",
" 0.5 \n",
" 388 \n",
" 1685 \n",
" 142 \n",
" \n",
" \n",
" 1.0 \n",
" 103 \n",
" 302 \n",
" 1682 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"pred 0.0 0.5 1.0\n",
"truth \n",
"0.0 2364 267 39\n",
"0.5 388 1685 142\n",
"1.0 103 302 1682"
]
},
"metadata": {},
"execution_count": 56
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Vx3wEtKLHrUs"
},
"source": [
"acc = (ct[0][0]+ct[0.5][0.5]+ct[1][1])/ct.values.sum()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fZoHiHb4HtLz",
"outputId": "6f77dae7-1d50-4d63-c7de-188be669067c"
},
"source": [
"acc"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.8220022948938611"
]
},
"metadata": {},
"execution_count": 58
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "a6yjegsvKmJ-",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "28f0e02e-4a2d-468f-bca3-f6098903cdc3"
},
"source": [
"#how about using three output neurons? (we've got 3 classes after all)\n",
"classfmodel3 = AutoModelForSequenceClassification.from_pretrained(model_dir, num_labels=3)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading configuration file ./roberta/config.json\n",
"Model config RobertaConfig {\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.10.2\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50001\n",
"}\n",
"\n",
"loading weights file ./roberta/pytorch_model.bin\n",
"Some weights of the model checkpoint at ./roberta were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']\n",
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./roberta and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "UjZ3x8EmI0G0",
"outputId": "320e1667-4052-4261-93a1-c5d1dbd57bee"
},
"source": [
"encoded_dataset['train']"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['__index_level_0__', 'attention_mask', 'input_ids', 'label', 'text'],\n",
" num_rows: 56023\n",
"})"
]
},
"metadata": {},
"execution_count": 67
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "v8PQZT6gLj4C"
},
"source": [
"def change_labels(dd):\n",
" return {'label': 0 if dd['label']==0 else 1 if dd['label']==0.5 else 2}"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 113,
"referenced_widgets": [
"9a95dfae21114a1a93bb50310d710754",
"275b3764999748038ce567a2e1b1b61b",
"eb75972d26854d568772f51e2b5426f1",
"ec5926d828874a10a80d06955600833e",
"adb0f07da27a4ddca3072a791f3958f4",
"6f48c9c0c7934e63a3a63d9f7288f06f",
"9384b570a2054e8481537ea14c7c7cbd",
"d834733cce544004a86dfb0aa233599e",
"59d0174058af4e12b058d0e0c95cfb23",
"8c7237abcde143819d6ddf52d42f32a8",
"f2c338fdddc148abb6193454f11dda2b",
"a977ebda8d794f41bae26fab5f14084f",
"e5051acb1a154d5b9a744d5c89dea9e0",
"accf803e6a4a4e8aa08f1726e0fbda1c",
"8b2cbf1f7de64500acb8e4484e37e56a",
"dc8bad987e904cfca8580fe7ed327c58",
"b189b0e55a1344a9b109d098d49ffb82",
"2f6c3884f0604922a192371cdac1290e",
"b231cbedbb474bf080e788f8352ade01",
"7badc04ce6574071947f2df0915b9542",
"46b9861a23bb42babd995f77620f21c8",
"ab051e934c5e481bab6b41a2bbe69906",
"49c090151d4040ed8527da7f65f0f07b",
"e5f57195b58c45a5ae1245d36906ba36",
"7f819f33cd7d49fd80b71e90188d1580",
"36f121566b244fe08e0aa433887cd47c",
"9337dcb073014911abd193e771195c03",
"01620fb554914c38918870f685805890",
"7c19eb696edf4f20861d92b434f88167",
"f35f004fac25428d801cfcd677b4acbe",
"d7976b33e48e4196b0440a89a86a5f52",
"fb373c03d38841fa86f9b359252c7139",
"2294fe7ca886491d9c5517ddb7eec86e"
]
},
"id": "pu_apMIIMMEg",
"outputId": "f666bc76-3deb-471a-e9db-fe6374b14954"
},
"source": [
"relabeled_dataset = encoded_dataset.map(change_labels)"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9a95dfae21114a1a93bb50310d710754",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/56023 [00:00, ?ex/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a977ebda8d794f41bae26fab5f14084f",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/7009 [00:00, ?ex/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "49c090151d4040ed8527da7f65f0f07b",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/6972 [00:00, ?ex/s]"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tDBLZI8CMVQT",
"outputId": "1c7771b1-b8eb-4df0-909e-a7fbe39077c3"
},
"source": [
"relabeled_dataset['train'][0]"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'__index_level_0__': 0,\n",
" 'attention_mask': [1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0],\n",
" 'input_ids': [0,\n",
" 17,\n",
" 5536,\n",
" 6,\n",
" 12026,\n",
" 87,\n",
" 1085,\n",
" 29,\n",
" 5699,\n",
" 13,\n",
" 408,\n",
" 24292,\n",
" 974,\n",
" 7,\n",
" 33821,\n",
" 21,\n",
" 946,\n",
" 6593,\n",
" 180,\n",
" 12,\n",
" 5,\n",
" 2,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1],\n",
" 'label': 0.0,\n",
" 'text': 'W szpitalu w Lubinie niestety zaniża jedynie drastycznie poziom i denerwuje większość pacjentek .'}"
]
},
"metadata": {},
"execution_count": 70
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iCmxDPGHTKWO",
"outputId": "5cd2e6e2-e5e7-4b84-9107-d6b01da0ddc2"
},
"source": [
"set(relabeled_dataset['train'][::]['label'])"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{0.0, 1.0, 2.0}"
]
},
"metadata": {},
"execution_count": 71
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "S1xYtGQAT3Xu",
"outputId": "18c78ced-eb5f-4878-d8f9-d1267e079e5f"
},
"source": [
"relabeled_dataset['train'].info.features"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'__index_level_0__': Value(dtype='int64', id=None),\n",
" 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),\n",
" 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),\n",
" 'label': Value(dtype='float64', id=None),\n",
" 'text': Value(dtype='string', id=None)}"
]
},
"metadata": {},
"execution_count": 72
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 134,
"referenced_widgets": [
"90a506b640c54985945de67ee75b0b04",
"82fe7eca36cc42d69672983632f04ffd",
"a09cb4c9b74b459f9b5b380abe1d9e4e",
"f71fa21be43646aaa21b3899d266764a",
"e1cc66f6c1cd4c1b9ee370f552768dec",
"6dd1aadc16794777a8102cc8a27bf639",
"aaaeec2e8ceb4acfa8daa90d2e901e16",
"1b7e82f3b0da4ef299cb835a91110be1",
"d2a698d539ae4848896498b05a234224",
"732c9ac9574e4dd79ab845319f9e916c",
"b2e075103c2740c0ba5a7e5fb4fb07e7"
]
},
"id": "NGvwhAngWUy1",
"outputId": "99f83c7d-7fa1-43cb-c0f9-55fa8cd47572"
},
"source": [
"from datasets import ClassLabel, Value\n",
"new_features = relabeled_dataset['train'].features.copy()\n",
"new_features[\"label\"] = ClassLabel(names=['LABEL_0', 'LABEL_1', 'LABEL_2'])\n",
"relabeled_dataset['train'] = relabeled_dataset['train'].cast(new_features)\n",
"relabeled_dataset['train'].features"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "90a506b640c54985945de67ee75b0b04",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/6 [00:00, ?ba/s]"
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'__index_level_0__': Value(dtype='int64', id=None),\n",
" 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),\n",
" 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),\n",
" 'label': ClassLabel(num_classes=3, names=['LABEL_0', 'LABEL_1', 'LABEL_2'], names_file=None, id=None),\n",
" 'text': Value(dtype='string', id=None)}"
]
},
"metadata": {},
"execution_count": 73
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"dc86b6ae1b2c49089fdf66d3c4fdb092",
"c3bfc3f461fb414890981bb14a9cdf65",
"59751096baad41b4ab70d77f8bd424d4",
"5693d90b0ef64334bb0dbbe876db4d29",
"4b16edac5070467d85f860876453f27d",
"9c0980093556435c9e1d818ebb5b4573",
"078aa55a428d489f98525190e8c802d9",
"fd52e7c8c00e438eb9f1763dd215cb8a",
"84a59a95194542868f75c4be3639f750",
"fedf2e50afec4a07b8f215b02dc01024",
"f3d66973388048eb9bc9cb3dfdf2b7f9",
"dce01c7c32ca497086441282121489c0",
"c8445136cfd84ef29140f3fca960173e",
"85b6d179d2f94a2e8723c52cb910143f",
"f2a1806e2f754e5b8b1033c6667e547a",
"227376b21fb44ae3aa490f7eb683f622",
"5248261748eb4085875802c2f6cf920d",
"4e6b143070a642f0bf856ac3e07777be",
"9b64280311714e41a802f5a4b9816b85",
"86bd3fa51c4749619f04695e175f3ca9",
"640de67654fa4ab1a4ea408f0630d74d",
"eb9930e1e30747758295405343e5ba77"
]
},
"id": "fw-iTBF_WUfI",
"outputId": "a2f75751-3e06-4e50-c296-2ff123dbf031"
},
"source": [
"relabeled_dataset['dev'] = relabeled_dataset['dev'].cast(new_features)\n",
"relabeled_dataset['test'] = relabeled_dataset['test'].cast(new_features)"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dc86b6ae1b2c49089fdf66d3c4fdb092",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/1 [00:00, ?ba/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dce01c7c32ca497086441282121489c0",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/1 [00:00, ?ba/s]"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "35x01giHNmrj",
"outputId": "61816a4b-8819-4352-8b72-9f7d2550eee2"
},
"source": [
"input = rtokenizer.encode(\"Moim zdaniem jest za wcześnie na ocenę , czekam aż to wszystko zadziała , ale jestem pozytywnie nastawiona . \");\n",
"output = classfmodel3(torch.tensor([input]).to('cpu'))[0]\n",
"print(output)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"tensor([[ 0.1173, -0.0035, 0.0580]], grad_fn=)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KGa5jz0uNxpV"
},
"source": [
"small_train_rel_dataset = relabeled_dataset[\"train\"].shuffle(seed=42).select(range(1000))\n",
"small_eval_rel_dataset = relabeled_dataset[\"dev\"].shuffle(seed=42).select(range(1000))\n",
"full_train_rel_dataset = relabeled_dataset[\"train\"]\n",
"full_eval_rel_dataset = relabeled_dataset[\"dev\"]"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "D_lR7tR_OHLy"
},
"source": [
"def compute_metrics3(eval_pred):\n",
" outputs, labels = eval_pred\n",
" predictions = np.argmax(outputs, axis=-1)\n",
" #pred_qt = ['LABEL_0' if pred==0 else 'LABEL_1' if pred==1 else 'LABEL_2' for pred in predictions]\n",
" return {'pp_accuracy3':np.sum([1 if x==y else 0 for x, y in zip(predictions, labels)])/len(predictions)}"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TuuznOtQOUTS",
"outputId": "137ab4ae-cb74-4269-ca2c-a618def4ac7c"
},
"source": [
"training_args3 = TrainingArguments(\n",
" \"test-classf2\",\n",
" evaluation_strategy = \"epoch\",\n",
" save_strategy = 'epoch',\n",
" learning_rate=3e-5,\n",
" per_device_train_batch_size=16,\n",
" per_device_eval_batch_size=16,\n",
" num_train_epochs=3,\n",
" weight_decay=0.01,\n",
" load_best_model_at_end=True,\n",
" metric_for_best_model=\"pp_accuracy3\"\n",
")"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 210
},
"id": "8fYS7iu8PteV",
"outputId": "1fcae89d-3a5e-48ae-edee-213ef62c02b7"
},
"source": [
"#before training\n",
"#we will train on small datasets\n",
"trainer3 = Trainer(\n",
" model=classfmodel3,\n",
" args=training_args3,\n",
" train_dataset=small_train_rel_dataset,\n",
" eval_dataset=small_eval_rel_dataset,\n",
" compute_metrics=compute_metrics3,\n",
")\n",
"trainer3.evaluate()"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 00:13]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'eval_loss': 1.092584252357483,\n",
" 'eval_pp_accuracy3': 0.372,\n",
" 'eval_runtime': 14.0461,\n",
" 'eval_samples_per_second': 71.194,\n",
" 'eval_steps_per_second': 4.485}"
]
},
"metadata": {},
"execution_count": 82
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 820
},
"id": "3VnSwr77QC-e",
"outputId": "9ecbf11a-0381-4161-f681-d6ff6c891c10"
},
"source": [
"trainer3.train()"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running training *****\n",
" Num examples = 1000\n",
" Num Epochs = 3\n",
" Instantaneous batch size per device = 16\n",
" Total train batch size (w. parallel, distributed & accumulation) = 16\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 189\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [189/189 03:09, Epoch 3/3]\n",
"
\n",
" \n",
" \n",
" \n",
" Epoch \n",
" Training Loss \n",
" Validation Loss \n",
" Pp Accuracy3 \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" No log \n",
" 0.559895 \n",
" 0.802000 \n",
" \n",
" \n",
" 2 \n",
" No log \n",
" 0.516475 \n",
" 0.812000 \n",
" \n",
" \n",
" 3 \n",
" No log \n",
" 0.497726 \n",
" 0.822000 \n",
" \n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [63/63 01:14]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to test-classf2/checkpoint-63\n",
"Configuration saved in test-classf2/checkpoint-63/config.json\n",
"Model weights saved in test-classf2/checkpoint-63/pytorch_model.bin\n",
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n",
"Saving model checkpoint to test-classf2/checkpoint-126\n",
"Configuration saved in test-classf2/checkpoint-126/config.json\n",
"Model weights saved in test-classf2/checkpoint-126/pytorch_model.bin\n",
"The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 16\n",
"Saving model checkpoint to test-classf2/checkpoint-189\n",
"Configuration saved in test-classf2/checkpoint-189/config.json\n",
"Model weights saved in test-classf2/checkpoint-189/pytorch_model.bin\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from test-classf2/checkpoint-189 (score: 0.822).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=189, training_loss=0.547641532130973, metrics={'train_runtime': 190.3365, 'train_samples_per_second': 15.762, 'train_steps_per_second': 0.993, 'total_flos': 197335063296000.0, 'train_loss': 0.547641532130973, 'epoch': 3.0})"
]
},
"metadata": {},
"execution_count": 83
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 125
},
"id": "We3tvHRgSFKo",
"outputId": "59ee6c8f-3574-4d0e-e4e4-e928c002769a"
},
"source": [
"output_test3 = trainer3.predict(relabeled_dataset[\"test\"])"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text, __index_level_0__.\n",
"***** Running Prediction *****\n",
" Num examples = 6972\n",
" Batch size = 16\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [436/436 01:37]\n",
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kB9I9DNSiP83",
"outputId": "5010326c-a7d7-4566-ea6a-e609d474b0a7"
},
"source": [
"output_test3"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"PredictionOutput(predictions=array([[ 1.9131775 , -0.9798152 , -1.3284094 ],\n",
" [ 2.034114 , -1.0943435 , -1.4057848 ],\n",
" [ 2.0149415 , -0.91457725, -1.5157132 ],\n",
" ...,\n",
" [-0.99631196, -1.0695031 , 2.2872145 ],\n",
" [-1.0586506 , -0.9808752 , 2.2321594 ],\n",
" [ 1.9522315 , -0.9538001 , -1.3699281 ]], dtype=float32), label_ids=array([0, 0, 0, ..., 2, 2, 1]), metrics={'test_loss': 0.49948015809059143, 'test_pp_accuracy3': 0.8181296615031555, 'test_runtime': 98.0096, 'test_samples_per_second': 71.136, 'test_steps_per_second': 4.449})"
]
},
"metadata": {},
"execution_count": 85
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "EAgK6daVSUvc"
},
"source": [
"#let's see some measures of how good the predictions are\n",
"truth = np.array(relabeled_dataset[\"test\"]['label'])\n",
"pred = np.array([np.argmax(out) for out in output_test3[0]])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "U7hp-SmcUFOI"
},
"source": [
"ct = pd.crosstab(truth, pred, rownames=['truth'], colnames=['pred'])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "qSKYwC1xgo6J",
"outputId": "b7a29f18-aaa5-457f-c08e-00f5da8f2c6d"
},
"source": [
"ct"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" pred \n",
" 0 \n",
" 1 \n",
" 2 \n",
" \n",
" \n",
" truth \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 2057 \n",
" 350 \n",
" 263 \n",
" \n",
" \n",
" 1 \n",
" 196 \n",
" 1815 \n",
" 204 \n",
" \n",
" \n",
" 2 \n",
" 65 \n",
" 190 \n",
" 1832 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"pred 0 1 2\n",
"truth \n",
"0 2057 350 263\n",
"1 196 1815 204\n",
"2 65 190 1832"
]
},
"metadata": {},
"execution_count": 88
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wCo3xLExgsD_",
"outputId": "2814fe4b-95d9-47db-b9b9-7f4dfa72f0d8"
},
"source": [
"#accuracy\n",
"#percentage of correct guesses (positive and others)\n",
"(ct[0][0]+ct[1][1]+ct[2][2])/ct.values.sum()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.8181296615031555"
]
},
"metadata": {},
"execution_count": 89
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "exp8ns__gwAF"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gbGjbhE9i7fz"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}