From 0b9c89245952eebc39452c66da240e7535db1d4a Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Thu, 14 Mar 2024 00:42:21 +0100 Subject: [PATCH 1/9] add initial relitc files --- Project.toml | 6 +- dev/notebooks/.gitignore | 3 + dev/notebooks/CondaPkg.toml | 2 + dev/notebooks/Project.toml | 7 + dev/notebooks/RELITC.ipynb | 199 ++++++++++++++++++++++++++ dev/notebooks/word_attributions.ipynb | 143 ++++++++++++++++++ 6 files changed, 357 insertions(+), 3 deletions(-) create mode 100644 dev/notebooks/.gitignore create mode 100644 dev/notebooks/CondaPkg.toml create mode 100644 dev/notebooks/Project.toml create mode 100644 dev/notebooks/RELITC.ipynb create mode 100644 dev/notebooks/word_attributions.ipynb diff --git a/Project.toml b/Project.toml index 481ba63ee..948a20e96 100755 --- a/Project.toml +++ b/Project.toml @@ -40,10 +40,9 @@ LaplaceReduxExt = "LaplaceRedux" MPIExt = "MPI" [compat] +CUDA = "3, 4, 5" CategoricalArrays = "0.10" ChainRulesCore = "1.15" -CUDA = "3, 4, 5" -cuDNN = "1" DataFrames = "1" DecisionTree = "0.12.3" Distributions = "0.25.97" @@ -57,6 +56,7 @@ MLJBase = "0.21, 1" MLJDecisionTreeInterface = "0.4.0" MLUtils = "0.2, 0.3, 0.4" MPI = "0.20" +MPIPreferences = "0.2" MultivariateStats = "0.9, 0.10" PackageExtensionCompat = "1" Parameters = "0.12" @@ -67,8 +67,8 @@ Statistics = "1" StatsBase = "0.33, 0.34" Tables = "1" UUIDs = "1.6, 1.7, 1.8, 1.9, 1.10" +cuDNN = "1" julia = "1.6, 1.7, 1.8, 1.9, 1.10" -MPIPreferences = "0.2" [extras] EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" diff --git a/dev/notebooks/.gitignore b/dev/notebooks/.gitignore new file mode 100644 index 000000000..95afe1b38 --- /dev/null +++ b/dev/notebooks/.gitignore @@ -0,0 +1,3 @@ +.ipynb_checkpoints +.CondaPkg +model \ No newline at end of file diff --git a/dev/notebooks/CondaPkg.toml b/dev/notebooks/CondaPkg.toml new file mode 100644 index 000000000..bc61831aa --- /dev/null +++ b/dev/notebooks/CondaPkg.toml @@ -0,0 +1,2 @@ +[deps] +transformers-interpret = "" diff --git a/dev/notebooks/Project.toml b/dev/notebooks/Project.toml new file mode 100644 index 000000000..04986f3de --- /dev/null +++ b/dev/notebooks/Project.toml @@ -0,0 +1,7 @@ +[deps] +CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" +IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" +PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" +Transformers = "21ca0261-441d-5938-ace7-c90938fde4d4" +TrillionDollarWords = "d66529d5-f4f4-49d9-a69b-da67f5535f0a" diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb new file mode 100644 index 000000000..0afdfa5ea --- /dev/null +++ b/dev/notebooks/RELITC.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "098c3a0a-8fc0-44da-ab50-fe289ef9f56f", + "metadata": {}, + "outputs": [], + "source": [ + "using Pkg" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "88746cde-6260-40cd-92c3-0316deced701", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1mStatus\u001b[22m\u001b[39m `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", + " \u001b[90m[438e738f] \u001b[39mPyCall v1.96.4\n", + " \u001b[90m[21ca0261] \u001b[39mTransformers v0.2.8\n", + " \u001b[90m[d66529d5] \u001b[39mTrillionDollarWords v0.1.0\n" + ] + } + ], + "source": [ + "Pkg.status()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d12f0762-09fa-4013-b52a-466e30028eef", + 
"metadata": {}, + "outputs": [], + "source": [ + "import Transformers\n", + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "66f296ec-2419-43db-bd2a-5a62de2eafeb", + "metadata": {}, + "outputs": [], + "source": [ + "using TrillionDollarWords " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7eb64489-f40e-4dbe-b447-0e87d8e20104", + "metadata": {}, + "outputs": [], + "source": [ + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c97eb28b-1ad1-416b-a97b-8a4c02ed1a62", + "metadata": {}, + "outputs": [], + "source": [ + "df = load_all_sentences()\n", + "n = 2\n", + "queries = df[1:n, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "667a0a96-53ae-4aec-8ec7-14013291ec0e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(hidden_state = Float32[-0.6870835 -0.24942288 … -0.0025752056 -0.0017432718; 0.6610133 0.31153536 … 0.013969824 0.0137721775; … ; -0.8090527 -1.3571459 … -0.17556852 -0.17569499; -0.5847161 -1.2344005 … -0.044540238 -0.04152311;;;], logit = Float32[-3.221773; -3.1708598; 6.291086;;])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "logits = cls([queries[1, :].sentence])" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "930bdca2-83c3-4f68-bb43-43fc4c470c25", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1m Building\u001b[22m\u001b[39m Conda ─→ `C:\\Users\\drobi\\.julia\\scratchspaces\\44cfe95a-1eb2-52ea-b672-e2afdf69b78f\\51cab8e982c5b598eea9c8ceaced4b58d9dd37c9\\build.log`\n", + "\u001b[32m\u001b[1m Building\u001b[22m\u001b[39m PyCall → `C:\\Users\\drobi\\.julia\\scratchspaces\\44cfe95a-1eb2-52ea-b672-e2afdf69b78f\\9816a3826b0ebf49ab4926e2b18842ad8b5c8f04\\build.log`\n" + ] + } + ], + "source": [ + "ENV[\"Python\"] = raw\"C:\\Users\\drobi\\anaconda3\\envs\\relitc\\python.exe\"\n", + "using Pkg\n", + "Pkg.build(\"PyCall\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b52122d1-7e3f-41b5-94ac-a6329eb7b7e8", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "PyObject " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "using PyCall\n", + "ti = pyimport(\"transformers_interpret\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f9e9713-ddb3-4f79-841b-100c9a09328a", + "metadata": {}, + "outputs": [], + "source": [ + "Transformers.HuggingFace.save_model(\"Julia_FOMC\", cls.mod; path = pwd(), weight_name = \"weight\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e0b2bb70-d889-4f73-92c0-a8719a4aa6a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"C:\\\\Users\\\\drobi\\\\Desktop\\\\uni\\\\master_thesis\\\\CounterfactualExplanations.jl\\\\dev\\\\notebooks\\\\Julia_FOMC\\\\config.json\"" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Transformers.HuggingFace.save_config(\"Julia_FOMC\", cls.cfg; path = pwd(), config_name = \"config.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0a2a74a-a93f-4df9-85df-5e61de487128", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/word_attributions.ipynb b/dev/notebooks/word_attributions.ipynb new file mode 100644 index 000000000..0429325b7 --- /dev/null +++ b/dev/notebooks/word_attributions.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "255175ec-037e-4238-9db3-24c24d324544", + "metadata": {}, + "outputs": [], + "source": [ + "import transformers\n", + "import transformers_interpret" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dbe08d3e", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoTokenizer, RobertaForSequenceClassification" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a713410b", + "metadata": {}, + "outputs": [], + "source": [ + "PATH = \"C:/Users/drobi/Desktop/uni/master_thesis/CounterfactualExplanations.jl/dev/notebooks/Julia_FOMC\"\n", + "model = RobertaForSequenceClassification.from_pretrained(PATH, local_files_only=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d226e1d3", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers_interpret import SequenceClassificationExplainer" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1be89e16", + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "99fe89ef", + "metadata": {}, + "outputs": [], + "source": [ + "scorer = SequenceClassificationExplainer(model, tokenizer, attribution_type='lig')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "164b6931", + "metadata": {}, + "outputs": [], + "source": [ + "x = \"dict, it contains the list of tokens and the list of feature importances\"\n", + "attributions = scorer(x, index=0, internal_batch_size=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e7ef3cfc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('', 0.0),\n", + " ('dict', -0.1658131611215076),\n", + " (',', -0.016083339129156157),\n", + " ('it', -0.05116318593480035),\n", + " ('contains', 0.06590468067645444),\n", + " ('the', -0.47440672660259087),\n", + " ('list', 0.10948647313489888),\n", + " ('of', -0.14820110831227676),\n", + " ('tokens', -0.155580645658077),\n", + " ('and', -0.047361977300854),\n", + " ('the', 0.07312371424594372),\n", + " ('list', 0.029488012987564528),\n", + " ('of', -0.06527359636271021),\n", + " ('feature', -0.021884019853495997),\n", + " ('import', 0.2501637522551888),\n", + " ('ances', -0.7786443696445894),\n", + " ('', 0.0)]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attributions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47dfe7e9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "relitc", + "language": "python", + "name": "relitc" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 83846b8442d2e1747f6013a33ab6c9adbd383974 Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Thu, 14 Mar 2024 19:22:56 +0100 Subject: [PATCH 2/9] add masked LM --- dev/notebooks/Masked_LM_Training.ipynb | 103 +++++++++++++++++++++++++ dev/notebooks/RELITC.ipynb | 85 ++++++++------------ 2 files changed, 134 insertions(+), 54 deletions(-) create mode 100644 dev/notebooks/Masked_LM_Training.ipynb diff --git a/dev/notebooks/Masked_LM_Training.ipynb b/dev/notebooks/Masked_LM_Training.ipynb new file mode 100644 index 000000000..e9d4737de --- /dev/null +++ b/dev/notebooks/Masked_LM_Training.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "id": "0de19327-1494-4f9c-a799-d5551fac5826", + "metadata": {}, + "outputs": [], + "source": [ + "using Transformers\n", + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace\n", + "using TrillionDollarWords" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a596f684-1b82-4823-987b-adc633545977", + "metadata": {}, + "outputs": [], + "source": [ + "bert_enc = hgf\"bert-base-uncased:tokenizer\"\n", + "bert_model = hgf\"bert-base-uncased:ForMaskedLM\";" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d756f4e6-026b-4f3c-9f3d-3274953e16de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(hidden_state = Float32[-0.025804881 -0.6098507 … -0.24308763 0.8577285; 0.2154967 0.3834608 … 0.22081807 0.2481272; … ; -0.0866155 0.20339105 … 0.11783406 -0.6430016; -0.017820084 0.3142122 … -0.13285579 -0.32952666;;;], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[10]), logit = Float32[-6.5805893 -6.8866 … -11.95905 -12.119816; -6.526532 -6.55237 … -12.090809 -11.829693; … ; -5.7084327 -5.6025305 … -11.248217 -9.540642; -3.9600742 -4.9789352 … -4.581249 -9.307364;;;])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = bert_model(encode(bert_enc, \"hello world and my [MASK] of [MASK]!\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "3d58e401-25ed-4389-9593-5a38701c57ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10×1 Matrix{String}:\n", + " \".\"\n", + " \"hello\"\n", + " \"world\"\n", + " \"and\"\n", + " \"my\"\n", + " \"love\"\n", + " \"of\"\n", + " \"friends\"\n", + " \"!\"\n", + " \".\"" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "decode(bert_enc, out.logit)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff831709-34d2-4823-9b6d-021a61eaf132", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb index 0afdfa5ea..7b804c322 100644 --- a/dev/notebooks/RELITC.ipynb +++ b/dev/notebooks/RELITC.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "66f296ec-2419-43db-bd2a-5a62de2eafeb", "metadata": {}, "outputs": [], @@ -65,80 +65,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": 
"c97eb28b-1ad1-416b-a97b-8a4c02ed1a62", "metadata": {}, - "outputs": [], - "source": [ - "df = load_all_sentences()\n", - "n = 2\n", - "queries = df[1:n, :]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "667a0a96-53ae-4aec-8ec7-14013291ec0e", - "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
2×8 DataFrame
Row | sentence_id | doc_id | date | event_type | label | sentence | score | speaker
    | Int64 | Int64 | Date | String31 | String7 | String | Float64 | String?
1 | 1 | 1 | 1996-01-30 | meeting minutes | neutral | The Committee then turned to a discussion of the economic and financial outlook, the ranges for the growth of money and debt in 1996, and the implementation of monetary policy over the intermeeting period ahead. | 0.999848 | missing
2 | 2 | 1 | 1996-01-30 | meeting minutes | neutral | Consumer spending had expanded modestly on balance, growth in business invest- ment in capital goods appeared to have slackened somewhat recently, and housing demand seemed to have leveled out. | 0.999584 | missing
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccc}\n", + "\t& sentence\\_id & doc\\_id & date & event\\_type & label & sentence & \\\\\n", + "\t\\hline\n", + "\t& Int64 & Int64 & Date & String31 & String7 & String & \\\\\n", + "\t\\hline\n", + "\t1 & 1 & 1 & 1996-01-30 & meeting minutes & neutral & The Committee then turned to a discussion of the economic and financial outlook, the ranges for the growth of money and debt in 1996, and the implementation of monetary policy over the intermeeting period ahead. & $\\dots$ \\\\\n", + "\t2 & 2 & 1 & 1996-01-30 & meeting minutes & neutral & Consumer spending had expanded modestly on balance, growth in business invest- ment in capital goods appeared to have slackened somewhat recently, and housing demand seemed to have leveled out. & $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], "text/plain": [ - "(hidden_state = Float32[-0.6870835 -0.24942288 … -0.0025752056 -0.0017432718; 0.6610133 0.31153536 … 0.013969824 0.0137721775; … ; -0.8090527 -1.3571459 … -0.17556852 -0.17569499; -0.5847161 -1.2344005 … -0.044540238 -0.04152311;;;], logit = Float32[-3.221773; -3.1708598; 6.291086;;])" + "\u001b[1m2×8 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m sentence_id \u001b[0m\u001b[1m doc_id \u001b[0m\u001b[1m date \u001b[0m\u001b[1m event_type \u001b[0m\u001b[1m label \u001b[0m\u001b[1m sentence \u001b[0m ⋯\n", + " │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Date \u001b[0m\u001b[90m String31 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m String \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ 1 1 1996-01-30 meeting minutes neutral The Committe ⋯\n", + " 2 │ 2 1 1996-01-30 meeting minutes neutral Consumer spe\n", + "\u001b[36m 3 columns omitted\u001b[0m" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "logits = cls([queries[1, :].sentence])" + "df = load_all_sentences()\n", + "n = 2\n", + "queries = df[1:n, :]" ] }, { "cell_type": "code", - "execution_count": 1, - "id": "930bdca2-83c3-4f68-bb43-43fc4c470c25", + "execution_count": 4, + "id": "667a0a96-53ae-4aec-8ec7-14013291ec0e", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m Building\u001b[22m\u001b[39m Conda ─→ `C:\\Users\\drobi\\.julia\\scratchspaces\\44cfe95a-1eb2-52ea-b672-e2afdf69b78f\\51cab8e982c5b598eea9c8ceaced4b58d9dd37c9\\build.log`\n", - "\u001b[32m\u001b[1m Building\u001b[22m\u001b[39m PyCall → `C:\\Users\\drobi\\.julia\\scratchspaces\\44cfe95a-1eb2-52ea-b672-e2afdf69b78f\\9816a3826b0ebf49ab4926e2b18842ad8b5c8f04\\build.log`\n" - ] - } - ], - "source": [ - "ENV[\"Python\"] = raw\"C:\\Users\\drobi\\anaconda3\\envs\\relitc\\python.exe\"\n", - "using Pkg\n", - "Pkg.build(\"PyCall\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b52122d1-7e3f-41b5-94ac-a6329eb7b7e8", - "metadata": { - "scrolled": true - }, "outputs": [ { "data": { "text/plain": [ - "PyObject " + "(hidden_state = Float32[-0.6870835 -0.24942288 … -0.0025752056 -0.0017432718; 0.6610133 0.31153536 … 0.013969824 0.0137721775; … ; -0.8090527 -1.3571459 … -0.17556852 -0.17569499; -0.5847161 -1.2344005 … -0.044540238 -0.04152311;;;], logit = Float32[-3.221773; -3.1708598; 6.291086;;])" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "using PyCall\n", - "ti = pyimport(\"transformers_interpret\")" + "logits = 
cls([queries[1, :].sentence])" ] }, { @@ -171,14 +156,6 @@ "source": [ "Transformers.HuggingFace.save_config(\"Julia_FOMC\", cls.cfg; path = pwd(), config_name = \"config.json\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0a2a74a-a93f-4df9-85df-5e61de487128", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 742ceb2e88c87201eb9840945837ac111226b131 Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Mon, 18 Mar 2024 23:55:58 +0100 Subject: [PATCH 3/9] add python call --- dev/notebooks/CondaPkg.toml | 6 + dev/notebooks/Masked_LM_Training.ipynb | 103 ---- dev/notebooks/Project.toml | 3 + dev/notebooks/RELITC.ipynb | 647 ++++++++++++++++++++++--- dev/notebooks/RELITC_training.ipynb | 176 +++++++ dev/notebooks/importances.py | 18 + dev/notebooks/word_attributions.ipynb | 87 +--- 7 files changed, 815 insertions(+), 225 deletions(-) delete mode 100644 dev/notebooks/Masked_LM_Training.ipynb create mode 100644 dev/notebooks/RELITC_training.ipynb create mode 100644 dev/notebooks/importances.py diff --git a/dev/notebooks/CondaPkg.toml b/dev/notebooks/CondaPkg.toml index bc61831aa..6a4dc0c33 100644 --- a/dev/notebooks/CondaPkg.toml +++ b/dev/notebooks/CondaPkg.toml @@ -1,2 +1,8 @@ +channels = ["anaconda", "pytorch", "nvidia", "conda-forge"] + [deps] +pytorch-cuda = "12.1" +pytorch = "" transformers-interpret = "" +python = ">=3.8,<4" +transformers = "4.15.0" diff --git a/dev/notebooks/Masked_LM_Training.ipynb b/dev/notebooks/Masked_LM_Training.ipynb deleted file mode 100644 index e9d4737de..000000000 --- a/dev/notebooks/Masked_LM_Training.ipynb +++ /dev/null @@ -1,103 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 16, - "id": "0de19327-1494-4f9c-a799-d5551fac5826", - "metadata": {}, - "outputs": [], - "source": [ - "using Transformers\n", - "using Transformers.TextEncoders\n", - "using Transformers.HuggingFace\n", - "using TrillionDollarWords" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "a596f684-1b82-4823-987b-adc633545977", - "metadata": {}, - "outputs": [], - "source": [ - "bert_enc = hgf\"bert-base-uncased:tokenizer\"\n", - "bert_model = hgf\"bert-base-uncased:ForMaskedLM\";" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "d756f4e6-026b-4f3c-9f3d-3274953e16de", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(hidden_state = Float32[-0.025804881 -0.6098507 … -0.24308763 0.8577285; 0.2154967 0.3834608 … 0.22081807 0.2481272; … ; -0.0866155 0.20339105 … 0.11783406 -0.6430016; -0.017820084 0.3142122 … -0.13285579 -0.32952666;;;], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[10]), logit = Float32[-6.5805893 -6.8866 … -11.95905 -12.119816; -6.526532 -6.55237 … -12.090809 -11.829693; … ; -5.7084327 -5.6025305 … -11.248217 -9.540642; -3.9600742 -4.9789352 … -4.581249 -9.307364;;;])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = bert_model(encode(bert_enc, \"hello world and my [MASK] of [MASK]!\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "3d58e401-25ed-4389-9593-5a38701c57ce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "10×1 Matrix{String}:\n", - " \".\"\n", - " \"hello\"\n", - " \"world\"\n", - " \"and\"\n", - " \"my\"\n", - " \"love\"\n", - " \"of\"\n", - " \"friends\"\n", - " \"!\"\n", - " \".\"" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"decode(bert_enc, out.logit)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff831709-34d2-4823-9b6d-021a61eaf132", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.10.2", - "language": "julia", - "name": "julia-1.10" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.10.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/dev/notebooks/Project.toml b/dev/notebooks/Project.toml index 04986f3de..190dc572a 100644 --- a/dev/notebooks/Project.toml +++ b/dev/notebooks/Project.toml @@ -1,7 +1,10 @@ [deps] +CUDNN_jll = "62b44479-cb7b-5706-934f-f13b2eb2e645" CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" +PreferenceTools = "ba661fbb-e901-4445-b070-854aec6bfbc5" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" Transformers = "21ca0261-441d-5938-ace7-c90938fde4d4" TrillionDollarWords = "d66529d5-f4f4-49d9-a69b-da67f5535f0a" +cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb index 7b804c322..d771637d7 100644 --- a/dev/notebooks/RELITC.ipynb +++ b/dev/notebooks/RELITC.ipynb @@ -2,160 +2,687 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "id": "098c3a0a-8fc0-44da-ab50-fe289ef9f56f", + "execution_count": 29, + "id": "a521610f-12dc-4e6e-80e7-b51328794d51", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n", + "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", + "\u001b[33m⌅\u001b[39m \u001b[90m[62b44479] \u001b[39m\u001b[92m+ CUDNN_jll v8.9.4+0\u001b[39m\n", + "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Manifest.toml`\n" + ] + } + ], + "source": [ + "using Pkg\n", + "Pkg.add(\"CUDNN_jll\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "0de19327-1494-4f9c-a799-d5551fac5826", "metadata": {}, "outputs": [], "source": [ - "using Pkg" + "using DataFrames\n", + "using Transformers\n", + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace\n", + "using TrillionDollarWords" + ] + }, + { + "cell_type": "markdown", + "id": "b4e29646-4837-4121-a9fe-6426a352811e", + "metadata": {}, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "a9c6c853-d63f-4f48-b188-0d12d9a11be0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
10×7 DataFrame
Row | sentence | year | label | seed | sentence_splitting | event_type | split
    | String | Int64 | String7 | Int64 | Bool | String31 | String7
1 | remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. | 1996 | hawkish | 5768 | true | meeting minutes | test
2 | A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. | 1996 | neutral | 5768 | true | meeting minutes | test
3 | inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. | 1996 | neutral | 5768 | true | meeting minutes | test
4 | They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. | 1996 | neutral | 5768 | true | meeting minutes | test
5 | In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. | 1996 | hawkish | 5768 | true | meeting minutes | test
6 | In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. | 1996 | neutral | 5768 | true | meeting minutes | test
7 | In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. | 1996 | dovish | 5768 | true | meeting minutes | test
8 | Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. | 1996 | dovish | 5768 | true | meeting minutes | test
9 | Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. | 1996 | hawkish | 5768 | true | meeting minutes | test
10 | Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. | 1996 | neutral | 5768 | true | meeting minutes | test
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& sentence & \\\\\n", + "\t\\hline\n", + "\t& String & \\\\\n", + "\t\\hline\n", + "\t1 & remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. & $\\dots$ \\\\\n", + "\t2 & A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. & $\\dots$ \\\\\n", + "\t3 & inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. & $\\dots$ \\\\\n", + "\t4 & They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. & $\\dots$ \\\\\n", + "\t5 & In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. & $\\dots$ \\\\\n", + "\t6 & In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. & $\\dots$ \\\\\n", + "\t7 & In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. & $\\dots$ \\\\\n", + "\t8 & Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. & $\\dots$ \\\\\n", + "\t9 & Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. & $\\dots$ \\\\\n", + "\t10 & Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. 
& $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m10×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m sentence \u001b[0m\u001b[1m year \u001b[0m\u001b[1m label \u001b[0m\u001b[1m seed \u001b[0m\u001b[1m sentence_spli\u001b[0m ⋯\n", + " │\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Bool \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ remained well below their levels… 1996 hawkish 5768 ⋯\n", + " 2 │ A few participants also noted th… 1996 neutral 5768\n", + " 3 │ inflation was projected to pick … 1996 neutral 5768\n", + " 4 │ They noted that the realization … 1996 neutral 5768\n", + " 5 │ In the view of one member, howev… 1996 hawkish 5768 ⋯\n", + " 6 │ In the circumstances, most membe… 1996 neutral 5768\n", + " 7 │ In the staff forecast prepared f… 1996 dovish 5768\n", + " 8 │ Housing starts and the demand fo… 1996 dovish 5768\n", + " 9 │ Pressures on resources would ris… 1996 hawkish 5768 ⋯\n", + " 10 │ Price inflation had picked up a … 1996 neutral 5768\n", + "\u001b[36m 3 columns omitted\u001b[0m" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n = 10\n", + "data = load_training_sentences()\n", + "texts = filter(:split => n -> n == \"test\", data)[1:n, :]" + ] + }, + { + "cell_type": "markdown", + "id": "c716f911-920d-468b-92e8-8ca639367303", + "metadata": {}, + "source": [ + "### Get attributions" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "88746cde-6260-40cd-92c3-0316deced701", + "execution_count": 4, + "id": "e00a7c85-2a74-41bf-ad75-899f3317dac9", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mResolving changes\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m\u001b[32m+ transformers-interpret\u001b[39m\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mInstalling packages\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mC:\\Users\\drobi\\.julia\\conda\\3\\x86_64\\Scripts\\mamba.exe\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90minstall\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-y\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-p C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\.CondaPkg\\env\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m--override-channels\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m--no-channel-priority\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpython[version='>=3.8,<4']\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpython[version='>=3.7,<4',channel='anaconda']\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpytorch[version='*']\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpytorch-cuda[version='12.1']\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ 
\u001b[90mtransformers[version='4.15.0']\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mtransformers-interpret[version='*']\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-c anaconda\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-c conda-forge\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-c nvidia\u001b[39m\n", + "\u001b[32m\u001b[1m \u001b[22m\u001b[39m└ \u001b[90m-c pytorch\u001b[39m\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m\u001b[1mStatus\u001b[22m\u001b[39m `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", - " \u001b[90m[438e738f] \u001b[39mPyCall v1.96.4\n", - " \u001b[90m[21ca0261] \u001b[39mTransformers v0.2.8\n", - " \u001b[90m[d66529d5] \u001b[39mTrillionDollarWords v0.1.0\n" + "anaconda/win-64 Using cache\n", + "anaconda/noarch Using cache\n", + "conda-forge/win-64 Using cache\n", + "conda-forge/noarch Using cache\n", + "nvidia/win-64 Using cache\n", + "nvidia/noarch Using cache\n", + "pytorch/win-64 Using cache\n", + "pytorch/noarch Using cache\n", + "anaconda/win-64 Using cache\n", + "anaconda/noarch Using cache\n", + "Transaction\n", + "\n", + " Prefix: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\.CondaPkg\\env\n", + "\n", + " Updating specs:\n", + "\n", + " - python[version='>=3.8,<4']\n", + " - anaconda::python[version='>=3.7,<4']\n", + " - pytorch=*\n", + " - pytorch-cuda==12.1\n", + " - transformers==4.15.0\n", + " - transformers-interpret=*\n", + " - ca-certificates\n", + " - certifi\n", + " - openssl\n", + "\n", + "\n", + " Package Version Build Channel Size\n", + "-------------------------------------------------------------------------------\n", + " Install:\n", + "-------------------------------------------------------------------------------\n", + "\n", + " + xorg-libxau 1.0.11 hcd874cb_0 conda-forge 51kB\n", + " + libjpeg-turbo 3.0.0 hcfcfb64_1 conda-forge 823kB\n", + " + lerc 4.0.0 h63175ca_0 conda-forge 194kB\n", + " + libdeflate 1.19 hcfcfb64_0 conda-forge 153kB\n", + " + libbrotlicommon 1.1.0 hcfcfb64_1 conda-forge 71kB\n", + " + libpng 1.6.43 h19919ed_0 conda-forge 348kB\n", + " + pthread-stubs 0.4 hcd874cb_1001 conda-forge 6kB\n", + " + xorg-libxdmcp 1.1.3 hcd874cb_0 conda-forge 68kB\n", + " + libbrotlienc 1.1.0 hcfcfb64_1 conda-forge 247kB\n", + " + libbrotlidec 1.1.0 hcfcfb64_1 conda-forge 33kB\n", + " + libxcb 1.15 hcd874cb_0 conda-forge 970kB\n", + " + brotli-bin 1.1.0 hcfcfb64_1 conda-forge 21kB\n", + " + brotli 1.1.0 hcfcfb64_1 conda-forge 20kB\n", + " + libwebp-base 1.3.2 h2bbff1b_0 anaconda 338kB\n", + " + lz4-c 1.9.4 h2bbff1b_0 anaconda 143kB\n", + " + freetype 2.12.1 ha860e81_0 anaconda 528kB\n", + " + zstd 1.5.5 hd43e919_0 anaconda 2MB\n", + " + backcall 0.2.0 pyhd3eb1b0_0 anaconda 14kB\n", + " + munkres 1.1.4 py_0 anaconda 13kB\n", + " + parso 0.8.3 pyhd3eb1b0_0 anaconda 71kB\n", + " + pickleshare 0.7.5 pyhd3eb1b0_1003 anaconda 13kB\n", + " + decorator 5.1.1 pyhd3eb1b0_0 anaconda 12kB\n", + " + zipp 3.17.0 pyhd8ed1ab_0 conda-forge Cached\n", + " + python-dateutil 2.9.0 pyhd8ed1ab_0 conda-forge Cached\n", + " + pyparsing 3.1.2 pyhd8ed1ab_0 conda-forge 89kB\n", + " + cycler 0.12.1 pyhd8ed1ab_0 conda-forge 13kB\n", + " + wcwidth 0.2.13 pyhd8ed1ab_0 conda-forge Cached\n", + " + traitlets 5.14.2 pyhd8ed1ab_0 conda-forge Cached\n", + " + pygments 2.17.2 pyhd8ed1ab_0 conda-forge Cached\n", + " + jedi 0.19.1 
pyhd8ed1ab_0 conda-forge Cached\n", + " + importlib_resources 6.3.0 pyhd8ed1ab_0 conda-forge 31kB\n", + " + prompt-toolkit 3.0.42 pyha770c72_0 conda-forge Cached\n", + " + importlib-resources 6.3.0 pyhd8ed1ab_0 conda-forge 10kB\n", + " + unicodedata2 15.1.0 py39ha55989b_0 conda-forge 373kB\n", + " + kiwisolver 1.4.5 py39h1f6ef14_1 conda-forge 56kB\n", + " + contourpy 1.2.0 py39h1f6ef14_0 conda-forge 186kB\n", + " + libtiff 4.6.0 h6e2ebb7_2 conda-forge 787kB\n", + " + fonttools 4.49.0 py39ha55989b_0 conda-forge 2MB\n", + " + openjpeg 2.5.2 h3d672ee_0 conda-forge 238kB\n", + " + lcms2 2.16 h67d730c_0 conda-forge 508kB\n", + " + pillow 10.2.0 py39h368b509_0 conda-forge 42MB\n", + " + matplotlib-base 3.8.3 py39hf19769e_0 conda-forge 7MB\n", + " + matplotlib-inline 0.1.6 py39haa95532_0 anaconda 18kB\n", + " + captum 0.7.0 0 pytorch 1MB\n", + " + ipython 7.33.0 py39hcbf5309_0 conda-forge 1MB\n", + " + transformers-interpret 0.8.1 pyhd8ed1ab_0 conda-forge 35kB\n", + "\n", + " Reinstall:\n", + "-------------------------------------------------------------------------------\n", + "\n", + " o ca-certificates 2024.2.2 h56e8100_0 conda-forge Cached\n", + " o openssl 3.2.1 hcfcfb64_0 conda-forge Cached\n", + " o python 3.9.18 h1aa4202_0 anaconda Cached\n", + " o certifi 2024.2.2 pyhd8ed1ab_0 conda-forge Cached\n", + "\n", + " Summary:\n", + "\n", + " Install: 46 packages\n", + " Reinstall: 4 packages\n", + "\n", + " Total download: 61MB\n", + "\n", + "-------------------------------------------------------------------------------\n", + "\n", + "\n", + "\n", + "Looking for: [\"python[version='>=3.8,<4']\", \"anaconda::python[version='>=3.7,<4']\", 'pytorch=', 'pytorch-cuda==12.1', 'transformers==4.15.0', 'transformers-interpret=']\n", + "\n", + "\n", + "Downloading and Extracting Packages: ...working... done\n", + "Preparing transaction: ...working... done\n", + "Verifying transaction: ...working... done\n", + "Executing transaction: ...working... done\n" ] } ], "source": [ - "Pkg.status()" + "using CondaPkg\n", + "# CondaPkg.add(\"pytorch\")\n", + "# CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", + "# CondaPkg.add(\"transformers-interpret\")" ] }, { "cell_type": "code", - "execution_count": 1, - "id": "d12f0762-09fa-4013-b52a-466e30028eef", + "execution_count": 5, + "id": "2a605169-6109-41fb-973b-7d5a044e71f1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1mPrecompiling\u001b[22m\u001b[39m PythonCall\n", + "\u001b[32m ✓ \u001b[39mPythonCall\n", + " 1 dependency successfully precompiled in 20 seconds. 
24 already precompiled.\n" + ] + } + ], "source": [ - "import Transformers\n", - "using Transformers.TextEncoders\n", - "using Transformers.HuggingFace" + "using PythonCall" ] }, { "cell_type": "code", - "execution_count": 1, - "id": "66f296ec-2419-43db-bd2a-5a62de2eafeb", + "execution_count": 12, + "id": "12c9f296-f8bb-4142-a7db-ece5061b635e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Python: " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", + "transformers = PythonCall.pyimport(\"transformers\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e4ced2a6-e9dd-4179-b917-007e377cccd9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python: PreTrainedTokenizerFast(name_or_path='gtfintechlab/FOMC-RoBERTa', vocab_size=50265, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'sep_token': '', 'pad_token': '', 'cls_token': '', 'mask_token': AddedToken(\"\", rstrip=False, lstrip=True, single_word=False, normalized=False)})" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# load pre-trained classifier and corresponding tokenizer\n", + "model = transformers.RobertaForSequenceClassification.from_pretrained(\"model\", local_files_only=true)\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "21ab1d00-867f-4bd4-a2f6-83e4940554be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python: " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "ec54e441-653c-44ef-9cf5-684267a663bd", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python: [('', 0.0), ('rem', -0.09617849663936606), ('ained', -0.29702715615956665), ('well', 0.25551209840018674), ('below', -0.4015987185377847), ('their', -0.013975036770146217), ('levels', -0.13794157805498133), ('at', -0.002103001077563767), ('the', 0.14926277592717438), ('beginning', 0.08228264530334284), ('of', 0.08006335674570937), ('the', 0.24284418735078794), ('year', 0.20003154332566433), (',', 0.13104142887853437), ('and', 0.17795123590947837), ('that', 0.177958452206897), ('weaker', 0.07827504560711215), ('demand', 0.06225654464144761), ('and', 0.20338402474663345), ('earlier', -0.2643532357406369), ('declines', 0.14990056209456956), ('in', -0.3258371366392156), ('oil', -0.06976564966318043), ('prices', 0.011989646035101107), ('had', -0.15355389676950998), ('been', -0.10492307025848874), ('holding', -0.07771222018418246), ('down', -0.024919108109520634), ('consumer', 0.09788758630638592), ('price', 0.011979387105458176), ('inflation', -0.09257150340664654), ('.', 0.15492799445950947), ('', -0.3214473479860127), ('', 0.0)]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "using TrillionDollarWords " + "scorer(texts[1, :].sentence, index=0, internal_batch_size=1)" ] }, { "cell_type": "code", "execution_count": null, - "id": 
"7eb64489-f40e-4dbe-b447-0e87d8e20104", + "id": "ea2c5458-657c-4f69-8499-6fc46aa27760", "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "b25039fe-329f-46e4-b308-33bc42c734ea", + "metadata": {}, + "source": [ + "### Mask the word attributions" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "48278c99-7368-4eaa-b26f-1b91674fc514", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mfuse_unk is unsupported, the tokenization result might be slightly different in some cases.\n", + "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n", + "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mmatch token `` require to match with space on either side but that is not implemented here, the tokenization result might be slightly different in some cases.\n", + "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n" + ] + }, + { + "data": { + "text/plain": [ + "BaselineModel(GPT2TextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(CodeNormalizer(BPETokenization(GPT2Tokenization, bpe = CachedBPE(BPE(50000 merges))), codemap = CodeMap{UInt8 => UInt16}(3 code-ranges)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 50265, unk = , unki = 4),\n", + "├─ codemap = CodeMap{UInt8 => UInt16}(3 code-ranges),\n", + "├─ startsym = ,\n", + "├─ endsym = ,\n", + "├─ padsym = ,\n", + "├─ trunc = 256,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}(: Input: : (: Input: :)...)(target.token)\n", + " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(256))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_or_pad(256, , tail, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target := (target.token, target.attention_mask)\n", + "), HGFRobertaForSequenceClassification(HGFRobertaModel(Chain(CompositeEmbedding(token = Embed(1024, 50265), position = ApplyEmbed(.+, FixedLenPositionEmbed(1024, 514), Transformers.HuggingFace.roberta_pe_indices(1,)), segment = ApplyEmbed(.+, Embed(1024, 1), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(1024, ϵ = 1.0e-5))), Transformer<24>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 16, p = nothing), Fork<3>(Dense(W = (1024, 1024), b = true)), Dense(W = (1024, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (1024, 4096), b = true), Dense(W = (4096, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(DropoutLayer(Transformers.HuggingFace.FirstTokenPooler()), DropoutLayer(Dense(σ = NNlib.tanh_fast, W = (1024, 1024), b = true)), Dense(W = (1024, 3), b = true)))), Transformers.HuggingFace.HGFConfig{:roberta, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Dict{Symbol, Any}}(:use_cache => true, :torch_dtype => \"float32\", 
:vocab_size => 50265, :output_hidden_states => true, :hidden_act => \"gelu\", :num_hidden_layers => 24, :num_attention_heads => 16, :classifier_dropout => nothing, :type_vocab_size => 1, :intermediate_size => 4096…))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cls = TrillionDollarWords.load_model(; output_hidden_states=true)" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "c97eb28b-1ad1-416b-a97b-8a4c02ed1a62", + "execution_count": 9, + "id": "9badf647-2250-450d-95c5-e83f79117514", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
2×8 DataFrame
Rowsentence_iddoc_iddateevent_typelabelsentencescorespeaker
Int64Int64DateString31String7StringFloat64String?
1111996-01-30meeting minutesneutralThe Committee then turned to a discussion of the economic and financial outlook, the ranges for the growth of money and debt in 1996, and the implementation of monetary policy over the intermeeting period ahead.0.999848missing
2211996-01-30meeting minutesneutralConsumer spending had expanded modestly on balance, growth in business invest- ment in capital goods appeared to have slackened somewhat recently, and housing demand seemed to have leveled out.0.999584missing
" - ], - "text/latex": [ - "\\begin{tabular}{r|ccccccc}\n", - "\t& sentence\\_id & doc\\_id & date & event\\_type & label & sentence & \\\\\n", - "\t\\hline\n", - "\t& Int64 & Int64 & Date & String31 & String7 & String & \\\\\n", - "\t\\hline\n", - "\t1 & 1 & 1 & 1996-01-30 & meeting minutes & neutral & The Committee then turned to a discussion of the economic and financial outlook, the ranges for the growth of money and debt in 1996, and the implementation of monetary policy over the intermeeting period ahead. & $\\dots$ \\\\\n", - "\t2 & 2 & 1 & 1996-01-30 & meeting minutes & neutral & Consumer spending had expanded modestly on balance, growth in business invest- ment in capital goods appeared to have slackened somewhat recently, and housing demand seemed to have leveled out. & $\\dots$ \\\\\n", - "\\end{tabular}\n" - ], "text/plain": [ - "\u001b[1m2×8 DataFrame\u001b[0m\n", - "\u001b[1m Row \u001b[0m│\u001b[1m sentence_id \u001b[0m\u001b[1m doc_id \u001b[0m\u001b[1m date \u001b[0m\u001b[1m event_type \u001b[0m\u001b[1m label \u001b[0m\u001b[1m sentence \u001b[0m ⋯\n", - " │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Date \u001b[0m\u001b[90m String31 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m String \u001b[0m ⋯\n", - "─────┼──────────────────────────────────────────────────────────────────────────\n", - " 1 │ 1 1 1996-01-30 meeting minutes neutral The Committe ⋯\n", - " 2 │ 2 1 1996-01-30 meeting minutes neutral Consumer spe\n", - "\u001b[36m 3 columns omitted\u001b[0m" + "256-element Vector{String}:\n", + " \"\"\n", + " \"rem\"\n", + " \"ained\"\n", + " \" well\"\n", + " \" below\"\n", + " \" their\"\n", + " \" levels\"\n", + " \" at\"\n", + " \" the\"\n", + " \" beginning\"\n", + " \" of\"\n", + " \" the\"\n", + " \" year\"\n", + " ⋮\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"\n", + " \"\"" ] }, - "execution_count": 3, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df = load_all_sentences()\n", - "n = 2\n", - "queries = df[1:n, :]" + "toks = decode(cls.tkr, encode(cls.tkr, texts[1, :].sentence).token)" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "667a0a96-53ae-4aec-8ec7-14013291ec0e", + "execution_count": 16, + "id": "7c4729b8-962e-447d-8219-4696ed9b9a2d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(hidden_state = Float32[-0.6870835 -0.24942288 … -0.0025752056 -0.0017432718; 0.6610133 0.31153536 … 0.013969824 0.0137721775; … ; -0.8090527 -1.3571459 … -0.17556852 -0.17569499; -0.5847161 -1.2344005 … -0.044540238 -0.04152311;;;], logit = Float32[-3.221773; -3.1708598; 6.291086;;])" + "merge_truncated_tokens (generic function with 2 methods)" ] }, - "execution_count": 4, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "logits = cls([queries[1, :].sentence])" + "decoded = Vector{Char}()\n", + "for token in toks\n", + " token = string(token)\n", + " if startswith(token, \"<\")\n", + " continue\n", + " else\n", + " if startswith(token, \" \")\n", + " append!(decoded, token)\n", + " else\n", + " if length(decoded) == 0\n", + " append!(decoded, token)\n", + " else\n", + " last = pop!(decoded)\n", + " new_token = last * token\n", + " append!(decoded, new_token)\n", + " end\n", + " end\n", + " end\n", + "end" ] }, { "cell_type": "code", - "execution_count": null, - "id": "9f9e9713-ddb3-4f79-841b-100c9a09328a", + "execution_count": 11, + "id": 
"b9c6a175-ffa9-47a4-818a-04ec2b6445aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation.\"" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "String(decoded)" + ] + }, + { + "cell_type": "markdown", + "id": "1faeb671-dead-4ba1-867f-39eaf990a507", + "metadata": {}, + "source": [ + "### Fill in masks" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a596f684-1b82-4823-987b-adc633545977", "metadata": {}, "outputs": [], "source": [ - "Transformers.HuggingFace.save_model(\"Julia_FOMC\", cls.mod; path = pwd(), weight_name = \"weight\")" + "bert_enc = hgf\"bert-base-uncased:tokenizer\"\n", + "bert_model = hgf\"bert-base-uncased:ForMaskedLM\";" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "e0b2bb70-d889-4f73-92c0-a8719a4aa6a3", + "execution_count": 30, + "id": "ff831709-34d2-4823-9b6d-021a61eaf132", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"C:\\\\Users\\\\drobi\\\\Desktop\\\\uni\\\\master_thesis\\\\CounterfactualExplanations.jl\\\\dev\\\\notebooks\\\\Julia_FOMC\\\\config.json\"" + "(hidden_state = Float32[0.019434169 -0.006751724 … -0.3122035 0.96587026; 0.28009748 0.6731811 … 0.13828917 0.39761153; … ; -0.123422645 -0.15654448 … 0.09904317 -0.6344952; -0.05732409 -0.19550925 … 0.006869527 -0.23544282;;; -0.33048484 -0.22132668 … -0.24672607 -0.2684891; 0.046722244 -0.2989306 … 0.27542412 0.28430456; … ; 0.08061004 -0.17982382 … 0.35699606 0.35447824; 0.06478204 0.4334174 … 0.28634638 0.2961332], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[11, 9]), logit = Float32[-6.646805 -11.58166 … -11.6885395 -12.899181; -6.594739 -11.566789 … -11.892482 -12.609668; … ; -5.804957 -10.092522 … -10.914755 -10.116999; -3.9648962 -9.437453 … -4.6001024 -10.999382;;; -6.8107824 -10.228295 … -6.8980503 -6.914237; -6.7649107 -10.272557 … -7.056045 -7.0753527; … ; -5.953972 -7.588567 … -6.2830176 -6.3007765; -3.8673246 -5.0237412 … -3.1228065 -3.1453354])" ] }, - "execution_count": 15, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "Transformers.HuggingFace.save_config(\"Julia_FOMC\", cls.cfg; path = pwd(), config_name = \"config.json\")" + "out = bert_model(encode(bert_enc, [\"hello [MASK] world [MASK] [MASK] and my [MASK]!\", \"bonjour mes [MASK].\"]))" ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "a1a1381f-a69f-47dd-ad61-8609f1103bbf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11×2 Matrix{String}:\n", + " \".\" \".\"\n", + " \"hello\" \"bon\"\n", + " \",\" \"##jou\"\n", + " \"world\" \"##s\"\n", + " \",\" \"me\"\n", + " \"me\" \"##s\"\n", + " \"and\" \"##nant\"\n", + " \"my\" \".\"\n", + " \"friends\" \".\"\n", + " \"!\" \"de\"\n", + " \".\" \"de\"" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = decode(bert_enc, out.logit)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "b014982f-8df6-4f10-b675-b60822dcfea2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "merge_truncated_words (generic function with 4 methods)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function merge_truncated_words(tokens, in_word=\"##\", 
between_word=\"\", separators=[\".\", \"de\"])\n", + " decoded = Vector{Char}()\n", + " for token in tokens\n", + " token = string(token)\n", + " if token in separators\n", + " continue\n", + " else\n", + " if startswith(token, in_word)\n", + " if length(decoded) == 0\n", + " append!(decoded, token)\n", + " else\n", + " last = pop!(decoded)\n", + " new_token = last * chop(token, head=2, tail=0)\n", + " append!(decoded, new_token)\n", + " end\n", + " else\n", + " append!(decoded, \" \" * token)\n", + " end\n", + " end\n", + " end\n", + " return decoded\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "8adb05cb-84f0-4b14-a74b-1cfec686280a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\" hello , world , me and my friends ! bonjous mesnant\"" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "String(merge_truncated_words(out))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77d8ee90-b306-4237-a76f-dab338639c96", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/dev/notebooks/RELITC_training.ipynb b/dev/notebooks/RELITC_training.ipynb new file mode 100644 index 000000000..7b804c322 --- /dev/null +++ b/dev/notebooks/RELITC_training.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "098c3a0a-8fc0-44da-ab50-fe289ef9f56f", + "metadata": {}, + "outputs": [], + "source": [ + "using Pkg" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "88746cde-6260-40cd-92c3-0316deced701", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1mStatus\u001b[22m\u001b[39m `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", + " \u001b[90m[438e738f] \u001b[39mPyCall v1.96.4\n", + " \u001b[90m[21ca0261] \u001b[39mTransformers v0.2.8\n", + " \u001b[90m[d66529d5] \u001b[39mTrillionDollarWords v0.1.0\n" + ] + } + ], + "source": [ + "Pkg.status()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d12f0762-09fa-4013-b52a-466e30028eef", + "metadata": {}, + "outputs": [], + "source": [ + "import Transformers\n", + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "66f296ec-2419-43db-bd2a-5a62de2eafeb", + "metadata": {}, + "outputs": [], + "source": [ + "using TrillionDollarWords " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7eb64489-f40e-4dbe-b447-0e87d8e20104", + "metadata": {}, + "outputs": [], + "source": [ + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c97eb28b-1ad1-416b-a97b-8a4c02ed1a62", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2×8 DataFrame
 Row │ sentence_id │ doc_id │ date       │ event_type      │ label   │ sentence │ score    │ speaker
     │ Int64       │ Int64  │ Date       │ String31        │ String7 │ String   │ Float64  │ String?
   1 │ 1           │ 1      │ 1996-01-30 │ meeting minutes │ neutral │ The Committee then turned to a discussion of the economic and financial outlook, the ranges for the growth of money and debt in 1996, and the implementation of monetary policy over the intermeeting period ahead. │ 0.999848 │ missing
   2 │ 2           │ 1      │ 1996-01-30 │ meeting minutes │ neutral │ Consumer spending had expanded modestly on balance, growth in business invest- ment in capital goods appeared to have slackened somewhat recently, and housing demand seemed to have leveled out. │ 0.999584 │ missing
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccc}\n", + "\t& sentence\\_id & doc\\_id & date & event\\_type & label & sentence & \\\\\n", + "\t\\hline\n", + "\t& Int64 & Int64 & Date & String31 & String7 & String & \\\\\n", + "\t\\hline\n", + "\t1 & 1 & 1 & 1996-01-30 & meeting minutes & neutral & The Committee then turned to a discussion of the economic and financial outlook, the ranges for the growth of money and debt in 1996, and the implementation of monetary policy over the intermeeting period ahead. & $\\dots$ \\\\\n", + "\t2 & 2 & 1 & 1996-01-30 & meeting minutes & neutral & Consumer spending had expanded modestly on balance, growth in business invest- ment in capital goods appeared to have slackened somewhat recently, and housing demand seemed to have leveled out. & $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m2×8 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m sentence_id \u001b[0m\u001b[1m doc_id \u001b[0m\u001b[1m date \u001b[0m\u001b[1m event_type \u001b[0m\u001b[1m label \u001b[0m\u001b[1m sentence \u001b[0m ⋯\n", + " │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Date \u001b[0m\u001b[90m String31 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m String \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ 1 1 1996-01-30 meeting minutes neutral The Committe ⋯\n", + " 2 │ 2 1 1996-01-30 meeting minutes neutral Consumer spe\n", + "\u001b[36m 3 columns omitted\u001b[0m" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = load_all_sentences()\n", + "n = 2\n", + "queries = df[1:n, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "667a0a96-53ae-4aec-8ec7-14013291ec0e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(hidden_state = Float32[-0.6870835 -0.24942288 … -0.0025752056 -0.0017432718; 0.6610133 0.31153536 … 0.013969824 0.0137721775; … ; -0.8090527 -1.3571459 … -0.17556852 -0.17569499; -0.5847161 -1.2344005 … -0.044540238 -0.04152311;;;], logit = Float32[-3.221773; -3.1708598; 6.291086;;])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "logits = cls([queries[1, :].sentence])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f9e9713-ddb3-4f79-841b-100c9a09328a", + "metadata": {}, + "outputs": [], + "source": [ + "Transformers.HuggingFace.save_model(\"Julia_FOMC\", cls.mod; path = pwd(), weight_name = \"weight\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e0b2bb70-d889-4f73-92c0-a8719a4aa6a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"C:\\\\Users\\\\drobi\\\\Desktop\\\\uni\\\\master_thesis\\\\CounterfactualExplanations.jl\\\\dev\\\\notebooks\\\\Julia_FOMC\\\\config.json\"" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Transformers.HuggingFace.save_config(\"Julia_FOMC\", cls.cfg; path = pwd(), config_name = \"config.json\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/importances.py b/dev/notebooks/importances.py new file mode 100644 index 000000000..f47dacd25 
--- /dev/null +++ b/dev/notebooks/importances.py @@ -0,0 +1,18 @@ +import os +import json + +from transformers import AutoTokenizer, RobertaForSequenceClassification +from transformers_interpret import SequenceClassificationExplainer + +def extract_importances(model_path, input_strings): + model = RobertaForSequenceClassification.from_pretrained(model_path, local_files_only=True) + tokenizer = AutoTokenizer.from_pretrained("gtfintechlab/FOMC-RoBERTa") + + scorer = SequenceClassificationExplainer(model, tokenizer, attribution_type='lig') + + attributions = [] + for t in input_strings: + attributions.append(scorer(t, index=0, internal_batch_size=1)) + + with open('temp/attributions.json', 'w') as f: + f.write(json.dumps(attributions)) \ No newline at end of file diff --git a/dev/notebooks/word_attributions.ipynb b/dev/notebooks/word_attributions.ipynb index 0429325b7..65e5b25e4 100644 --- a/dev/notebooks/word_attributions.ipynb +++ b/dev/notebooks/word_attributions.ipynb @@ -2,59 +2,47 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 21, "id": "255175ec-037e-4238-9db3-24c24d324544", "metadata": {}, "outputs": [], "source": [ + "import os\n", + "import json\n", "import transformers\n", - "import transformers_interpret" + "import transformers_interpret\n", + "\n", + "from transformers import AutoTokenizer, RobertaForSequenceClassification\n", + "from transformers_interpret import SequenceClassificationExplainer" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "dbe08d3e", + "execution_count": 22, + "id": "558306bd", "metadata": {}, "outputs": [], "source": [ - "from transformers import AutoTokenizer, RobertaForSequenceClassification" + "PATH = \"C:/Users/drobi/Desktop/uni/master_thesis/CounterfactualExplanations.jl/dev/notebooks/model\"\n", + "\n", + "input_strings = [\"dict, it contains the list of tokens and the list of feature importances\",\n", + " \"we should invest in the new technologies to increase our chances in the following term\"]" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 23, "id": "a713410b", "metadata": {}, "outputs": [], "source": [ - "PATH = \"C:/Users/drobi/Desktop/uni/master_thesis/CounterfactualExplanations.jl/dev/notebooks/Julia_FOMC\"\n", - "model = RobertaForSequenceClassification.from_pretrained(PATH, local_files_only=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "d226e1d3", - "metadata": {}, - "outputs": [], - "source": [ - "from transformers_interpret import SequenceClassificationExplainer" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "1be89e16", - "metadata": {}, - "outputs": [], - "source": [ + "model = RobertaForSequenceClassification.from_pretrained(PATH, local_files_only=True)\n", "tokenizer = AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 24, "id": "99fe89ef", "metadata": {}, "outputs": [], @@ -64,56 +52,31 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 25, "id": "164b6931", "metadata": {}, "outputs": [], "source": [ - "x = \"dict, it contains the list of tokens and the list of feature importances\"\n", - "attributions = scorer(x, index=0, internal_batch_size=1)" + "attributions = []\n", + "for t in input_strings:\n", + " attributions.append(scorer(t, index=0, internal_batch_size=1))" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 36, "id": "e7ef3cfc", "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "[('', 0.0),\n", - " ('dict', -0.1658131611215076),\n", - " (',', -0.016083339129156157),\n", - " ('it', -0.05116318593480035),\n", - " ('contains', 0.06590468067645444),\n", - " ('the', -0.47440672660259087),\n", - " ('list', 0.10948647313489888),\n", - " ('of', -0.14820110831227676),\n", - " ('tokens', -0.155580645658077),\n", - " ('and', -0.047361977300854),\n", - " ('the', 0.07312371424594372),\n", - " ('list', 0.029488012987564528),\n", - " ('of', -0.06527359636271021),\n", - " ('feature', -0.021884019853495997),\n", - " ('import', 0.2501637522551888),\n", - " ('ances', -0.7786443696445894),\n", - " ('', 0.0)]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "attributions" + "with open('temp/attributions.json', 'w') as f:\n", + " f.write(json.dumps(attributions))" ] }, { "cell_type": "code", "execution_count": null, - "id": "47dfe7e9", + "id": "352a1979", "metadata": {}, "outputs": [], "source": [] From 18d85037fa7830a3f502588a2ec9f6d2e17d234b Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Thu, 21 Mar 2024 15:31:03 +0100 Subject: [PATCH 4/9] add cmlm --- dev/notebooks/CondaPkg.toml | 1 + dev/notebooks/Dataset_masking.ipynb | 97 ++++++ dev/notebooks/Load_CMLM.ipynb | 172 +++++++++++ dev/notebooks/RELITC.ipynb | 455 +++++++++++++++------------- 4 files changed, 508 insertions(+), 217 deletions(-) create mode 100644 dev/notebooks/Dataset_masking.ipynb create mode 100644 dev/notebooks/Load_CMLM.ipynb diff --git a/dev/notebooks/CondaPkg.toml b/dev/notebooks/CondaPkg.toml index 6a4dc0c33..ad30c47dc 100644 --- a/dev/notebooks/CondaPkg.toml +++ b/dev/notebooks/CondaPkg.toml @@ -2,6 +2,7 @@ channels = ["anaconda", "pytorch", "nvidia", "conda-forge"] [deps] pytorch-cuda = "12.1" +cudnn = "" pytorch = "" transformers-interpret = "" python = ">=3.8,<4" diff --git a/dev/notebooks/Dataset_masking.ipynb b/dev/notebooks/Dataset_masking.ipynb new file mode 100644 index 000000000..f8bd79b41 --- /dev/null +++ b/dev/notebooks/Dataset_masking.ipynb @@ -0,0 +1,97 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "id": "14e30426-1457-4c05-ba3f-272e9241b139", + "metadata": {}, + "outputs": [], + "source": [ + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "30228c72-e074-43ab-9b28-129c5811963e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BertTextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(WordPieceTokenization(bert_uncased_tokenizer, WordPiece(vocab_size = 30522, unk = [UNK], max_char = 100)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 30522, unk = [UNK], unki = 101),\n", + "├─ startsym = [CLS],\n", + "├─ endsym = [SEP],\n", + "├─ padsym = [PAD],\n", + "├─ trunc = 512,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}([CLS]: Input[1]: [SEP]: (Input[2]: [SEP]:)...)(target.token)\n", + " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(512))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_and_pad(512, [PAD], tail, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target[segment] := 
TextEncodeBase.trunc_and_pad(512, 1, tail, tail)(target.segment)\n", + " ╰─ target[segment] := TextEncodeBase.nested2batch(target.segment)\n", + " ╰─ target := (target.token, target.segment, target.attention_mask)\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bert_enc = hgf\"bert-base-uncased:tokenizer\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "96d7bb3c-85a8-4fdd-bd72-c98ee6904758", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(token = Bool[0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0;;; 0 0 … 1 1; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0], segment = [1 1; 1 1; … ; 1 1; 1 1], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[11, 9]))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "encode(bert_enc, [\"hello [MASK] world [MASK] [MASK] and my [MASK]!\", \"bonjour mes [MASK].\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72aefb08-9c24-4c99-afe6-f3cafe743c22", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/Load_CMLM.ipynb b/dev/notebooks/Load_CMLM.ipynb new file mode 100644 index 000000000..6949dacdb --- /dev/null +++ b/dev/notebooks/Load_CMLM.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "f5b3440e-acd9-4c06-ad66-acd36c663ade", + "metadata": {}, + "outputs": [], + "source": [ + "using Transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "96c619a7-9c69-4952-9b7f-a02509f7b896", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "load_model (generic function with 1 method)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function load_model(; load_head = true, kwrgs...)\n", + " model_name = \"karoldobiczek/relitc-FOMC-CMLM\"\n", + " tkr = Transformers.load_tokenizer(model_name)\n", + " cfg = Transformers.HuggingFace.HGFConfig(Transformers.load_config(model_name); kwrgs...)\n", + " mod = Transformers.load_model(model_name, \"ForMaskedLM\"; config = cfg)\n", + "\n", + " return tkr, mod, cfg\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a1fa980d-7508-469d-8f0c-db26bd82cc40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(BertTextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(WordPieceTokenization(bert_uncased_tokenizer, WordPiece(vocab_size = 30522, unk = [UNK], max_char = 100)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 30522, unk = [UNK], unki = 101),\n", + "├─ startsym = [CLS],\n", + "├─ endsym = [SEP],\n", + "├─ padsym = [PAD],\n", + "├─ trunc = 512,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}([CLS]: Input[1]: [SEP]: (Input[2]: [SEP]:)...)(target.token)\n", + " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ 
Transformers.TextEncoders.getlengths(512))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_and_pad(512, [PAD], head, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target[segment] := TextEncodeBase.trunc_and_pad(512, 1, head, tail)(target.segment)\n", + " ╰─ target[segment] := TextEncodeBase.nested2batch(target.segment)\n", + " ╰─ target := (target.token, target.segment, target.attention_mask)\n", + "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tkr, model = load_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0f83d8a1-bb74-445c-ba2f-908fe19f5149", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(token = Bool[0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0], segment = [1, 1, 1, 1, 1, 1], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[6]))" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "input = TextEncoders.encode(tkr, \"[SEP] hello world!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3caedfa7-6d74-468a-8d58-42cc0ace447c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(hidden_state = Float32[-0.0068912334 1.0873084 … -0.60200036 1.0715828; 0.12056827 0.10844539 … -0.6849348 0.116397835; … ; -0.026128935 -0.5781372 … -0.010042516 -0.5654973; 0.13880502 -0.25988412 … -0.09458274 -0.27784675;;;], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[6]), logit = Float32[-6.8121076 -13.4576 … -11.932923 -13.186207; -6.7392855 -13.34565 … -12.13822 -13.120222; … ; -6.0473905 -10.81515 … -10.769718 -10.597319; -4.2169976 -12.677718 … -3.9266496 -12.219977;;;])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = model(input)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "af5e931b-d4cd-4d65-a3e8-79b43bc60f58", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6×1 Matrix{String}:\n", + " \".\"\n", + " \".\"\n", + " \"hello\"\n", + " \"world\"\n", + " \"!\"\n", + " \".\"" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "TextEncoders.decode(tkr, out.logit)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "666d9c78-b952-43f0-b1c0-764520bb2acd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb index d771637d7..78d1890ba 100644 --- a/dev/notebooks/RELITC.ipynb +++ b/dev/notebooks/RELITC.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 29, + "execution_count": 13, "id": "a521610f-12dc-4e6e-80e7-b51328794d51", "metadata": {}, "outputs": [ @@ -11,8 +11,7 @@ "output_type": "stream", "text": [ "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n", - "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", - "\u001b[33m⌅\u001b[39m \u001b[90m[62b44479] \u001b[39m\u001b[92m+ CUDNN_jll v8.9.4+0\u001b[39m\n", + "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Manifest.toml`\n" ] } @@ -24,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "id": "0de19327-1494-4f9c-a799-d5551fac5826", "metadata": {}, "outputs": [], @@ -46,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 15, "id": "a9c6c853-d63f-4f48-b188-0d12d9a11be0", "metadata": {}, "outputs": [ @@ -91,7 +90,7 @@ "\u001b[36m 3 columns omitted\u001b[0m" ] }, - "execution_count": 31, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -112,159 +111,21 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "e00a7c85-2a74-41bf-ad75-899f3317dac9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mResolving changes\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m\u001b[32m+ transformers-interpret\u001b[39m\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mInstalling packages\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mC:\\Users\\drobi\\.julia\\conda\\3\\x86_64\\Scripts\\mamba.exe\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90minstall\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-y\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-p C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\.CondaPkg\\env\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m--override-channels\u001b[39m\n", - "\u001b[32m\u001b[1m 
\u001b[22m\u001b[39m│ \u001b[90m--no-channel-priority\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpython[version='>=3.8,<4']\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpython[version='>=3.7,<4',channel='anaconda']\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpytorch[version='*']\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mpytorch-cuda[version='12.1']\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mtransformers[version='4.15.0']\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90mtransformers-interpret[version='*']\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-c anaconda\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-c conda-forge\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m│ \u001b[90m-c nvidia\u001b[39m\n", - "\u001b[32m\u001b[1m \u001b[22m\u001b[39m└ \u001b[90m-c pytorch\u001b[39m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "anaconda/win-64 Using cache\n", - "anaconda/noarch Using cache\n", - "conda-forge/win-64 Using cache\n", - "conda-forge/noarch Using cache\n", - "nvidia/win-64 Using cache\n", - "nvidia/noarch Using cache\n", - "pytorch/win-64 Using cache\n", - "pytorch/noarch Using cache\n", - "anaconda/win-64 Using cache\n", - "anaconda/noarch Using cache\n", - "Transaction\n", - "\n", - " Prefix: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\.CondaPkg\\env\n", - "\n", - " Updating specs:\n", - "\n", - " - python[version='>=3.8,<4']\n", - " - anaconda::python[version='>=3.7,<4']\n", - " - pytorch=*\n", - " - pytorch-cuda==12.1\n", - " - transformers==4.15.0\n", - " - transformers-interpret=*\n", - " - ca-certificates\n", - " - certifi\n", - " - openssl\n", - "\n", - "\n", - " Package Version Build Channel Size\n", - "-------------------------------------------------------------------------------\n", - " Install:\n", - "-------------------------------------------------------------------------------\n", - "\n", - " + xorg-libxau 1.0.11 hcd874cb_0 conda-forge 51kB\n", - " + libjpeg-turbo 3.0.0 hcfcfb64_1 conda-forge 823kB\n", - " + lerc 4.0.0 h63175ca_0 conda-forge 194kB\n", - " + libdeflate 1.19 hcfcfb64_0 conda-forge 153kB\n", - " + libbrotlicommon 1.1.0 hcfcfb64_1 conda-forge 71kB\n", - " + libpng 1.6.43 h19919ed_0 conda-forge 348kB\n", - " + pthread-stubs 0.4 hcd874cb_1001 conda-forge 6kB\n", - " + xorg-libxdmcp 1.1.3 hcd874cb_0 conda-forge 68kB\n", - " + libbrotlienc 1.1.0 hcfcfb64_1 conda-forge 247kB\n", - " + libbrotlidec 1.1.0 hcfcfb64_1 conda-forge 33kB\n", - " + libxcb 1.15 hcd874cb_0 conda-forge 970kB\n", - " + brotli-bin 1.1.0 hcfcfb64_1 conda-forge 21kB\n", - " + brotli 1.1.0 hcfcfb64_1 conda-forge 20kB\n", - " + libwebp-base 1.3.2 h2bbff1b_0 anaconda 338kB\n", - " + lz4-c 1.9.4 h2bbff1b_0 anaconda 143kB\n", - " + freetype 2.12.1 ha860e81_0 anaconda 528kB\n", - " + zstd 1.5.5 hd43e919_0 anaconda 2MB\n", - " + backcall 0.2.0 pyhd3eb1b0_0 anaconda 14kB\n", - " + munkres 1.1.4 py_0 anaconda 13kB\n", - " + parso 0.8.3 pyhd3eb1b0_0 anaconda 71kB\n", - " + pickleshare 0.7.5 pyhd3eb1b0_1003 anaconda 13kB\n", - " + decorator 5.1.1 pyhd3eb1b0_0 anaconda 12kB\n", - " + zipp 3.17.0 pyhd8ed1ab_0 conda-forge Cached\n", - " + python-dateutil 2.9.0 pyhd8ed1ab_0 conda-forge Cached\n", - " + pyparsing 3.1.2 pyhd8ed1ab_0 conda-forge 89kB\n", - " + cycler 0.12.1 pyhd8ed1ab_0 conda-forge 13kB\n", - " + wcwidth 0.2.13 
pyhd8ed1ab_0 conda-forge Cached\n", - " + traitlets 5.14.2 pyhd8ed1ab_0 conda-forge Cached\n", - " + pygments 2.17.2 pyhd8ed1ab_0 conda-forge Cached\n", - " + jedi 0.19.1 pyhd8ed1ab_0 conda-forge Cached\n", - " + importlib_resources 6.3.0 pyhd8ed1ab_0 conda-forge 31kB\n", - " + prompt-toolkit 3.0.42 pyha770c72_0 conda-forge Cached\n", - " + importlib-resources 6.3.0 pyhd8ed1ab_0 conda-forge 10kB\n", - " + unicodedata2 15.1.0 py39ha55989b_0 conda-forge 373kB\n", - " + kiwisolver 1.4.5 py39h1f6ef14_1 conda-forge 56kB\n", - " + contourpy 1.2.0 py39h1f6ef14_0 conda-forge 186kB\n", - " + libtiff 4.6.0 h6e2ebb7_2 conda-forge 787kB\n", - " + fonttools 4.49.0 py39ha55989b_0 conda-forge 2MB\n", - " + openjpeg 2.5.2 h3d672ee_0 conda-forge 238kB\n", - " + lcms2 2.16 h67d730c_0 conda-forge 508kB\n", - " + pillow 10.2.0 py39h368b509_0 conda-forge 42MB\n", - " + matplotlib-base 3.8.3 py39hf19769e_0 conda-forge 7MB\n", - " + matplotlib-inline 0.1.6 py39haa95532_0 anaconda 18kB\n", - " + captum 0.7.0 0 pytorch 1MB\n", - " + ipython 7.33.0 py39hcbf5309_0 conda-forge 1MB\n", - " + transformers-interpret 0.8.1 pyhd8ed1ab_0 conda-forge 35kB\n", - "\n", - " Reinstall:\n", - "-------------------------------------------------------------------------------\n", - "\n", - " o ca-certificates 2024.2.2 h56e8100_0 conda-forge Cached\n", - " o openssl 3.2.1 hcfcfb64_0 conda-forge Cached\n", - " o python 3.9.18 h1aa4202_0 anaconda Cached\n", - " o certifi 2024.2.2 pyhd8ed1ab_0 conda-forge Cached\n", - "\n", - " Summary:\n", - "\n", - " Install: 46 packages\n", - " Reinstall: 4 packages\n", - "\n", - " Total download: 61MB\n", - "\n", - "-------------------------------------------------------------------------------\n", - "\n", - "\n", - "\n", - "Looking for: [\"python[version='>=3.8,<4']\", \"anaconda::python[version='>=3.7,<4']\", 'pytorch=', 'pytorch-cuda==12.1', 'transformers==4.15.0', 'transformers-interpret=']\n", - "\n", - "\n", - "Downloading and Extracting Packages: ...working... done\n", - "Preparing transaction: ...working... done\n", - "Verifying transaction: ...working... done\n", - "Executing transaction: ...working... done\n" - ] - } - ], + "outputs": [], "source": [ - "using CondaPkg\n", + "# using CondaPkg\n", "# CondaPkg.add(\"pytorch\")\n", "# CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", - "# CondaPkg.add(\"transformers-interpret\")" + "# CondaPkg.add(\"transformers-interpret\")\n", + "# CondaPkg.add(\"cuDNN\")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "2a605169-6109-41fb-973b-7d5a044e71f1", "metadata": {}, "outputs": [ @@ -272,9 +133,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m\u001b[1mPrecompiling\u001b[22m\u001b[39m PythonCall\n", - "\u001b[32m ✓ \u001b[39mPythonCall\n", - " 1 dependency successfully precompiled in 20 seconds. 
24 already precompiled.\n" + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n" ] } ], @@ -284,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "id": "12c9f296-f8bb-4142-a7db-ece5061b635e", "metadata": {}, "outputs": [ @@ -294,7 +155,7 @@ "Python: " ] }, - "execution_count": 12, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -306,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "id": "e4ced2a6-e9dd-4179-b917-007e377cccd9", "metadata": {}, "outputs": [ @@ -316,7 +177,7 @@ "Python: PreTrainedTokenizerFast(name_or_path='gtfintechlab/FOMC-RoBERTa', vocab_size=50265, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'sep_token': '
', 'pad_token': '', 'cls_token': '', 'mask_token': AddedToken(\"\", rstrip=False, lstrip=True, single_word=False, normalized=False)})" ] }, - "execution_count": 13, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -329,17 +190,17 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 4, "id": "21ab1d00-867f-4bd4-a2f6-83e4940554be", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Python: " + "Python: " ] }, - "execution_count": 20, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -350,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 16, "id": "ec54e441-653c-44ef-9cf5-684267a663bd", "metadata": { "scrolled": true @@ -362,22 +223,61 @@ "Python: [('', 0.0), ('rem', -0.09617849663936606), ('ained', -0.29702715615956665), ('well', 0.25551209840018674), ('below', -0.4015987185377847), ('their', -0.013975036770146217), ('levels', -0.13794157805498133), ('at', -0.002103001077563767), ('the', 0.14926277592717438), ('beginning', 0.08228264530334284), ('of', 0.08006335674570937), ('the', 0.24284418735078794), ('year', 0.20003154332566433), (',', 0.13104142887853437), ('and', 0.17795123590947837), ('that', 0.177958452206897), ('weaker', 0.07827504560711215), ('demand', 0.06225654464144761), ('and', 0.20338402474663345), ('earlier', -0.2643532357406369), ('declines', 0.14990056209456956), ('in', -0.3258371366392156), ('oil', -0.06976564966318043), ('prices', 0.011989646035101107), ('had', -0.15355389676950998), ('been', -0.10492307025848874), ('holding', -0.07771222018418246), ('down', -0.024919108109520634), ('consumer', 0.09788758630638592), ('price', 0.011979387105458176), ('inflation', -0.09257150340664654), ('.', 0.15492799445950947), ('', -0.3214473479860127), ('', 0.0)]" ] }, - "execution_count": 32, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "scorer(texts[1, :].sentence, index=0, internal_batch_size=1)" + "attribs = scorer(texts[1, :].sentence, index=0, internal_batch_size=1)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ea2c5458-657c-4f69-8499-6fc46aa27760", + "execution_count": 17, + "id": "c6680f1d-690d-45a0-b98c-17b4986dee46", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "34-element Vector{Tuple{String, Float64}}:\n", + " (\"\", 0.0)\n", + " (\"rem\", -0.09617849663936606)\n", + " (\"ained\", -0.29702715615956665)\n", + " (\"well\", 0.25551209840018674)\n", + " (\"below\", -0.4015987185377847)\n", + " (\"their\", -0.013975036770146217)\n", + " (\"levels\", -0.13794157805498133)\n", + " (\"at\", -0.002103001077563767)\n", + " (\"the\", 0.14926277592717438)\n", + " (\"beginning\", 0.08228264530334284)\n", + " (\"of\", 0.08006335674570937)\n", + " (\"the\", 0.24284418735078794)\n", + " (\"year\", 0.20003154332566433)\n", + " ⋮\n", + " (\"oil\", -0.06976564966318043)\n", + " (\"prices\", 0.011989646035101107)\n", + " (\"had\", -0.15355389676950998)\n", + " (\"been\", -0.10492307025848874)\n", + " (\"holding\", -0.07771222018418246)\n", + " (\"down\", -0.024919108109520634)\n", + " (\"consumer\", 0.09788758630638592)\n", + " (\"price\", 0.011979387105458176)\n", + " (\"inflation\", -0.09257150340664654)\n", + " (\".\", 0.15492799445950947)\n", + " (\"\", -0.3214473479860127)\n", + " (\"\", 0.0)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attributions = pyconvert(Array{Tuple{String, 
Float64}}, attribs)" + ] }, { "cell_type": "markdown", @@ -389,9 +289,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "id": "48278c99-7368-4eaa-b26f-1b91674fc514", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stderr", @@ -425,7 +327,7 @@ "), HGFRobertaForSequenceClassification(HGFRobertaModel(Chain(CompositeEmbedding(token = Embed(1024, 50265), position = ApplyEmbed(.+, FixedLenPositionEmbed(1024, 514), Transformers.HuggingFace.roberta_pe_indices(1,)), segment = ApplyEmbed(.+, Embed(1024, 1), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(1024, ϵ = 1.0e-5))), Transformer<24>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 16, p = nothing), Fork<3>(Dense(W = (1024, 1024), b = true)), Dense(W = (1024, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (1024, 4096), b = true), Dense(W = (4096, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(DropoutLayer(Transformers.HuggingFace.FirstTokenPooler()), DropoutLayer(Dense(σ = NNlib.tanh_fast, W = (1024, 1024), b = true)), Dense(W = (1024, 3), b = true)))), Transformers.HuggingFace.HGFConfig{:roberta, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Dict{Symbol, Any}}(:use_cache => true, :torch_dtype => \"float32\", :vocab_size => 50265, :output_hidden_states => true, :hidden_act => \"gelu\", :num_hidden_layers => 24, :num_attention_heads => 16, :classifier_dropout => nothing, :type_vocab_size => 1, :intermediate_size => 4096…))" ] }, - "execution_count": 8, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -436,9 +338,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "id": "9badf647-2250-450d-95c5-e83f79117514", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -472,7 +376,7 @@ " \"\"" ] }, - "execution_count": 9, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -483,62 +387,161 @@ }, { "cell_type": "code", - "execution_count": 16, - "id": "7c4729b8-962e-447d-8219-4696ed9b9a2d", + "execution_count": 20, + "id": "acf928f4-af1f-4cce-9c85-bb51d0609f6a", + "metadata": {}, + "outputs": [], + "source": [ + "word_attributions = []\n", + "for (i, (dec_tok, attrib)) in enumerate(zip(toks, attributions))\n", + " if startswith(dec_tok, \"<\")\n", + " continue\n", + " elseif length(word_attributions) == 0 || startswith(dec_tok, \" \")\n", + " push!(word_attributions, ([i], [attrib[1]], [attrib[2]]))\n", + " else \n", + " last_processed = last(word_attributions)\n", + " push!(last_processed[1], i)\n", + " push!(last_processed[2], attrib[1])\n", + " push!(last_processed[3], attrib[2])\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "49be7015-c7c2-43de-83b3-a17468fbc778", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "merge_truncated_tokens (generic function with 2 methods)" + "28-element Vector{Any}:\n", + " ([4], [\"well\"], [0.25551209840018674])\n", + " ([12], [\"the\"], [0.24284418735078794])\n", + " ([19], [\"and\"], [0.20338402474663345])\n", + " ([13, 14], [\"year\", \",\"], [0.20003154332566433, 0.13104142887853437])\n", + " ([16], [\"that\"], [0.177958452206897])\n", + " ([15], [\"and\"], [0.17795123590947837])\n", + " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])\n", + " ([21], [\"declines\"], [0.14990056209456956])\n", + " 
([9], [\"the\"], [0.14926277592717438])\n", + " ([29], [\"consumer\"], [0.09788758630638592])\n", + " ([10], [\"beginning\"], [0.08228264530334284])\n", + " ([11], [\"of\"], [0.08006335674570937])\n", + " ([17], [\"weaker\"], [0.07827504560711215])\n", + " ⋮\n", + " ([8], [\"at\"], [-0.002103001077563767])\n", + " ([6], [\"their\"], [-0.013975036770146217])\n", + " ([28], [\"down\"], [-0.024919108109520634])\n", + " ([23], [\"oil\"], [-0.06976564966318043])\n", + " ([27], [\"holding\"], [-0.07771222018418246])\n", + " ([2, 3], [\"rem\", \"ained\"], [-0.09617849663936606, -0.29702715615956665])\n", + " ([26], [\"been\"], [-0.10492307025848874])\n", + " ([7], [\"levels\"], [-0.13794157805498133])\n", + " ([25], [\"had\"], [-0.15355389676950998])\n", + " ([20], [\"earlier\"], [-0.2643532357406369])\n", + " ([22], [\"in\"], [-0.3258371366392156])\n", + " ([5], [\"below\"], [-0.4015987185377847])" ] }, - "execution_count": 16, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "decoded = Vector{Char}()\n", - "for token in toks\n", - " token = string(token)\n", - " if startswith(token, \"<\")\n", - " continue\n", - " else\n", - " if startswith(token, \" \")\n", - " append!(decoded, token)\n", - " else\n", - " if length(decoded) == 0\n", - " append!(decoded, token)\n", - " else\n", - " last = pop!(decoded)\n", - " new_token = last * token\n", - " append!(decoded, new_token)\n", - " end\n", - " end\n", - " end\n", - "end" + "sorted = sort(word_attributions, by = x -> -maximum(x[3]))" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "b9c6a175-ffa9-47a4-818a-04ec2b6445aa", + "execution_count": 22, + "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation.\"" + "10-element Vector{Any}:\n", + " ([4], [\"well\"], [0.25551209840018674])\n", + " ([12], [\"the\"], [0.24284418735078794])\n", + " ([19], [\"and\"], [0.20338402474663345])\n", + " ([13, 14], [\"year\", \",\"], [0.20003154332566433, 0.13104142887853437])\n", + " ([16], [\"that\"], [0.177958452206897])\n", + " ([15], [\"and\"], [0.17795123590947837])\n", + " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])\n", + " ([21], [\"declines\"], [0.14990056209456956])\n", + " ([9], [\"the\"], [0.14926277592717438])\n", + " ([29], [\"consumer\"], [0.09788758630638592])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first(sorted, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a596f684-1b82-4823-987b-adc633545977", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(BertTextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(WordPieceTokenization(bert_uncased_tokenizer, WordPiece(vocab_size = 30522, unk = [UNK], max_char = 100)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 30522, unk = [UNK], unki = 101),\n", + "├─ startsym = [CLS],\n", + "├─ endsym = [SEP],\n", + "├─ padsym = [PAD],\n", + "├─ trunc = 512,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}([CLS]: Input[1]: [SEP]: (Input[2]: 
[SEP]:)...)(target.token)\n", + " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(512))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_and_pad(512, [PAD], head, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target[segment] := TextEncodeBase.trunc_and_pad(512, 1, head, tail)(target.segment)\n", + " ╰─ target[segment] := TextEncodeBase.nested2batch(target.segment)\n", + " ╰─ target := (target.token, target.segment, target.attention_mask)\n", + "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" ] }, - "execution_count": 11, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "String(decoded)" + "function load_model(; kwrgs...)\n", + " model_name = \"karoldobiczek/relitc-FOMC-CMLM\"\n", + " tkr = Transformers.load_tokenizer(model_name)\n", + " cfg = Transformers.HuggingFace.HGFConfig(Transformers.load_config(model_name); kwrgs...)\n", + " mod = Transformers.load_model(model_name, \"ForMaskedLM\"; config = cfg)\n", + "\n", + " return tkr, mod, cfg\n", + "end\n", + "cmlm_tkr, cmlm_model = load_model()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3da8c6d2-6b2c-4a03-8b2f-345ffe7a3c97", + "metadata": {}, + "outputs": [], + "source": [ + "encoded_sentence = encode(cls.tkr, texts[1, :].sentence)\n", + "encoded_sentence" ] }, { @@ -551,39 +554,57 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "a596f684-1b82-4823-987b-adc633545977", + "execution_count": null, + "id": "28f760e4-68e9-44ef-aee6-654dedabe619", "metadata": {}, "outputs": [], - "source": [ - "bert_enc = hgf\"bert-base-uncased:tokenizer\"\n", - "bert_model = hgf\"bert-base-uncased:ForMaskedLM\";" - ] + "source": [] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "id": "ff831709-34d2-4823-9b6d-021a61eaf132", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(hidden_state = Float32[0.019434169 -0.006751724 … -0.3122035 0.96587026; 0.28009748 0.6731811 … 0.13828917 0.39761153; … ; -0.123422645 -0.15654448 … 0.09904317 -0.6344952; -0.05732409 -0.19550925 … 0.006869527 -0.23544282;;; -0.33048484 -0.22132668 … -0.24672607 -0.2684891; 0.046722244 -0.2989306 … 0.27542412 0.28430456; … ; 0.08061004 -0.17982382 … 0.35699606 0.35447824; 0.06478204 0.4334174 … 
0.28634638 0.2961332], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[11, 9]), logit = Float32[-6.646805 -11.58166 … -11.6885395 -12.899181; -6.594739 -11.566789 … -11.892482 -12.609668; … ; -5.804957 -10.092522 … -10.914755 -10.116999; -3.9648962 -9.437453 … -4.6001024 -10.999382;;; -6.8107824 -10.228295 … -6.8980503 -6.914237; -6.7649107 -10.272557 … -7.056045 -7.0753527; … ; -5.953972 -7.588567 … -6.2830176 -6.3007765; -3.8673246 -5.0237412 … -3.1228065 -3.1453354])" + "(hidden_state = Float32[-0.00065652834 0.148323 … -0.2656779 1.094993; 0.23470013 0.49248493 … -0.01542875 0.3666349; … ; -0.04797123 0.12968777 … 0.18314485 -0.5371204; -0.060299776 -0.20840645 … -0.004100313 -0.24968606;;; -0.39127347 -0.25087723 … -0.3486044 -0.3757742; -0.0037795287 -0.28035903 … 0.23318379 0.24946694; … ; 0.17403156 0.048295036 … 0.5432 0.53646606; 0.07622922 0.5221045 … 0.3127785 0.3254434], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[11, 9]), logit = Float32[-6.6425447 -11.8635645 … -11.268725 -13.800734; -6.5911 -11.931757 … -11.690113 -13.577938; … ; -5.8243623 -10.447554 … -10.825976 -10.625192; -4.114048 -9.63646 … -5.55217 -11.8213415;;; -6.785516 -10.138466 … -6.143492 -6.1645293; -6.736014 -10.05456 … -6.352455 -6.376718; … ; -5.999074 -6.742794 … -5.291667 -5.314741; -3.9786663 -5.861206 … -3.6715782 -3.697316])" ] }, - "execution_count": 30, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "out = bert_model(encode(bert_enc, [\"hello [MASK] world [MASK] [MASK] and my [MASK]!\", \"bonjour mes [MASK].\"]))" + "out = cmlm_model(encode(cmlm_tkr, [\"hello [MASK] world [MASK] [MASK] and my [MASK]!\", \"bonjour mes [MASK].\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "245bbd2a-c8e4-48c9-9ece-2c196d86bed2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CartesianIndex(102, 1)CartesianIndex(101, 1)CartesianIndex(104, 1)CartesianIndex(103, 1)" + ] + } + ], + "source": [ + "enc = encode(cmlm_tkr, [\"[UNK] [MASK]\"])\n", + "for i in 1:size(enc.token)[2]\n", + " print(argmax(enc.token[:, i, :]))\n", + "end" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "a1a1381f-a69f-47dd-ad61-8609f1103bbf", "metadata": {}, "outputs": [ @@ -593,24 +614,24 @@ "11×2 Matrix{String}:\n", " \".\" \".\"\n", " \"hello\" \"bon\"\n", - " \",\" \"##jou\"\n", + " \"to\" \"##jou\"\n", " \"world\" \"##s\"\n", - " \",\" \"me\"\n", + " \"of\" \"me\"\n", " \"me\" \"##s\"\n", - " \"and\" \"##nant\"\n", + " \"and\" \"##q\"\n", " \"my\" \".\"\n", " \"friends\" \".\"\n", " \"!\" \"de\"\n", " \".\" \"de\"" ] }, - "execution_count": 31, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "out = decode(bert_enc, out.logit)" + "out = decode(cmlm_tkr, out.logit)" ] }, { From 5debd42851b5deaae59e3d35acaf0a59f9341d20 Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Thu, 21 Mar 2024 18:20:12 +0100 Subject: [PATCH 5/9] working infilling --- dev/notebooks/RELITC.ipynb | 403 ++++++++++++++++++++++++++++++++----- 1 file changed, 355 insertions(+), 48 deletions(-) diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb index 78d1890ba..b1c5a73a8 100644 --- a/dev/notebooks/RELITC.ipynb +++ b/dev/notebooks/RELITC.ipynb @@ -338,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 125, "id": "9badf647-2250-450d-95c5-e83f79117514", "metadata": { "scrolled": 
true @@ -376,7 +376,7 @@ " \"\"" ] }, - "execution_count": 19, + "execution_count": 125, "metadata": {}, "output_type": "execute_result" } @@ -407,6 +407,53 @@ "end" ] }, + { + "cell_type": "code", + "execution_count": 106, + "id": "02b63bfd-134c-44ce-b2de-67ee99067745", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "28-element Vector{Any}:\n", + " ([2, 3], [\"rem\", \"ained\"], [-0.09617849663936606, -0.29702715615956665])\n", + " ([4], [\"well\"], [0.25551209840018674])\n", + " ([5], [\"below\"], [-0.4015987185377847])\n", + " ([6], [\"their\"], [-0.013975036770146217])\n", + " ([7], [\"levels\"], [-0.13794157805498133])\n", + " ([8], [\"at\"], [-0.002103001077563767])\n", + " ([9], [\"the\"], [0.14926277592717438])\n", + " ([10], [\"beginning\"], [0.08228264530334284])\n", + " ([11], [\"of\"], [0.08006335674570937])\n", + " ([12], [\"the\"], [0.24284418735078794])\n", + " ([13, 14], [\"year\", \",\"], [0.20003154332566433, 0.13104142887853437])\n", + " ([15], [\"and\"], [0.17795123590947837])\n", + " ([16], [\"that\"], [0.177958452206897])\n", + " ⋮\n", + " ([20], [\"earlier\"], [-0.2643532357406369])\n", + " ([21], [\"declines\"], [0.14990056209456956])\n", + " ([22], [\"in\"], [-0.3258371366392156])\n", + " ([23], [\"oil\"], [-0.06976564966318043])\n", + " ([24], [\"prices\"], [0.011989646035101107])\n", + " ([25], [\"had\"], [-0.15355389676950998])\n", + " ([26], [\"been\"], [-0.10492307025848874])\n", + " ([27], [\"holding\"], [-0.07771222018418246])\n", + " ([28], [\"down\"], [-0.024919108109520634])\n", + " ([29], [\"consumer\"], [0.09788758630638592])\n", + " ([30], [\"price\"], [0.011979387105458176])\n", + " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "word_attributions" + ] + }, { "cell_type": "code", "execution_count": 21, @@ -456,38 +503,75 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 51, "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "10-element Vector{Any}:\n", - " ([4], [\"well\"], [0.25551209840018674])\n", - " ([12], [\"the\"], [0.24284418735078794])\n", - " ([19], [\"and\"], [0.20338402474663345])\n", - " ([13, 14], [\"year\", \",\"], [0.20003154332566433, 0.13104142887853437])\n", - " ([16], [\"that\"], [0.177958452206897])\n", - " ([15], [\"and\"], [0.17795123590947837])\n", - " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])\n", - " ([21], [\"declines\"], [0.14990056209456956])\n", - " ([9], [\"the\"], [0.14926277592717438])\n", - " ([29], [\"consumer\"], [0.09788758630638592])" + "12-element Vector{Any}:\n", + " 4\n", + " 12\n", + " 19\n", + " 13\n", + " 14\n", + " 16\n", + " 15\n", + " 31\n", + " 32\n", + " 21\n", + " 9\n", + " 29" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx_to_mask = []\n", + "for row in first(sorted, 10)\n", + " append!(idx_to_mask, row[1])\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "d2071233-269a-49f6-b9d3-5e87f5be6d9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\"" ] }, - "execution_count": 22, + "execution_count": 78, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ - "first(sorted, 10)" + "masked_text = Vector{Char}()\n", + "for (i, token) in enumerate(toks)\n", + " if startswith(token, \"<\")\n", + " continue\n", + " elseif i in idx_to_mask\n", + " append!(masked_text, \" [MASK]\")\n", + " else\n", + " append!(masked_text, token)\n", + " end\n", + "end\n", + "\n", + "masked_text = String(masked_text)" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 79, "id": "a596f684-1b82-4823-987b-adc633545977", "metadata": { "scrolled": true @@ -516,7 +600,7 @@ "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" ] }, - "execution_count": 24, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } @@ -540,8 +624,30 @@ "metadata": {}, "outputs": [], "source": [ + "mask_idx = 104\n", + "\n", "encoded_sentence = encode(cls.tkr, texts[1, :].sentence)\n", - "encoded_sentence" + "tokens = copy(encoded_sentence.token)\n", + "# for i in 1:size(tokens)[2]\n", + "# if i - 1 in idx_to_mask\n", + "# encoded_sentence.token[:, i, :] = 0\n", + "# encoded_sentence.token\n", + "# # print(argmax(enc.token[:, i, :]))\n", + "# end\n", + "# encoded_sentence\n", + "tokens[:, idx_to_mask, :] .= 0\n", + "tokens[mask_idx, idx_to_mask, :] .= 1\n", + "encoded_sentence.token = tokens" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff1a1f63-8efc-477f-a39f-fb5f5cba2a5c", + "metadata": {}, + "outputs": [], + "source": [ + "decode(cmlm_tkr, tokens)" ] }, { @@ -554,31 +660,144 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "28f760e4-68e9-44ef-aee6-654dedabe619", + "execution_count": 149, + "id": "ff76df2f-f5e5-40bf-9a37-a517ac17acc7", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "merge_tokens (generic function with 2 methods)" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function merge_tokens(tokens, idx_to_mask=[])\n", + " merged_text = Vector{Char}()\n", + " for (i, token) in enumerate(tokens)\n", + " if startswith(token, \"<\")\n", + " continue\n", + " elseif i in idx_to_mask\n", + " append!(merged_text, \" [MASK]\")\n", + " else\n", + " append!(merged_text, token)\n", + " end\n", + " end\n", + " \n", + " return String(merged_text)\n", + "end" + ] }, { "cell_type": "code", - 
"execution_count": 27, + "execution_count": 162, + "id": "1ce0fbff-a40c-4e95-9489-c1f36478b29d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "left_to_right_filling (generic function with 1 method)" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + " if length(mask_positions) == 0\n", + " return merge_tokens(tokens)\n", + " end\n", + "\n", + " masked_text = merge_tokens(tokens, mask_positions)\n", + " # print(masked_text * \"\\n\")\n", + " \n", + " mask_positions = sort(mask_positions)\n", + " next_position = popfirst!(mask_positions)\n", + " \n", + " out = decode(cmlm_tkr, cmlm_model(encode(cmlm_tkr, masked_text)).logit)\n", + " # print(out)\n", + "\n", + " next_token = out[next_position]\n", + " if startswith(next_token, \"##\")\n", + " next_token = chop(next_token, head=2, tail=0)\n", + " else\n", + " next_token = \" \" * next_token\n", + " end\n", + " \n", + " tokens[next_position + 1] = next_token\n", + "\n", + " return left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "2d3bd657-d656-462c-97b9-58b5dd923394", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"remained well below their levels at the beginning of the and to to the in demand and earlier declines in oil prices had been holding down consumer price inflation . .\"" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left_to_right_filling(copy(toks), idx_to_mask, cmlm_model, cmlm_tkr)" + ] + }, + { + "cell_type": "code", + "execution_count": 122, "id": "ff831709-34d2-4823-9b6d-021a61eaf132", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(hidden_state = Float32[-0.00065652834 0.148323 … -0.2656779 1.094993; 0.23470013 0.49248493 … -0.01542875 0.3666349; … ; -0.04797123 0.12968777 … 0.18314485 -0.5371204; -0.060299776 -0.20840645 … -0.004100313 -0.24968606;;; -0.39127347 -0.25087723 … -0.3486044 -0.3757742; -0.0037795287 -0.28035903 … 0.23318379 0.24946694; … ; 0.17403156 0.048295036 … 0.5432 0.53646606; 0.07622922 0.5221045 … 0.3127785 0.3254434], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[11, 9]), logit = Float32[-6.6425447 -11.8635645 … -11.268725 -13.800734; -6.5911 -11.931757 … -11.690113 -13.577938; … ; -5.8243623 -10.447554 … -10.825976 -10.625192; -4.114048 -9.63646 … -5.55217 -11.8213415;;; -6.785516 -10.138466 … -6.143492 -6.1645293; -6.736014 -10.05456 … -6.352455 -6.376718; … ; -5.999074 -6.742794 … -5.291667 -5.314741; -3.9786663 -5.861206 … -3.6715782 -3.697316])" + "(hidden_state = Float32[-0.26276532 -0.304134 … -0.4412557 0.8395367; -0.3135784 -0.24896632 … -0.5622759 -0.06703592; … ; 0.21413484 0.26681167 … 0.3372178 -0.5381135; -0.25502548 -0.17415507 … -0.120725214 -0.17302077;;;], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[32]), logit = Float32[-6.7711673 -7.257094 … -6.7281137 -13.482519; -6.676459 -6.8120203 … -6.3364005 -12.82685; … ; -6.089307 -6.884933 … -7.574874 -10.116859; -4.113231 -5.430876 … -3.934112 -11.168427;;;])" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = cmlm_model(encode(cmlm_tkr, masked_text))" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "0a989b05-9fc6-45d7-86b5-2352d26bf163", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(30522, 11, 2)" ] }, - "execution_count": 27, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "out = cmlm_model(encode(cmlm_tkr, [\"hello [MASK] world [MASK] [MASK] and my [MASK]!\", \"bonjour mes [MASK].\"]))" + "size(out.logit)" ] }, { @@ -604,28 +823,43 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 81, "id": "a1a1381f-a69f-47dd-ad61-8609f1103bbf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "11×2 Matrix{String}:\n", - " \".\" \".\"\n", - " \"hello\" \"bon\"\n", - " \"to\" \"##jou\"\n", - " \"world\" \"##s\"\n", - " \"of\" \"me\"\n", - " \"me\" \"##s\"\n", - " \"and\" \"##q\"\n", - " \"my\" \".\"\n", - " \"friends\" \".\"\n", - " \"!\" \"de\"\n", - " \".\" \"de\"" + "32×1 Matrix{String}:\n", + " \".\"\n", + " \"and\"\n", + " \"well\"\n", + " \"below\"\n", + " \"their\"\n", + " \"levels\"\n", + " \"at\"\n", + " \"the\"\n", + " \"beginning\"\n", + " \"of\"\n", + " \"august\"\n", + " \"quarter\"\n", + " \"to\"\n", + " ⋮\n", + " \"in\"\n", + " \"oil\"\n", + " \"prices\"\n", + " \"had\"\n", + " \"been\"\n", + " \"holding\"\n", + " \"down\"\n", + " \"the\"\n", + " \"price\"\n", + " \"stability\"\n", + " \".\"\n", + " \".\"" ] }, - "execution_count": 30, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } @@ -636,7 +870,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 82, "id": "b014982f-8df6-4f10-b675-b60822dcfea2", "metadata": {}, "outputs": [ @@ -646,7 +880,7 @@ "merge_truncated_words (generic function with 4 methods)" ] }, - "execution_count": 56, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -678,30 +912,103 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 85, "id": "8adb05cb-84f0-4b14-a74b-1cfec686280a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\" hello , world , me and my friends ! 
bonjous mesnant\"" + "\"and well below their levels at the beginning of august quarter to to of strong demand and earlier increases in oil prices had been holding down the price stability\"" ] }, - "execution_count": 57, + "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "String(merge_truncated_words(out))" + "String(merge_truncated_words(out)[2:end])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "id": "77d8ee90-b306-4237-a76f-dab338639c96", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3-element Vector{Int64}:\n", + " 1\n", + " 3\n", + " 4" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx = [1, 3, 4]\n", + "[1, 2, 3, 4, 5][idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "8b99dd1e-95ce-4a21-8e1a-2106b53b7ee0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4×4 Matrix{Float64}:\n", + " 0.480747 0.166508 0.897285 0.770908\n", + " 0.401932 0.578816 0.878291 0.501171\n", + " 0.0753838 0.932655 0.0544905 0.132249\n", + " 0.301956 0.469797 0.485735 0.935682" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = rand(4, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "32023e43-20ed-4859-b23d-fcf6ccad4568", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2×2 view(::Matrix{Float64}, 1:2, [2, 3]) with eltype Float64:\n", + " 0.0 0.0\n", + " 0.0 0.0" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x[1:2, [2, 3]] .= 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c6cc184-c8f7-4201-8886-f5da43785223", + "metadata": {}, "outputs": [], "source": [] } From e6010a1ed4f08cbd04355113ddc174a54a3d767f Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Fri, 22 Mar 2024 17:07:35 +0100 Subject: [PATCH 6/9] finished basic relitc --- dev/notebooks/RELITC.ipynb | 742 ++++++++++++++++++++----------------- 1 file changed, 406 insertions(+), 336 deletions(-) diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb index b1c5a73a8..d37d7a277 100644 --- a/dev/notebooks/RELITC.ipynb +++ b/dev/notebooks/RELITC.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "id": "a521610f-12dc-4e6e-80e7-b51328794d51", "metadata": {}, "outputs": [ @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "id": "0de19327-1494-4f9c-a799-d5551fac5826", "metadata": {}, "outputs": [], @@ -111,28 +111,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "e00a7c85-2a74-41bf-ad75-899f3317dac9", "metadata": {}, - "outputs": [], - "source": [ - "# using CondaPkg\n", - "# CondaPkg.add(\"pytorch\")\n", - "# CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", - "# CondaPkg.add(\"transformers-interpret\")\n", - "# CondaPkg.add(\"cuDNN\")" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "2a605169-6109-41fb-973b-7d5a044e71f1", - "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: 
C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n", "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n" @@ -140,102 +135,81 @@ } ], "source": [ - "using PythonCall" + "using CondaPkg\n", + "CondaPkg.add(\"pytorch\")\n", + "CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", + "CondaPkg.add(\"transformers-interpret\")\n", + "CondaPkg.add(\"cuDNN\")" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "12c9f296-f8bb-4142-a7db-ece5061b635e", + "execution_count": 1, + "id": "2a605169-6109-41fb-973b-7d5a044e71f1", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "Python: " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", - "transformers = PythonCall.pyimport(\"transformers\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e4ced2a6-e9dd-4179-b917-007e377cccd9", - "metadata": {}, - "outputs": [ + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", + "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n" + ] + }, { "data": { "text/plain": [ - "Python: PreTrainedTokenizerFast(name_or_path='gtfintechlab/FOMC-RoBERTa', vocab_size=50265, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'sep_token': '', 'pad_token': '', 'cls_token': '', 'mask_token': AddedToken(\"\", rstrip=False, lstrip=True, single_word=False, normalized=False)})" + "Python: " ] }, - "execution_count": 3, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "using PythonCall\n", + "\n", + "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", + "transformers = 
PythonCall.pyimport(\"transformers\")\n", + "\n", "# load pre-trained classifier and corresponding tokenizer\n", "model = transformers.RobertaForSequenceClassification.from_pretrained(\"model\", local_files_only=true)\n", - "tokenizer = transformers.AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "21ab1d00-867f-4bd4-a2f6-83e4940554be", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Python: " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")\n", + "\n", "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "ec54e441-653c-44ef-9cf5-684267a663bd", - "metadata": { - "scrolled": true - }, + "execution_count": 245, + "id": "161a51e5-e3cb-4ca7-bbe2-4e95bfbfc1a9", + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Python: [('', 0.0), ('rem', -0.09617849663936606), ('ained', -0.29702715615956665), ('well', 0.25551209840018674), ('below', -0.4015987185377847), ('their', -0.013975036770146217), ('levels', -0.13794157805498133), ('at', -0.002103001077563767), ('the', 0.14926277592717438), ('beginning', 0.08228264530334284), ('of', 0.08006335674570937), ('the', 0.24284418735078794), ('year', 0.20003154332566433), (',', 0.13104142887853437), ('and', 0.17795123590947837), ('that', 0.177958452206897), ('weaker', 0.07827504560711215), ('demand', 0.06225654464144761), ('and', 0.20338402474663345), ('earlier', -0.2643532357406369), ('declines', 0.14990056209456956), ('in', -0.3258371366392156), ('oil', -0.06976564966318043), ('prices', 0.011989646035101107), ('had', -0.15355389676950998), ('been', -0.10492307025848874), ('holding', -0.07771222018418246), ('down', -0.024919108109520634), ('consumer', 0.09788758630638592), ('price', 0.011979387105458176), ('inflation', -0.09257150340664654), ('.', 0.15492799445950947), ('', -0.3214473479860127), ('', 0.0)]" + "get_attributions (generic function with 1 method)" ] }, - "execution_count": 16, + "execution_count": 245, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "attribs = scorer(texts[1, :].sentence, index=0, internal_batch_size=1)" + "function get_attributions(text, scorer)\n", + " attribs = scorer(text, index=0, internal_batch_size=1)\n", + " attributions = pyconvert(Array{Tuple{String, Float64}}, attribs)\n", + " return attributions\n", + "end" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "c6680f1d-690d-45a0-b98c-17b4986dee46", + "execution_count": 248, + "id": "409785d3-9953-426f-937f-b5ebaf9a6ea7", "metadata": {}, "outputs": [ { @@ -270,13 +244,13 @@ " (\"\", 0.0)" ] }, - "execution_count": 17, + "execution_count": 248, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "attributions = pyconvert(Array{Tuple{String, Float64}}, attribs)" + "attributions = get_attributions(texts[1, :].sentence, scorer)" ] }, { @@ -338,78 +312,44 @@ }, { "cell_type": "code", - "execution_count": 125, - "id": "9badf647-2250-450d-95c5-e83f79117514", - "metadata": { - "scrolled": true - }, + "execution_count": 246, + "id": "08a37453-82d2-4319-a719-a21d6685c88f", + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "256-element Vector{String}:\n", - " \"\"\n", - " \"rem\"\n", - " \"ained\"\n", - " \" well\"\n", - " \" below\"\n", - " \" their\"\n", 
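# A hedged sketch, not one of the original notebook cells: get_attributions above
# always explains class 0 (index=0). transformers-interpret's explainer falls back to
# the model's predicted class when no index is given, so one alternative is to omit
# the index and let the library pick the predicted label. `attribs_predicted` is an
# editor-introduced name.
attribs_predicted = scorer(texts[1, :].sentence, internal_batch_size = 1)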
- " \" levels\"\n", - " \" at\"\n", - " \" the\"\n", - " \" beginning\"\n", - " \" of\"\n", - " \" the\"\n", - " \" year\"\n", - " ⋮\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"\n", - " \"\"" + "group_into_words (generic function with 3 methods)" ] }, - "execution_count": 125, + "execution_count": 246, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "toks = decode(cls.tkr, encode(cls.tkr, texts[1, :].sentence).token)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "acf928f4-af1f-4cce-9c85-bb51d0609f6a", - "metadata": {}, - "outputs": [], - "source": [ - "word_attributions = []\n", - "for (i, (dec_tok, attrib)) in enumerate(zip(toks, attributions))\n", - " if startswith(dec_tok, \"<\")\n", - " continue\n", - " elseif length(word_attributions) == 0 || startswith(dec_tok, \" \")\n", - " push!(word_attributions, ([i], [attrib[1]], [attrib[2]]))\n", - " else \n", - " last_processed = last(word_attributions)\n", - " push!(last_processed[1], i)\n", - " push!(last_processed[2], attrib[1])\n", - " push!(last_processed[3], attrib[2])\n", + "function group_into_words(text, attributions, cls_tkr)\n", + " toks = decode(cls_tkr, encode(cls_tkr, text).token)\n", + " word_attributions = []\n", + " for (i, (dec_tok, attrib)) in enumerate(zip(toks, attributions))\n", + " if startswith(dec_tok, \"<\")\n", + " continue\n", + " elseif length(word_attributions) == 0 || startswith(dec_tok, \" \")\n", + " push!(word_attributions, ([i], [attrib[1]], [attrib[2]]))\n", + " else \n", + " last_processed = last(word_attributions)\n", + " push!(last_processed[1], i)\n", + " push!(last_processed[2], attrib[1])\n", + " push!(last_processed[3], attrib[2])\n", + " end\n", " end\n", + " return word_attributions\n", "end" ] }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 249, "id": "02b63bfd-134c-44ce-b2de-67ee99067745", "metadata": {}, "outputs": [ @@ -445,66 +385,48 @@ " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])" ] }, - "execution_count": 106, + "execution_count": 249, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "word_attributions" + "text = texts[1, :].sentence\n", + "word_attributions = group_into_words(text, attributions, cls.tkr)" ] }, { "cell_type": "code", - "execution_count": 21, - "id": "49be7015-c7c2-43de-83b3-a17468fbc778", + "execution_count": 252, + "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "28-element Vector{Any}:\n", - " ([4], [\"well\"], [0.25551209840018674])\n", - " ([12], [\"the\"], [0.24284418735078794])\n", - " ([19], [\"and\"], [0.20338402474663345])\n", - " ([13, 14], [\"year\", \",\"], [0.20003154332566433, 0.13104142887853437])\n", - " ([16], [\"that\"], [0.177958452206897])\n", - " ([15], [\"and\"], [0.17795123590947837])\n", - " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])\n", - " ([21], [\"declines\"], [0.14990056209456956])\n", - " ([9], [\"the\"], [0.14926277592717438])\n", - " ([29], [\"consumer\"], [0.09788758630638592])\n", - " ([10], [\"beginning\"], [0.08228264530334284])\n", - " ([11], [\"of\"], [0.08006335674570937])\n", - " ([17], [\"weaker\"], [0.07827504560711215])\n", - " ⋮\n", - " ([8], [\"at\"], [-0.002103001077563767])\n", - " ([6], [\"their\"], [-0.013975036770146217])\n", - " ([28], [\"down\"], [-0.024919108109520634])\n", - " ([23], [\"oil\"], 
[-0.06976564966318043])\n", - " ([27], [\"holding\"], [-0.07771222018418246])\n", - " ([2, 3], [\"rem\", \"ained\"], [-0.09617849663936606, -0.29702715615956665])\n", - " ([26], [\"been\"], [-0.10492307025848874])\n", - " ([7], [\"levels\"], [-0.13794157805498133])\n", - " ([25], [\"had\"], [-0.15355389676950998])\n", - " ([20], [\"earlier\"], [-0.2643532357406369])\n", - " ([22], [\"in\"], [-0.3258371366392156])\n", - " ([5], [\"below\"], [-0.4015987185377847])" + "get_top_k_idx (generic function with 2 methods)" ] }, - "execution_count": 21, + "execution_count": 252, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sorted = sort(word_attributions, by = x -> -maximum(x[3]))" + "function get_top_k_idx(attributions, k=10)\n", + " sorted = sort(attributions, by = x -> -maximum(x[3]))\n", + " idx_to_mask = []\n", + " for row in first(sorted, k)\n", + " append!(idx_to_mask, row[1])\n", + " end\n", + " return idx_to_mask\n", + "end" ] }, { "cell_type": "code", - "execution_count": 51, - "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", + "execution_count": 262, + "id": "ee80bad8-3641-43d1-8d6d-6dcd87e40f15", "metadata": {}, "outputs": [ { @@ -525,53 +447,74 @@ " 29" ] }, - "execution_count": 51, + "execution_count": 262, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "idx_to_mask = []\n", - "for row in first(sorted, 10)\n", - " append!(idx_to_mask, row[1])\n", - "end" + "idx_to_mask = get_top_k_idx(word_attributions)" ] }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 265, "id": "d2071233-269a-49f6-b9d3-5e87f5be6d9e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\"" + "mask_toks_at_idx (generic function with 1 method)" ] }, - "execution_count": 78, + "execution_count": 265, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "masked_text = Vector{Char}()\n", - "for (i, token) in enumerate(toks)\n", - " if startswith(token, \"<\")\n", - " continue\n", - " elseif i in idx_to_mask\n", - " append!(masked_text, \" [MASK]\")\n", - " else\n", - " append!(masked_text, token)\n", + "function mask_toks_at_idx(toks, idx_to_mask)\n", + " masked_text = Vector{Char}()\n", + " for (i, token) in enumerate(toks)\n", + " if startswith(token, \"<\")\n", + " continue\n", + " elseif i in idx_to_mask\n", + " append!(masked_text, \" [MASK]\")\n", + " else\n", + " append!(masked_text, token)\n", + " end\n", " end\n", - "end\n", - "\n", - "masked_text = String(masked_text)" + " \n", + " return String(masked_text)\n", + "end" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 266, + "id": "04662dad-be82-4888-9c6f-c26093347e80", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\"" + ] + }, + "execution_count": 266, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toks = decode(cls.tkr, encode(cls.tkr, text).token)\n", + "mask_toks_at_idx(toks, idx_to_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": 166, "id": "a596f684-1b82-4823-987b-adc633545977", "metadata": { "scrolled": true @@ -600,7 +543,7 @@ "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = 
Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" ] }, - "execution_count": 79, + "execution_count": 166, "metadata": {}, "output_type": "execute_result" } @@ -619,35 +562,111 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "3da8c6d2-6b2c-4a03-8b2f-345ffe7a3c97", + "execution_count": 207, + "id": "af3721c6-c528-4256-bb22-a4476a1e4568", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "32-element Vector{String}:\n", + " \"[CLS]\"\n", + " \"remained\"\n", + " \"[MASK]\"\n", + " \"below\"\n", + " \"their\"\n", + " \"levels\"\n", + " \"at\"\n", + " \"[MASK]\"\n", + " \"beginning\"\n", + " \"of\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " ⋮\n", + " \"in\"\n", + " \"oil\"\n", + " \"prices\"\n", + " \"had\"\n", + " \"been\"\n", + " \"holding\"\n", + " \"down\"\n", + " \"[MASK]\"\n", + " \"price\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " \"[SEP]\"" + ] + }, + "execution_count": 207, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "mask_idx = 104\n", - "\n", - "encoded_sentence = encode(cls.tkr, texts[1, :].sentence)\n", - "tokens = copy(encoded_sentence.token)\n", - "# for i in 1:size(tokens)[2]\n", - "# if i - 1 in idx_to_mask\n", - "# encoded_sentence.token[:, i, :] = 0\n", - "# encoded_sentence.token\n", - "# # print(argmax(enc.token[:, i, :]))\n", - "# end\n", - "# encoded_sentence\n", - "tokens[:, idx_to_mask, :] .= 0\n", - "tokens[mask_idx, idx_to_mask, :] .= 1\n", - "encoded_sentence.token = tokens" + "cmlm_decoded = decode(cmlm_tkr, encode(cmlm_tkr, masked_text).token)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ff1a1f63-8efc-477f-a39f-fb5f5cba2a5c", + "execution_count": 267, + "id": "6ee6f417-25b9-4775-b0fd-165750e0584f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "get_idx_cmlm (generic function with 1 method)" + ] + }, + "execution_count": 267, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "decode(cmlm_tkr, tokens)" + "function get_idx_cmlm(cmlm_decoded)\n", + " idx_to_mask = []\n", + " for (i, tok) in enumerate(cmlm_decoded)\n", + " if tok == \"[MASK]\"\n", + " push!(idx_to_mask, i)\n", + " end\n", + " end\n", + " return idx_to_mask\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "id": "af757265-243c-450e-8af4-adcd11b03485", + "metadata": {}, + "outputs": [ + { + "data": { + 
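# A hedged sketch, not one of the original notebook cells: get_top_k_idx above masks a
# fixed k = 10 words. A masking budget is often easier to reason about as a fraction of
# the sentence length; this reuses the notebook's own helpers and only changes how k is
# chosen. `mask_fraction` is an editor-introduced name.
mask_fraction = 0.3
k = max(1, round(Int, mask_fraction * length(word_attributions)))
idx_to_mask = get_top_k_idx(word_attributions, k)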
"text/plain": [ + "12-element Vector{Any}:\n", + " 3\n", + " 8\n", + " 11\n", + " 12\n", + " 13\n", + " 14\n", + " 15\n", + " 18\n", + " 20\n", + " 28\n", + " 30\n", + " 31" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx_to_mask = get_idx_cmlm(cmlm_decoded)" ] }, { @@ -660,7 +679,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 217, "id": "ff76df2f-f5e5-40bf-9a37-a517ac17acc7", "metadata": {}, "outputs": [ @@ -670,7 +689,7 @@ "merge_tokens (generic function with 2 methods)" ] }, - "execution_count": 149, + "execution_count": 217, "metadata": {}, "output_type": "execute_result" } @@ -679,22 +698,51 @@ "function merge_tokens(tokens, idx_to_mask=[])\n", " merged_text = Vector{Char}()\n", " for (i, token) in enumerate(tokens)\n", - " if startswith(token, \"<\")\n", - " continue\n", - " elseif i in idx_to_mask\n", + " if i in idx_to_mask\n", " append!(merged_text, \" [MASK]\")\n", " else\n", - " append!(merged_text, token)\n", + " append!(merged_text, \" \" * token)\n", " end\n", " end\n", " \n", - " return String(merged_text)\n", + " return chop(String(merged_text), head=1, tail=0)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "id": "7263a950-f7d8-4b02-a071-5314e7ad2559", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "group_into_words (generic function with 2 methods)" + ] + }, + "execution_count": 218, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function group_into_words(cmlm_out, delim=\"##\")\n", + " word_list = []\n", + " for token in cmlm_out\n", + " if startswith(delim, token) && length(word_list) != 0\n", + " last(word_list) = last(word_list) * chop(token, head=2, tail=0)\n", + " else \n", + " push(word_list, token)\n", + " end\n", + " end\n", + " return word_list\n", "end" ] }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 271, "id": "1ce0fbff-a40c-4e95-9489-c1f36478b29d", "metadata": {}, "outputs": [ @@ -704,7 +752,7 @@ "left_to_right_filling (generic function with 1 method)" ] }, - "execution_count": 162, + "execution_count": 271, "metadata": {}, "output_type": "execute_result" } @@ -716,22 +764,16 @@ " end\n", "\n", " masked_text = merge_tokens(tokens, mask_positions)\n", - " # print(masked_text * \"\\n\")\n", + " println(masked_text)\n", " \n", " mask_positions = sort(mask_positions)\n", " next_position = popfirst!(mask_positions)\n", " \n", " out = decode(cmlm_tkr, cmlm_model(encode(cmlm_tkr, masked_text)).logit)\n", - " # print(out)\n", "\n", - " next_token = out[next_position]\n", - " if startswith(next_token, \"##\")\n", - " next_token = chop(next_token, head=2, tail=0)\n", - " else\n", - " next_token = \" \" * next_token\n", - " end\n", - " \n", - " tokens[next_position + 1] = next_token\n", + " next_token = out[next_position+1]\n", + "\n", + " tokens[next_position] = next_token\n", "\n", " return left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", "end" @@ -739,133 +781,99 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 273, "id": "2d3bd657-d656-462c-97b9-58b5dd923394", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\"remained well below their levels at the beginning of the and to to the in demand and earlier declines in oil prices had been holding down consumer price inflation . 
.\"" - ] - }, - "execution_count": 163, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "left_to_right_filling(copy(toks), idx_to_mask, cmlm_model, cmlm_tkr)" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "id": "ff831709-34d2-4823-9b6d-021a61eaf132", - "metadata": {}, - "outputs": [ + "name": "stdout", + "output_type": "stream", + "text": [ + "[CLS] remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations [MASK] [SEP]\n" + ] + }, { "data": { "text/plain": [ - "(hidden_state = Float32[-0.26276532 -0.304134 … -0.4412557 0.8395367; -0.3135784 -0.24896632 … -0.5622759 -0.06703592; … ; 0.21413484 0.26681167 … 0.3372178 -0.5381135; -0.25502548 -0.17415507 … -0.120725214 -0.17302077;;;], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[32]), logit = Float32[-6.7711673 -7.257094 … -6.7281137 -13.482519; -6.676459 -6.8120203 … -6.3364005 -12.82685; … ; -6.089307 -6.884933 … -7.574874 -10.116859; -4.113231 -5.430876 … -3.934112 -11.168427;;;])" + "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil 
prices had been holding down consumer price expectations . [SEP]\"" ] }, - "execution_count": 122, + "execution_count": 273, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "out = cmlm_model(encode(cmlm_tkr, masked_text))" + "left_to_right_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" ] }, { "cell_type": "code", - "execution_count": 70, - "id": "0a989b05-9fc6-45d7-86b5-2352d26bf163", + "execution_count": 242, + "id": "c2a089f0-e814-43ba-8da6-e22e1f087a7c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(30522, 11, 2)" + "uncertainty_filling (generic function with 1 method)" ] }, - "execution_count": 70, + "execution_count": 242, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "size(out.logit)" + "function uncertainty_filling(tokens, mask_positions, model, tokenizer)\n", + " if length(mask_positions) == 0\n", + " return merge_tokens(tokens)\n", + " end\n", + "\n", + " masked_text = merge_tokens(tokens, mask_positions)\n", + " \n", + " mask_positions = sort(mask_positions)\n", + " next_position = popfirst!(mask_positions)\n", + "\n", + " logits = cmlm_model(encode(cmlm_tkr, masked_text)).logit\n", + " out = decode(cmlm_tkr, logits)\n", + " print(size(logits))\n", + "\n", + " next_token = out[next_position+1]\n", + "\n", + " tokens[next_position] = next_token\n", + " return\n", + " return left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + "end" ] }, { "cell_type": "code", - "execution_count": 47, - "id": "245bbd2a-c8e4-48c9-9ece-2c196d86bed2", + "execution_count": 243, + "id": "67ff3d94-59e9-4235-9285-4aeb3ba841ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CartesianIndex(102, 1)CartesianIndex(101, 1)CartesianIndex(104, 1)CartesianIndex(103, 1)" + "(30522, 34, 1)" ] } ], "source": [ - "enc = encode(cmlm_tkr, [\"[UNK] [MASK]\"])\n", - "for i in 1:size(enc.token)[2]\n", - " print(argmax(enc.token[:, i, :]))\n", - "end" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "a1a1381f-a69f-47dd-ad61-8609f1103bbf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "32×1 Matrix{String}:\n", - " \".\"\n", - " \"and\"\n", - " \"well\"\n", - " \"below\"\n", - " \"their\"\n", - " \"levels\"\n", - " \"at\"\n", - " \"the\"\n", - " \"beginning\"\n", - " \"of\"\n", - " \"august\"\n", - " \"quarter\"\n", - " \"to\"\n", - " ⋮\n", - " \"in\"\n", - " \"oil\"\n", - " \"prices\"\n", - " \"had\"\n", - " \"been\"\n", - " \"holding\"\n", - " \"down\"\n", - " \"the\"\n", - " \"price\"\n", - " \"stability\"\n", - " \".\"\n", - " \".\"" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = decode(cmlm_tkr, out.logit)" + "uncertainty_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" ] }, { @@ -933,81 +941,143 @@ }, { "cell_type": "code", - "execution_count": 56, - "id": "77d8ee90-b306-4237-a76f-dab338639c96", + "execution_count": 244, + "id": "6c6cc184-c8f7-4201-8886-f5da43785223", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
10×7 DataFrame
 Row │ sentence │ year │ label │ seed │ sentence_splitting │ event_type │ split
     │ String │ Int64 │ String7 │ Int64 │ Bool │ String31 │ String7
   1 │ remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
   2 │ A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
   3 │ inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
   4 │ They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
   5 │ In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
   6 │ In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
   7 │ In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
   8 │ Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
   9 │ Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
  10 │ Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& sentence & \\\\\n", + "\t\\hline\n", + "\t& String & \\\\\n", + "\t\\hline\n", + "\t1 & remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. & $\\dots$ \\\\\n", + "\t2 & A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. & $\\dots$ \\\\\n", + "\t3 & inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. & $\\dots$ \\\\\n", + "\t4 & They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. & $\\dots$ \\\\\n", + "\t5 & In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. & $\\dots$ \\\\\n", + "\t6 & In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. & $\\dots$ \\\\\n", + "\t7 & In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. & $\\dots$ \\\\\n", + "\t8 & Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. & $\\dots$ \\\\\n", + "\t9 & Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. & $\\dots$ \\\\\n", + "\t10 & Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. 
& $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], "text/plain": [ - "3-element Vector{Int64}:\n", - " 1\n", - " 3\n", - " 4" + "\u001b[1m10×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m sentence \u001b[0m\u001b[1m year \u001b[0m\u001b[1m label \u001b[0m\u001b[1m seed \u001b[0m\u001b[1m sentence_spli\u001b[0m ⋯\n", + " │\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Bool \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ remained well below their levels… 1996 hawkish 5768 ⋯\n", + " 2 │ A few participants also noted th… 1996 neutral 5768\n", + " 3 │ inflation was projected to pick … 1996 neutral 5768\n", + " 4 │ They noted that the realization … 1996 neutral 5768\n", + " 5 │ In the view of one member, howev… 1996 hawkish 5768 ⋯\n", + " 6 │ In the circumstances, most membe… 1996 neutral 5768\n", + " 7 │ In the staff forecast prepared f… 1996 dovish 5768\n", + " 8 │ Housing starts and the demand fo… 1996 dovish 5768\n", + " 9 │ Pressures on resources would ris… 1996 hawkish 5768 ⋯\n", + " 10 │ Price inflation had picked up a … 1996 neutral 5768\n", + "\u001b[36m 3 columns omitted\u001b[0m" ] }, - "execution_count": 56, + "execution_count": 244, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "idx = [1, 3, 4]\n", - "[1, 2, 3, 4, 5][idx]" + "n = 10\n", + "data = load_training_sentences()\n", + "texts = filter(:split => n -> n == \"test\", data)[1:n, :]" ] }, { "cell_type": "code", - "execution_count": 63, - "id": "8b99dd1e-95ce-4a21-8e1a-2106b53b7ee0", + "execution_count": 269, + "id": "6a14a2e5-4dc6-4fb4-94ee-01cf6526550c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "4×4 Matrix{Float64}:\n", - " 0.480747 0.166508 0.897285 0.770908\n", - " 0.401932 0.578816 0.878291 0.501171\n", - " 0.0753838 0.932655 0.0544905 0.132249\n", - " 0.301956 0.469797 0.485735 0.935682" + "Python: " ] }, - "execution_count": 63, + "execution_count": 269, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x = rand(4, 4)" + "cmlm_tkr, cmlm_model = load_model()\n", + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)\n", + "\n", + "using PythonCall\n", + "\n", + "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", + "transformers = PythonCall.pyimport(\"transformers\")\n", + "\n", + "# load pre-trained classifier and corresponding tokenizer\n", + "model = transformers.RobertaForSequenceClassification.from_pretrained(\"model\", local_files_only=true)\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")\n", + "\n", + "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")" ] }, { "cell_type": "code", - "execution_count": 65, - "id": "32023e43-20ed-4859-b23d-fcf6ccad4568", + "execution_count": 270, + "id": "5fb7e883-bad7-47fe-b44b-930a961225ee", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mfuse_unk is unsupported, the tokenization result might be slightly different in some cases.\n", + "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n", + "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: 
\u001b[22m\u001b[39mmatch token `` require to match with space on either side but that is not implemented here, the tokenization result might be slightly different in some cases.\n", + "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n" + ] + }, { "data": { "text/plain": [ - "2×2 view(::Matrix{Float64}, 1:2, [2, 3]) with eltype Float64:\n", - " 0.0 0.0\n", - " 0.0 0.0" + "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations . [SEP]\"" ] }, - "execution_count": 65, + "execution_count": 270, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x[1:2, [2, 3]] .= 0" + "attributions = get_attributions(texts[1, :].sentence, scorer)\n", + "\n", + "text = texts[1, :].sentence\n", + "word_attributions = group_into_words(text, attributions, cls.tkr)\n", + "idx_to_mask = get_top_k_idx(word_attributions)\n", + "\n", + "toks = decode(cls.tkr, encode(cls.tkr, text).token)\n", + "mask_toks_at_idx(toks, idx_to_mask)\n", + "\n", + "cmlm_decoded = decode(cmlm_tkr, encode(cmlm_tkr, masked_text).token)\n", + "\n", + "idx_to_mask = get_idx_cmlm(cmlm_decoded)\n", + "\n", + "left_to_right_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" ] }, { "cell_type": "code", "execution_count": null, - "id": "6c6cc184-c8f7-4201-8886-f5da43785223", + "id": "41f12b10-ce78-4d5d-99bb-7cbde4742ec2", "metadata": {}, "outputs": [], "source": [] From 954aa0bdd9c502033855aea3f288ad581c83880d Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Tue, 26 Mar 2024 23:37:12 +0100 Subject: [PATCH 7/9] add uncertainty based filling --- dev/notebooks/Project.toml | 3 + dev/notebooks/RELITC.ipynb | 616 ++++++++++++++++++++++++++++++------- 2 files changed, 513 insertions(+), 106 deletions(-) diff --git a/dev/notebooks/Project.toml b/dev/notebooks/Project.toml index 190dc572a..084d5ebee 100644 --- a/dev/notebooks/Project.toml +++ b/dev/notebooks/Project.toml @@ -1,10 +1,13 @@ [deps] CUDNN_jll = "62b44479-cb7b-5706-934f-f13b2eb2e645" CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" PreferenceTools = "ba661fbb-e901-4445-b070-854aec6bfbc5" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Transformers = "21ca0261-441d-5938-ace7-c90938fde4d4" TrillionDollarWords = "d66529d5-f4f4-49d9-a69b-da67f5535f0a" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb index d37d7a277..5b7bf8bc1 100644 --- a/dev/notebooks/RELITC.ipynb +++ b/dev/notebooks/RELITC.ipynb @@ -32,7 +32,9 @@ "using Transformers\n", "using Transformers.TextEncoders\n", "using Transformers.HuggingFace\n", - "using TrillionDollarWords" + "using TrillionDollarWords\n", + "using StatsBase\n", + "using Flux" ] }, { @@ -45,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "id": "a9c6c853-d63f-4f48-b188-0d12d9a11be0", "metadata": {}, "outputs": [ @@ -90,7 +92,7 @@ "\u001b[36m 3 columns omitted\u001b[0m" ] }, - "execution_count": 15, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -111,7 +113,7 @@ }, { "cell_type": "code", 
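# A hedged sketch, not one of the original notebook cells: the end-to-end cell at the
# end of the previous patch runs the pipeline steps one by one; this collects them into
# a single helper built only from the notebook's own functions. `filler` can be
# left_to_right_filling or uncertainty_filling; `relitc_edit` and `edited` are
# editor-introduced names, and the stripping of the literal [CLS]/[SEP] markers before
# re-classifying is an assumption about how the output should be fed back to `cls`.
function relitc_edit(sentence, scorer, cls, cmlm_model, cmlm_tkr; k = 10,
                     filler = left_to_right_filling)
    attributions      = get_attributions(sentence, scorer)
    word_attributions = group_into_words(sentence, attributions, cls.tkr)
    top_idx           = get_top_k_idx(word_attributions, k)
    toks              = decode(cls.tkr, encode(cls.tkr, sentence).token)
    masked            = mask_toks_at_idx(toks, top_idx)
    cmlm_decoded      = decode(cmlm_tkr, encode(cmlm_tkr, masked).token)
    mask_positions    = get_idx_cmlm(cmlm_decoded)
    return filler(copy(cmlm_decoded), mask_positions, cmlm_model, cmlm_tkr)
end

# Usage sketch: compare the classifier's logits on the original and edited sentence,
# mirroring the earlier `cls([...])` call pattern.
edited = relitc_edit(texts[1, :].sentence, scorer, cls, cmlm_model, cmlm_tkr)
edited = replace(replace(edited, "[CLS] " => ""), " [SEP]" => "")
cls([texts[1, :].sentence]).logit, cls([edited]).logit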
- "execution_count": 7, + "execution_count": 1, "id": "e00a7c85-2a74-41bf-ad75-899f3317dac9", "metadata": {}, "outputs": [ @@ -144,26 +146,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "2a605169-6109-41fb-973b-7d5a044e71f1", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n" - ] - }, { "data": { "text/plain": [ - "Python: " + "Python: " ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -183,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 245, + "execution_count": 6, "id": "161a51e5-e3cb-4ca7-bbe2-4e95bfbfc1a9", "metadata": {}, "outputs": [ @@ -193,7 +186,7 @@ "get_attributions (generic function with 1 method)" ] }, - "execution_count": 245, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -208,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 248, + "execution_count": 7, "id": "409785d3-9953-426f-937f-b5ebaf9a6ea7", "metadata": {}, "outputs": [ @@ -244,7 +237,7 @@ " (\"
\", 0.0)" ] }, - "execution_count": 248, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -263,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "id": "48278c99-7368-4eaa-b26f-1b91674fc514", "metadata": { "scrolled": true @@ -301,7 +294,7 @@ "), HGFRobertaForSequenceClassification(HGFRobertaModel(Chain(CompositeEmbedding(token = Embed(1024, 50265), position = ApplyEmbed(.+, FixedLenPositionEmbed(1024, 514), Transformers.HuggingFace.roberta_pe_indices(1,)), segment = ApplyEmbed(.+, Embed(1024, 1), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(1024, ϵ = 1.0e-5))), Transformer<24>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 16, p = nothing), Fork<3>(Dense(W = (1024, 1024), b = true)), Dense(W = (1024, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (1024, 4096), b = true), Dense(W = (4096, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(DropoutLayer(Transformers.HuggingFace.FirstTokenPooler()), DropoutLayer(Dense(σ = NNlib.tanh_fast, W = (1024, 1024), b = true)), Dense(W = (1024, 3), b = true)))), Transformers.HuggingFace.HGFConfig{:roberta, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Dict{Symbol, Any}}(:use_cache => true, :torch_dtype => \"float32\", :vocab_size => 50265, :output_hidden_states => true, :hidden_act => \"gelu\", :num_hidden_layers => 24, :num_attention_heads => 16, :classifier_dropout => nothing, :type_vocab_size => 1, :intermediate_size => 4096…))" ] }, - "execution_count": 18, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -312,17 +305,17 @@ }, { "cell_type": "code", - "execution_count": 246, + "execution_count": 9, "id": "08a37453-82d2-4319-a719-a21d6685c88f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "group_into_words (generic function with 3 methods)" + "group_into_words (generic function with 1 method)" ] }, - "execution_count": 246, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -349,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 249, + "execution_count": 10, "id": "02b63bfd-134c-44ce-b2de-67ee99067745", "metadata": {}, "outputs": [ @@ -385,7 +378,7 @@ " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])" ] }, - "execution_count": 249, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -397,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 252, + "execution_count": 11, "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", "metadata": {}, "outputs": [ @@ -407,7 +400,7 @@ "get_top_k_idx (generic function with 2 methods)" ] }, - "execution_count": 252, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -425,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 262, + "execution_count": 12, "id": "ee80bad8-3641-43d1-8d6d-6dcd87e40f15", "metadata": {}, "outputs": [ @@ -447,7 +440,7 @@ " 29" ] }, - "execution_count": 262, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -458,7 +451,7 @@ }, { "cell_type": "code", - "execution_count": 265, + "execution_count": 13, "id": "d2071233-269a-49f6-b9d3-5e87f5be6d9e", "metadata": {}, "outputs": [ @@ -468,7 +461,7 @@ "mask_toks_at_idx (generic function with 1 method)" ] }, - "execution_count": 265, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -492,7 +485,7 @@ }, { "cell_type": 
"code", - "execution_count": 266, + "execution_count": 32, "id": "04662dad-be82-4888-9c6f-c26093347e80", "metadata": {}, "outputs": [ @@ -502,19 +495,19 @@ "\"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\"" ] }, - "execution_count": 266, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "toks = decode(cls.tkr, encode(cls.tkr, text).token)\n", - "mask_toks_at_idx(toks, idx_to_mask)" + "masked_text = mask_toks_at_idx(toks, idx_to_mask)" ] }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 15, "id": "a596f684-1b82-4823-987b-adc633545977", "metadata": { "scrolled": true @@ -543,7 +536,7 @@ "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" ] }, - "execution_count": 166, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -562,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 207, + "execution_count": 33, "id": "af3721c6-c528-4256-bb22-a4476a1e4568", "metadata": {}, "outputs": [ @@ -598,7 +591,7 @@ " \"[SEP]\"" ] }, - "execution_count": 207, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -609,7 +602,7 @@ }, { "cell_type": "code", - "execution_count": 267, + "execution_count": 34, "id": "6ee6f417-25b9-4775-b0fd-165750e0584f", "metadata": {}, "outputs": [ @@ -619,7 +612,7 @@ "get_idx_cmlm (generic function with 1 method)" ] }, - "execution_count": 267, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -638,7 +631,7 @@ }, { "cell_type": "code", - "execution_count": 268, + "execution_count": 178, "id": "af757265-243c-450e-8af4-adcd11b03485", "metadata": {}, "outputs": [ @@ -660,7 +653,7 @@ " 31" ] }, - "execution_count": 268, + "execution_count": 178, "metadata": {}, "output_type": "execute_result" } @@ -679,7 +672,7 @@ }, { "cell_type": "code", - "execution_count": 217, + "execution_count": 36, "id": "ff76df2f-f5e5-40bf-9a37-a517ac17acc7", "metadata": {}, "outputs": [ @@ -689,7 +682,7 @@ "merge_tokens (generic function with 2 methods)" ] }, - "execution_count": 217, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -711,17 +704,17 @@ }, { "cell_type": "code", - "execution_count": 218, + "execution_count": 37, "id": 
"7263a950-f7d8-4b02-a071-5314e7ad2559", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "group_into_words (generic function with 2 methods)" + "group_into_words (generic function with 3 methods)" ] }, - "execution_count": 218, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -742,7 +735,7 @@ }, { "cell_type": "code", - "execution_count": 271, + "execution_count": 122, "id": "1ce0fbff-a40c-4e95-9489-c1f36478b29d", "metadata": {}, "outputs": [ @@ -752,7 +745,7 @@ "left_to_right_filling (generic function with 1 method)" ] }, - "execution_count": 271, + "execution_count": 122, "metadata": {}, "output_type": "execute_result" } @@ -764,12 +757,12 @@ " end\n", "\n", " masked_text = merge_tokens(tokens, mask_positions)\n", - " println(masked_text)\n", + " # println(masked_text)\n", + " \n", + " out = decode(cmlm_tkr, cmlm_model(encode(cmlm_tkr, masked_text)).logit)\n", " \n", " mask_positions = sort(mask_positions)\n", " next_position = popfirst!(mask_positions)\n", - " \n", - " out = decode(cmlm_tkr, cmlm_model(encode(cmlm_tkr, masked_text)).logit)\n", "\n", " next_token = out[next_position+1]\n", "\n", @@ -781,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 273, + "execution_count": 39, "id": "2d3bd657-d656-462c-97b9-58b5dd923394", "metadata": {}, "outputs": [ @@ -809,7 +802,7 @@ "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations . [SEP]\"" ] }, - "execution_count": 273, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -820,7 +813,7 @@ }, { "cell_type": "code", - "execution_count": 242, + "execution_count": 176, "id": "c2a089f0-e814-43ba-8da6-e22e1f087a7c", "metadata": {}, "outputs": [ @@ -830,7 +823,7 @@ "uncertainty_filling (generic function with 1 method)" ] }, - "execution_count": 242, + "execution_count": 176, "metadata": {}, "output_type": "execute_result" } @@ -842,34 +835,43 @@ " end\n", "\n", " masked_text = merge_tokens(tokens, mask_positions)\n", - " \n", - " mask_positions = sort(mask_positions)\n", - " next_position = popfirst!(mask_positions)\n", + " # println(masked_text)\n", "\n", " logits = cmlm_model(encode(cmlm_tkr, masked_text)).logit\n", " out = decode(cmlm_tkr, logits)\n", - " print(size(logits))\n", "\n", + " probs = softmax(logits[:, mask_positions, :], dims=1)\n", + " \n", + " entrs = []\n", + " for i in 1:length(mask_positions)\n", + " push!(entrs, entropy(probs[:, i]))\n", + " end\n", + " \n", + " next_position = mask_positions[argmin(entrs)]\n", + " filter!(x -> x != next_position, mask_positions)\n", + " \n", " next_token = out[next_position+1]\n", "\n", " tokens[next_position] = next_token\n", - " return\n", - " return left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + " return uncertainty_filling(tokens, mask_positions, model, tokenizer)\n", "end" ] }, { "cell_type": "code", - "execution_count": 243, + "execution_count": 179, "id": "67ff3d94-59e9-4235-9285-4aeb3ba841ed", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "(30522, 34, 1)" - ] + "data": { + "text/plain": [ + "\"[CLS] remained well below their levels at the beginning of august to help alleviate the weaker demand that earlier gains in oil prices had been holding down on price stability . 
[SEP]\"" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -878,7 +880,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 42, "id": "b014982f-8df6-4f10-b675-b60822dcfea2", "metadata": {}, "outputs": [ @@ -888,7 +890,7 @@ "merge_truncated_words (generic function with 4 methods)" ] }, - "execution_count": 82, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -920,28 +922,7 @@ }, { "cell_type": "code", - "execution_count": 85, - "id": "8adb05cb-84f0-4b14-a74b-1cfec686280a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"and well below their levels at the beginning of august quarter to to of strong demand and earlier increases in oil prices had been holding down the price stability\"" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "String(merge_truncated_words(out)[2:end])" - ] - }, - { - "cell_type": "code", - "execution_count": 244, + "execution_count": 44, "id": "6c6cc184-c8f7-4201-8886-f5da43785223", "metadata": {}, "outputs": [ @@ -986,7 +967,7 @@ "\u001b[36m 3 columns omitted\u001b[0m" ] }, - "execution_count": 244, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -999,17 +980,27 @@ }, { "cell_type": "code", - "execution_count": 269, + "execution_count": 45, "id": "6a14a2e5-4dc6-4fb4-94ee-01cf6526550c", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mfuse_unk is unsupported, the tokenization result might be slightly different in some cases.\n", + "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n", + "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mmatch token `` require to match with space on either side but that is not implemented here, the tokenization result might be slightly different in some cases.\n", + "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n" + ] + }, { "data": { "text/plain": [ - "Python: " + "Python: " ] }, - "execution_count": 269, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1032,18 +1023,26 @@ }, { "cell_type": "code", - "execution_count": 270, + "execution_count": 46, "id": "5fb7e883-bad7-47fe-b44b-930a961225ee", "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mfuse_unk is unsupported, the tokenization result might be slightly different in some cases.\n", - "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n", - "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mmatch token `` require to match with space on either side but that is not implemented here, the tokenization result might be slightly different in some cases.\n", - "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace 
C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n" + "[CLS] remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price [MASK] [MASK] [SEP]\n", + "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations [MASK] [SEP]\n" ] }, { @@ -1052,7 +1051,7 @@ "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations . 
[SEP]\"" ] }, - "execution_count": 270, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -1074,10 +1073,415 @@ "left_to_right_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" ] }, + { + "cell_type": "code", + "execution_count": 107, + "id": "7c0fde75-655e-48d1-8e26-c0553ff510c7", + "metadata": {}, + "outputs": [], + "source": [ + "using Plots" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "079dd980-cfe5-4fbe-aad7-b26500f0b646", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.2599115233639782" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "entropy([.2, .3, .5, .1])" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "ca6bef83-20a1-4897-a4ba-b4f0bb24e3b7", + "metadata": {}, + "outputs": [], + "source": [ + "enc = encode(cmlm_tkr, \"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\")\n", + "out = cmlm_model(enc)\n", + "colors = []\n", + "masks = []\n", + "for i in 1:size(enc.token)[2]\n", + " if argmax(enc.token[:, i, :])[1] == 104\n", + " push!(masks, i)\n", + " push!(colors, :red)\n", + " else\n", + " push!(colors, :blue)\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "65837706-437e-433f-8c22-f510e1addc7e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\"well\"] 0.8925405\n", + "[\"the\"] 0.001739249\n", + "[\"august\"] 2.5585306\n", + "[\"quarter\"] 4.2470326\n", + "[\"to\"] 5.439382\n", + "[\"to\"] 5.78602\n", + "[\"of\"] 4.4509435\n", + "[\"and\"] 2.307382\n", + "[\"increases\"] 2.3528287\n", + "[\"the\"] 4.842994\n", + "[\"stability\"] 2.4602342\n", + "[\".\"] 2.0014744\n" + ] + } + ], + "source": [ + "entropies = []\n", + "toks = []\n", + "\n", + "probs = softmax(out.logit, dims=1)\n", + "\n", + "for i in 1:size(out.logit)[2]\n", + " row = out.logit[:, i, :]\n", + " tok = decode(cmlm_tkr, row)\n", + " push!(toks, tok)\n", + " entr = entropy(probs[:, i])\n", + " push!(entropies, entr)\n", + " if i in masks\n", + " println(string(tok) * \" \" * string(entr))\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "7570a204-98e7-41dd-9b16-7949c822078f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlgAAAGQCAIAAAD9V4nPAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOzdZ1wUZ/c38GuBpSO9igKKiooFsAdQERRUbNhb7L1rbMmtJrYYSxJ7N/Zeomhs2IMEQcWCBTsIShUp22eeF/N3n3VhlxlcWGB/348v5GLOztllzp7pw6NpmgAAAOgqPW0nAAAAoE1ohAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDTyqkR5uXlLVy4kP30NE1zvfcbRVEckyIymawChtA0zfW9VKWPqxTvnWsIRVFcP66KuaiQUn1cVWZRQWWVaUhVqqwSlVMjTE9P379/P/vpJRKJWCzmNAuhUMj1z1ZYWMhp+vIJoShKKBRyChGLxVKplFNIYWEh10W2Yn5cUqmU66IiEom41lLFfO8URQkEAk4hqCxOIagsTiEVtrJKhF2jAACg09AIAQBAp6ERAgCATkMjBAAAnVbhGuGrV69CQwd5eLSuVatNx44DX7x4oe2MAACgKjPQdgJfefDgQUjIiIyMtTTdhhCSlna7TZt+Fy/uaNq0qbZTAwCAqqlibRGOGfNjevpfTBckhNB064yMfWPG/KjdrAAAoAqrWI3wzZv3hHh/PVb/7dsP2skGAAB0QMVqhITwtJ0AAADolop1jNDFxe7jxyRC6iiMvXJ2ttFaQgAAlVNOTk5YWJj8zjgURenpcdjyYe6Pw+Nx2DgpRQj7rHg83t69e728vNi/OHsVqxFu3PhLePiQzMztX3aQPrazG7Vp0+9aTgsAoLL59OlTcnLy6dOntZ2IZowfP/79+/c60QhbtWp5+fKWMWPmvn2bRgjPzc1py5ZNOGUUAKAUjIyM/Pz8tJ2FZlSrVq3sXrxiNUJCSJMmTf77L1IsFtM0bWRkpO10AACgiqtoJ8sAAACUKzRCAADQaWiEAACg09AIAQBAnVI8oLhyQSMEAIBiZGVlzZo1x9m5lpGREZ/P9/BouGzZ8rJ4QLzWVbizRgEAQOuePHkSGBiSl+clEv1BSFNCJG/exC5ZsnrHjn3//hvl5OSk2dklJib+/fffjx498vb2njdvnmZfvETYIgQAgK+IRKJOnbrn5AwQiS4R0o2QmoTUJmSAQBCTktKoR4/+Gp9jdHT0u3fvCgoKbt26pfEXLxEaIQAAfOXw4cOZmXoy2fIi9382EIu33L//8MaNG6V75bi4uG3btsl/jImJ2blzJyFk1KhRmzZtCggIKH3S3wCNEAAAvnLu3GWhsJeKY2eWNB16+fLl0r2ym5vbzJkzMzIymB9//vlnsVhc2jQ1Bo0QAAC+kpLykaZdVf1WLK6RnJxWule2t7fv0qXL3r17CSHv3r2Ljo4eOHBgKbPUHDRCAAD4irV1NUJyVf1WXz/H1tay1C8+fvz4rVu30jS9devWfv36lelNRFlCIwQAgK+0bdvSxOSSil9ShoZRrVq1KvWLBwYG8vn8q1ev7tq1a+zYsaV+HQ1CIwQAgK8MGTKEkHhCThT9FY/3u6WlrGvXrt/y+mPHjh0+fLizs3MFeTgGGiEAAHzF0dFx9+7tfP4QHm+Zwj7Sj/r6MwwNFx47ts/Y2PhbXn/IkCEZGRljxoyRjxw5csTGxmbp0qW3b9+2sbGZPHnyt7w+V7igHgAAlPXp08fBwWH8+BnPni00MalN01KB4HWLFu22bo3x9vb+xhdPTU01NjZWPE2mb9++ffv2/caXLTU0QgAAKEbbtm0TE+Nfvnz59OlTfX39xo0bu7i4fPvLrl27dufOndOmTTM3N//2V9MINEIAAFCpdu3atWvX1uAL2tjYLFq0KDw8XIOv+Y3QCAEAoPwMHjxY2ykow8kyAACg09AIAQBAp6ERAgCATkMjBAAAnYaTZQAAqiAjI6N3797Z2NhoOxHNyMvL+/nnn8voxdEIAQCqIBcXl+zsbJlMxvyYn5/P6bo9iURCUZSRkRH7EKFQaGBgYGDAoa1wysra2pr9K3OCRggAUDUpPtjBwMDAwsKCfaxEIpHJZJxupSYQCPh8PqdGyDWrMoJjhAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADqNcyPMzs4ePHiwo6NjREREbm5uyQEAAAAVGLfLJ4RCYadOnXr27Dlz5swpU6b8+uuvy5cvZxMok8ny8vI2b97M/Mjj8QICAurVq6dmepqm5VfAsJwFE8U1hP305RMi+4JTCE3TenocVmuYWfB4PK4h7Kcvn5DSfVx6enpV4L1TFFW6RQWVxT4ElcUppAJWFpuUuDXCEydO2Nvbz58/nxAye/bsixcvPnz4sHbt2qampuoDxWKxUCiMi4uTjzg4ONSqVUvV9BKJhOvyJ5FI9PX1uYZIJBL205dPiEwm4xoikUg4vXF5VpwW2Yr5cTFXO+nr63MKIYRw+mavmO+doqhSfFyoLE6zQGVxCiEVr7L4fH6J74JbI8zPz09OTmZuH7Bnz547d+5cvnw5Jyfn0qVLjRo1UhNoYmJib2+/fft2ljPS09OjaZrTTQ0oijI2NuZarpwuFy2fEGZ1klMIj8fT09Pj8/nsQ6RSqbGxMddyLYePi0mM/fT6+voyjpf90jTN9bLfirmoUBRF0zSnEFSWzlYW15CqVFkl4rayM3jwYCMjo/r16zdt2jQnJycxMTExMbF3796TJ0/WbFoAAADlg1sjNDU1jY2NjYqKatmy5YwZM0xNTXk8Xr9+/VJSUsooPwAAgDLF+axRPT29WrVqFRYWXrx4kRAik8m2bNkyZMiQMsgNAACgzJXypts//PBDcHDwf//9l5+f36hRox9//FGzaQEAAJSPUjZCX1/fR48enT171tPTs3379prNCQAAoNyU/jFMLi4uo0eP1mAqAAAA5Q+3WAMAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEQIAgE6rOo0wLS3t8+fP2s4CAAAqmUrfCGma/vPPLc7OTdq0mVavXq+GDdvFxcVpOykAAKg0DLSdwLf6+edVa9a8ysuLIcSEEJKe/qJz54HXr++uX7++tlMDAIBKoHJvEUokkk2b9uXlrWW6ICGEEM+MjFXz56/WZloAAFB5VO5GmJyczOPVI4T/9fB3CQmPtJMQAABUNpW7EZqYmBCSX2S4wMjISAvZAABAJVS5G6
Gzs7OZWTYhKYqDhob7e/bsqK2UAACgcqncjZAQ8tdfq+3tu/N4/xAiICTD1HRl7doHfvxxmrbzAgCAyqHSN8KAgO/u3v170KB/6tQJ8fX9/n//M0xIuGJmZqbtvAAAoHIozeUTsbGxe/fuXblypbGxscYTKgVXV9e9e9cWFhYaGxvr6VX61g4AAOWJc9u4ePFijx49AgMDDQ0NyyIhAACA8sR5i3DSpEk7d+4MDQ0lhGRlZVWrVo3P55cYVVBQkJqaGhQUJB/p37//gAEDVE0vFotpmhaLxewTEwgEEomE0xZhfn7RM061HyKTycRisUwmYx8iEon09PTY/CHkCgoKKIri8XjsQ8rn4yooKKBpmv30EolEJpNJJBL2IQKBgM/nGxhwWPgr5qJCUZRQKKQoin0IKktnK4trSJWpLGNj4xL/gtwaYV5e3suXL0NCQjIyMgYNGhQVFWViYrJs2bIpU6aoDzQ1NbWxsZk/f758pEmTJhYWFqqmZ8qV01UQ+vr6pdg1qiYHbYXIZDKRSGRqaso+xNDQkGu58ng8MzMzTuVKyuXj4vF45ubm7KdnypXTXnoDAwOu5Uoq5KJCUZSBgQGnI+KoLJ2tLK4hVayy1OOWsbm5uZWV1e3bt7du3RoSEvLPP/9cvHhxwIABDg4O/fv3VxPI4/GMjY2Dg4O/LVsAAAAN47aWx+Pxxo8fP3HixHv37v3www/6+vphYWGLFy/evn17GeUHAABQpjifLLNw4UILC4tHjx5FR0czI87OzriTC1RVRw8ebOPp2a527WbVq08eMiQ7O1vbGQGAhnE+WYbP50dGRg4cOLBHjx6rVq2ytraeN28etgihSvr1p5+erF8fmZtrQwgh5NihQ51u3br68CGno5gAUMGV5jpCKyurM2fOHDx4cPfu3Xp6ehs2bGjbtq3GMwPQrs+fPx/dujU2N1f/y0hvqfRDauq2deumz5unzcwAQKNKefm5vr7+4MGDL126dOHChY4dcWNPqIISEhICZDL9rwfDxOLb589rJyEAKBu4DwtA8Xg8XtEL9ChCeLh7EUDVgpIGKJ6Pj89NfX3p14NnjI0DunbVTkIAUDbQCAGKZ2ZmNnz69H5WVmmEEEIoQnYZGh5xdR05YYKWMwMAjSrNyTIAOmLKvHl1mzQZ+MMPeVlZekZGweHhl3/91cTERNt5AYAmoRECqBPauXNo5855eXkav6sTAFQQ2DUKAAA6DY0QAAB0Ghoh6JZPnz7dvn377t27QqFQ27kAQIWAY4SgKyiK+nnWrPP797eUSkV6encMDGYtXz5w2DBt5wUAWoZGCLpiyZw5kq1bbxcUMLtB8gnpN2OGnZNTx9BQLWcGAFqFXaOgEyiKOr5nz+IvXZAQYk7I+pyctQsWaDMtAKgA0AhBJ2RnZ7vweEo3DvUg5GNamnYSAoAKA40QdIK5uXkOTSsNFhJiYGiolXwAoOJAIwSdYGxs7FC7dszXg9uMjcP799dOQgBQYeBkGdAVaw8c6N2u3cCPH0OEQiEhBywtn3l5Hf/pJ23nBQBahkYIusLd3f3W06c7Nm5ceemSqZlZSN++a/r04fF42s4LALQMjRB0iLGx8cQZM0aMH8/n8w0MsPADACE4RggAADoOjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAEAVJxaLMzMztZ1FxYVGCABQZT1//rxrq1btatSY0KpVE2fn9StXUhSl7aQqHNx3GACgakpNTe3ftu32Dx98CSGECAiZ98svi96//+WPP7ScWQWDLUIAgKpp3dKl89PTfb/8aELImvz8s/v35+fnazOtigeNEACgarp/+3bg1ztC9QhpQciTJ0+0lVLFhEYIAFA1GRkbC4oMFvJ4RkZGWsimAkMjBAComoIjIg6bmCiO5BKSYGDQsGFDbaVUMaERAgBUTaMnTTrn6bnCzCyHEBkhMYR0trX9Zd06fX19badWseCsUQCAqsnIyOhSfPzGVav6HjyY+/lzA2/v7StX1q9fX9t5VThohAAAVRafz586b97UefPy8vIsLCy0nU4FhV2jAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCKBh586cCW/Zsl39+mF+fkcPHtR2OgBQApw1CqBJ8yZOTDlwYN2nT+6EpL5/v3j8+MunTm05fFjbeQGAStgiBNCYpKSkuCNH9nz65E4IIcSFkE25udlRUbGxsdpNDADUQCME0JhrV670ysnhfT3YNysrKjJSOwkBAAtohAAaIxIKjYs89dSYEFFBgVbyAQA20AgBNMa3efNr1tZKg1erVfNr21Yr+QAAG2iEABrTpk2brHr1NhsayrcKDxgY3K1Zs3OXLtpMCwDUQiOEyurt27eDw8Jaubt/5+HR/bvvHjx4oO2MCCHk8KVLb0eN8rG3D3Vw8HVwiBkw4NTNm7jZP0BFhssnoFJ69uzZwLZt/0hPD6BpQsjDDx/GBAWtOHYssF077SZmZma2fMOG5Rs2vH792t3dncfjlRwDAFqFLUKolBZNnrzh40emCxJCGhFyNCvrx3HjtJuVIjs7O3RBgEoBjRAqpeePH7f6esSVEGlOjlgs1k5CAFBpoRFCFULT9JdtRAAAltAIoVLy9PKK+3okjRB9KysjIyPtJAQAlRYaIVRKC9etG+vg8N+XH58R0sfW9peNG7WZEwBUTjhrFCqlBg0aHLp1a/7Ysa+fPCE07VCjxh+bNjVr1kzbeQFA5YNGCJVVnTp1jl65IpFIZDKZsbGxttMBgMoKu0YBAECnlaYRrl27NiYmRuOpAAAAlD/Ou0bT0tJmz55tZGR04cKFVq1alRzwhVQqffXqlfxHJycnU1NTrnMHAADQLM6N0NnZuVatWsOHD+/UqRP7Xpifn5+amtqhQwf5yMSJE8epvg+IWCymaVoikbBPrLCwUCqV6ulx2MYtKCjgeu+PcgiRyWQikYgq8jQfNUQikZ6eHp/P55QVTdOcEiu3j4vT9MwxQqlUyj5EIBDw+XwDAw4Lf8VcVCiKEgqFnC6dRGXpcmVxCqkylWVsbFxiSqU5Wcbd3b1nz54SiYTphQkJCb169bK3t1cTYm5uXrNmzRcvXrCcBVOunK4J09PTMzY25lSuNE2bm5uzn758QmQyGZ/P57S5zOfzuZYrIcTMzIzT8lQ+HxchhFNIKU6W0dfX51quFXNRoShKX1/fzMyMfQgqS2cri2tIVaqsEpWmETZv3jwmJmb+/PmEkHbt2nl6evbu3VuzaQEAAJSP0pwswzRCQohUKrW1tU1OTk5KStJ0YgAAAOWh9I1w8eLFJ0+efPDgwZw5c1auXKnxzAAAAMpBaXaNOjo6CoXCEydOXL582dbWdv78+ZwOqAIAAFQcpbyzzLlz58zMzGxtbf/vVbgcHQUAAKg4StnAatasqdk8AAAAtAK3WAMAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohA
ADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdJqBthMAAJKSknL69NnExDdt2jTu1aunsbGxtjMC0CHYIgTQsnXrdvj69p4yhWzY4D9q1Is6db6Li4vXdlIAOgRbhADa9PDhw59/3peVdZMQPiFEIOiSkjKkV6+Ily9j+Hy+trMD0AnYIgTQpm3bDmdlTWe64BfuBQWtY2JitJYTgI5BIwTQprdvPxBSU2mwsNAtLS1NK/kA6CA0QgBtqlevJo+XpDRoZva8Zk3l7ggAZQSNEECbxo4dZGu7ipDPCmP3ra0ftWjRQms5AegYnCwDoE21a9fevPmnSZPa5eeHFxa6Wlndc3B4GBm5T08PK6kA5QSNEEDLIiLCO3Zsd+3atZcvX/v69vf3X48uCFCe0AgBtM/CwiI8PDwvL8/CwkLbuQDoHKx4AgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEUKFIBKJflu4sF2DBu3q1x8QEpKQkKDtjABAV+DOMqB9AoGgo59fxNu3kYWFZoTcS0mZHBQ0Zd263gMHajs1AKj6sEUI2rd17dqer19PKyw0J4RHiC8hZ7Ozl8ycKZVKtZ0aAFR92CIE7bty6tQ6oVBxxIKQpjJZYmJi48aNtZUVALBE0/T58+f/i4oysbDo0KVLs2bNtJ0RN2iEoH0Ssdi4yKAxTYtEIi1kAwBcZGdn9+3Qof7r18G5uUJClq1fb9u27ebDh/X19bWdGlvYNQra5+vvf/XrmqEIucPjNWjQQFspAQBLU4cMmfrw4brc3O6E9CPkRGamw/nzm9as0XZeHKARgvZNmjfvVweHGB6P+bGAkIkWFj2GDTMzM9NuYgCgnkQieXLnTrhMpjg4u6DgyI4d2kqpFNAIQfucnJyO37q1MjCwpYNDkL19YPXqvsuW/bRihbbzAs4unj/fycenfb1633l6rl68WCwWazsjKFu5ubn2RZ4jbUlIYX6+VvIpHRwjhAqhVq1ax69dk0ql6enpLi4u2k6nakpNTY2JiaEoqk2bNmXxIa/6+eeYP//cnpNTgxABIet+/bXzyZMX7typRMeKgCtra+sPNE0TwlMYTCfE0tpaazlxhy1CqEAMDAyqVaum7SyqIJqm/zd1at+mTR99/33isGF9mzadP3EiRVEanEVWVtaRDRsO5+TUIIQQYkLI7MLC5klJRw4e1OBcoKLR19cPDA3dZGQkH6EImWNpOXLWLC1mxRUaIUDVt3bFioKdO29mZCzIz19QUHAzI0OyZ88fy5ZpcBaxsbEhIpHSpl+v/Pzrp05pcC5QAa3YsiUmODjc3n6toeEKE5MAe/s648cP/P57befFAXaNAlR9+zZtupGfL995xSNkcX6+/9atM376SVOzkEql/CKbmHxCJDhMWNUZGxvviYx88uRJdHS0g5nZ0cDASnd0A40QoOqjhUKTr0eMCeF9fRODb+Tn57fSxGTR16dIXDQxadmpkwbnAhVW/fr13d3d+Xy+gUHlayvYNQpQ9cn09ZU21mhCJBo9h8XFxcWva9eZ5uaFX0aO6esfd3EZOnKkBucCUBbQCAGqvuCuXXfy+Yoju/n8oLAwzc5l9fbtdRYvDnR1bWNv7+fkdK1//3P//WdsXPSuQQAVS+XbhgUArhb9/nvfBw/inj3r+ekTj5BTVlav69Q5unatZueip6c3btq0cdOmZWVl2draavbFAcoOGiFA1WdmZnY2Jub8+fPXTp+maTq8W7cwTW8OKjI0NCy7FwfQODRCAF0RGhoaFBRE07SRwlVfAIBjhAAAoB2yr29Sqi1ohAAAUK5omt7/118tPTw61q3r5+w8pk+f9PR0LeZTyl2jhYWFpqammk0FAAB0wdK5c99s3nzx82dLQgghZ06eDIuNvfrwobbusMh5i/Ddu3dBQUHm5uZubm6xsbFlkRMAAFRVnz9/PrVr19YvXZAQEi6TjU5L2/LHH9pKidsWYW5ubocOHcaPH79///41a9YMHjz4+fPnbALFYvGnT5+WLl0qHwkICGjZsqWa6Wma5pSbSCTi8Xh6RR4Ioj6E6+lt5RAik8lEIhGnG/aLRCI9PT1O91AWiUQGBgY8Hq/kSRVCyufj4n99xZt6EolEJpNxfSMURXE6OFExFxWKopi/I/sQVJYuVxankLKrrPj4eH+ZTGl56iSRzLx4UTRnDpu5cHojfD6/xKWX2xbhb7/95u/vP2PGDGdn52XLlr1///7du3csYymK+qRAs3e+BwCASkFPT09aZFBKiBYf18Vti9Dd3b1fv37M//l8fu3atdPT02vWrFlioKGhoY2NzcqVK1nOiMfjcT3JWyaTGRkZcVpvFYvFXM8jL4cQZn2K61z09PS4bkgZGRlxWt0rn4+LSYz99Hp6esyfnn0IRVFc74hYMRcViqIoiuIUgsrS2criGlJ2ldW8efOp+vpiQhQ3606ZmLTv0YPN7Erx3kvEbYtw9OjRjRs3lv9oamoqkUgIIW/fvj1//rxmMwMAgKrH1NR03Lx5EdbWbwkhhEgJ2WxkdKpmzRHjx2srpW+6fILP58tksrdv37Zv3z45OVlTOQEAQBU2bvr0GceOjfHx8XNyCvDw+DBp0qW7d7V4W9pvurOMnp7e69evhw0bNmvWrNGjR2sqJwAAqNraBwW1v3s3Ly/PwsJC27l8WyPU19cfO3bsH3/8MWbMGE0lBAAAUJ6+qRF26dKlf//+6IIAAFB5fVMjnDlzpqbyAAAA0ArcaxQAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AI2aIoKiEh4Z9//nn06BFN09pOBwAANOObnj6hOxITEyMixmVluQsEtUxMTjk6pp04sbVOnTrazgsAAL4VGmHJCgoKOnYc/P79AUK8CCH5+SQjIyE4eMDz5/8aGRlpOzsAAPgm2DVaspMn/87OjmC64BdNcnI6nj9/Xms5AQCAhqARluzhw5cCQSOlwby8xg8fJmklH4DykZKSMnToVB+f0BYtwn/99U+xWKztjADKBHaNlszZ2cbA4INU+tWgoeEHFxc7LWUEUOZu3vy3d++pGRmLaXo5IXlPnuzZvTvozp3z5ubm2k4NQMOwRViynj27Wln9RYhIYazQ0nJ/ly5h2koJoKwNGzYzPf0UTYcRYkqIY2HhD69fD16y5A9t5wWgeWiEJXNzc/v559H29kE83hFCEni8g/b2QStXznR0dNR2agBlIi0tLT/fhhBXxUGRaNCpUxe1lRJA2cGuUVYmTBgeFtZu27YDCQnn/Py8xow55urqWnIYQOUkEAh4vKK7QE1FIlExUwNUcmiEbHl4eCxb9mNeXp6FhQWb6UUi0bJlfx448HdhocjV1XnlytmBgQFlnSSARtSoUYOmnxEiIYSvMBzdpIm31nICKDPYNVomxGJxs2YdV60yfPHicmpqXGzs7z16/Pr771u0nRcA
K3w+f/z4wRYWkwkRfBl7YW8/a/nyWdpMC6BsYIuwTOzYsefVq5DCwmlfBjxzck4uX95yzJjBZmZm2swMgJ2FC2dZWW1ZsaIVRbkTkm9vT+3cub5+/frazgtA89AIy8SpU9cLC+d+PWYok7WLj48PDAzUTk4AXPB4vGnTxk2bNi4pKcnOzs7a2lrbGQGUFR1thEKh8NSpUwm3brnXr9+5W7caNWpo9vXFYgkhhkqDFMWXSCSanRFAWatevbqxsbG2swAoQ7p4jPD+/fv+des+HznSf8MG3tSpffz8tvz+u2ZnERTUjM+//PUYbWBwo2nTppqdEQAAfCOda4RSqSv7ZR0AACAASURBVHRkt25Hk5MXFBZ2IWSMTHYzI+PI0qX379/X4FymTh3j5LRdT09+M9ICC4up/foF2draanAuAADw7XSuEd65c6d5YaGHwgifkFlZWYe2aPKUzmrVqsXGnu3S5ZCjo5+9fYCra9tlyxqtXbtEg7MAAACN0LljhB8+fKghFCoN1iQk7c0bzc7Iycnp9Om/CCHp6ekODg6afXGtKCgoYHkNJQBAJaJzW4Q1atRIMjFRGnzO47l5eRU7/bczKTK7ykUsFv/vf786O/s0btzPycl30KCJ2dnZ2k4KAEBjdG6L0NfX97mNzd3MTN8vI3mE/GZru2fCBG2mVYH16jXyypWGAkEsc5ORI0eOxMeHP3x4jc/nlxgLAFDx6dwWoZ6e3v7z56fVrz/exmabnt4ic/O2Tk5zNmyoU6eOtlOriBITE2NjcwWCufJbbUmlfVNSOhw6dFS7iQEAaIrObRESQjw8PK4/fnzz5s37d+/61a49o23batWqaTupCuru3bu5uUFKgwUFwVFRp4YMGaiVlAAANEsXGyEhhMfjBQYG+vj44OwP9fh8vr5+0QcOCI2MsF9UyyiKevnyZXp6uo+Pj6mpqbbTAajEdG7XKHDi7+9vZnaGEEpx0MrqWM+eypuJUJ4uXozy8Gjp7/9Tjx673d0Dpk9fIJVKtZ0UQGWFRgjqVK9effjwTlZWAwl5Rwgh5JOFxTxf3+zQ0E5azkyH3bt3b+DAhe/eRaanH87M3JqREbt1K2/y5B+1nReUB4FAcPny5f3799+8eZOiqJIDgAU0QijBb7/9b//+YX5+46pXb9awYa/ly2tfunRE20nptAUL1mZlrSLE8cuAfmHhouPHLwsEAnVhUPlFRV2rWzegT5+LkybJevQ44OUV8OTJE20nVRXo6DFC4KRz59DOnUPZP5QYytSTJ08J8ft6jMfjNXj79q1XmV0OC1r34cOHAQNmZGRcIMSeEJKdTbKzE8PChjx/Hm1oqHyLf+AEW4QAlYy5uQUhOUWGs3Dyc9W2d++RnJzxTBf8okFubuD169e1llNVgUYIUMkMHBhuarrt67EX1tZ5Li4u2kkIysWTJ2+l0rpKg7m5Xq9fv9FGOlUKGiFAJTNt2timTWMsLWcQ8oCQdwYGe5yc+hw6tE7beUHZcnNz1NNLURo0N3/n5ORY7PTAHhohQCVjaGh469bp9etbdunye6tWU2bNSk5MvIJHXVZ5gwdH2NhsIETxmQHpZmZnO3TooLWcqgqcLANQ+fB4vMGD+w0c2EcgEJiZmWk7HSgPtWvXXr58wvz5gVlZoynKw9DwoY3N3gMH/sAC8O3QCAEAKodRowZ37hx0/PjfCQnnW7Vq0K/fdZzIrRFohAAAlYaLi8vkyeNxLZNm4RghAADoNDRCAADQaWiEAABQaeTm5mr8NdEIAQCgoissLJwy5UdHx6Z+fsMcHX2+/37qp0+fNPXiOFkGAAAqupCQfnFxncXiu4ToEUIfOLD/7t0e9+9H6evrf/uLY4sQAAD+v+zs7JEjZ9arF1CvXmBExOh3795pOyMSHR397Jm1WDz+S8/iSaWD373zPXv2nEZeH40QAAD+T3JycuPGwXv2tHn9+ua7d7dPnhzQrFmP+Pi72s3qzp272dntlQY/fw66fj1eI6+PXaNVAUVRenpYpwGoZHJycvbtO3T79uMGDdwHDYrw8PDQdkZkypRFqam/0XQw8yNNB2VkHB4xYnxCwmUtZmVkZKinJ5TJlIaFJiaaef4Uvj0rt+jo202ahLi7B9ao0bp1626PHj3SdkZfEQqFJU8EoJMuXoxq0CB4xgzq4MH+CxY4tWw5eM2azdpOisTG3qNppZuX1klLy5NIJNpJiBBCSFBQO2vrE4TQioO2tke7dg3SyOujEVYgGRkZgwZN8vBoVadOQGBgxMOHD9VPf/78pW7d5j14sCU9/c7Hj3ExMUvatx9x//79Emd07969/fv3nz59OisrS0O5fyU3N3fEiOlOTj4NG/Z0cfFZsGCFWCwuixlBFfD335HffderYcN2gYG9L1y4yCbk6dOnixevnDx5wc6dfwkEghKnf/XqVYcO/VxdW3p4BHp7B0VFXSkxRCqVbt26q3//yYMGTTl48AhN0yWGnDlzzssrsFattq6uzfv2HZuenq5m4oKCgu+/n/XhwwWpdCIh/jQ9OCPj2rJlhxITE0ucUZmiaR4hPKVBHo8vK7I5Vp7q1q3bp4+fldX3hCQTQgj5aGk5qV07i1atWmlmBnS5ePHiRe3atdlPLxKJhEIhp1kUFBTIZDJOIZ8/f+Y0fZmGpKSkVK/uo6//NyEUITQh921tW58/f0lNSJ06bQhJI4RW+Pfgu+96qAnJysoKb9Omn53dWgODn83N/Rwcdqxfr9k3IpFIGjQI4PP3fnkjYhOTJZ07D2I5l7y8PJZTMsRisUAg4BRSWFgokUg4hVSoRUVOJpPl5+dzCqlolTV8+DQrq+8JeUEIRcgza+t+kyf/qD5kzpyldnbtCTlIyBUjo19dXX3v3bunZvoXL144OvryeNFfauSdtXXHgwePqQn58OFDnTqtzcx+ISSOkJhq1WY1bdpB/Ztas2aTlVUvQj4wc9HXP+3q6pOZmalq+sjIyGrV5n1dvDSPd3TOnF/Uv31G2S1dbdp0J+Th14llubu3YBNb1pV18uRpP7+w6tV9mzTpuGfPQYqiOM1LjdI0woyMjPj4eLFYzD4EjbBEAwdO5PEiv17+Pnh4tFQ1vUQisbdvoVRIhNDOzn5q5tIjIOCMvr58aiEhnWxsbt64ocE3cvjwEQuLuUpZ2dl1f/jwIZtwNEL2KnsjvHfvnq1t+NeLCmVnF/zs2TNVIVevXrO27vFlHYv599LdvZmaDHv2HMnjXfl6Lp9cXX3VJNalyxAe7x/FEEPDnePGzVY1vUgkcnBoTIhIMcTAYM/MmQtVhezatUtff12R+o0eMGCSmsTkym7piouLs7NrRcizLymlWlt3OnbsFJvYCltZJeK8a3Tnzp1eXl7dunVzc3PbunWrZjZLgZBbt/6j6dCvxxwLCsxzcnKKnZ7H4xFCFfebYgcJISQ9Pf3zs2ddFXZxGBGyJDt7x2+/lSZjFaKi4vLylB+QlpMTcudOnAbnAlXAuXNR2dl9vx7jZWf3uXAhSlXI5s1HcnJmfr3vrlZBQcOEhARVIXFx92m67ddjlhKJQ0Z
GhqqQ2NgHSsUoFg+JjFSZ1dOnT2nah5CvTtyQSjtfuhStKsTDw6NatSdKgwYGiY0a1VIVUj78/Pz++Wdd48YTHRzaODj416nT58CBGRER3bWbVVnjdtZocnLy3Llz79+/7+rqeubMmREjRty/f3/Dhg08nvI+ZSX5+fnv3r1TPCdq0qRJY8eOVTU9s7nJ6fBsYWGhVCrldPJkQUFBiZmXWwhFUcUdstXPzc3l8/nFhjg4mGdkvCJEsXKiGzSom5+fX+z0z549q00pt8l6hLx68UJViCLW750mRPmIoL6+UCo1ZDkXFrP4/yQSiUwmk0ql7EMEAgGfzzcw4LDwV6hFRY6iKGbzjn1Ihaqs7OzPNK38vU9RZp8+pahaVFJSPhBSXWlQJHJ9+/ZtnTp1VM9K+SOiKIlAICh2LhKJhKaNiwwbSCS0qqwEAgFNF10BpWhaZUiTJk2srGbl5NwlxPfLWJq19fp+/Y5rtBhLE+Ll5fXvvycLCgqkUqmlpSUhhE1KpKJWlrGxcYkpcWuEiYmJrq6urq6uhJDw8PDo6Oh27do5Ozv/73//Ux9obm7u4uISFfX/V6mcnJxMTU1VTc+Uq5GREfvc9PT0jI2NOZUrTdPm5ubspy/TED+/JikpNwhRXHXNNTTMdHd3VxWyY8eKrl37ZWauI6QVIYTHu+joOGfz5qOqZufh4fG+yOeTTIhL9epsMmT5Rnr3Djly5MinT50VQy0tz3TsuJvlR8fpE2YaobFx0W8ulfT19bmWa4VaVOQoitLX1+f0XNYKVVlt2zbfti3q8+eeioPW1tcDAgaqCm/QoNa//z4h5KvLDIyMEhs0GKoqJDCw1YED/9B0V4WxdHPzvJo1a6pKzNBQRIiIEMVPKdPKykzVLHx9ffX1EwgREGIiH+TzI8PCAtV8DpcvH+7efeSHDzU/f25sbv7O3Dx67951tWqx2iIsh6XLyMioylQWqxdlLzs729zcPD4+Xj4SHx9vbGz8+PFj9YE4RliipKQkBwcfQv77sms+ucRD+jRNP3/+PCRkgIuLr6urX0TE6JSUFPXTt2/UKO7rgxKDraxOnzypwTdC03RQUIS5+XxCcpljDFZWQ0o8A0IOxwjZq+zHCKVSacOGgfr6h+UHCA0Mdvv5hag5CSIxMdHWtvWXRYs5weSSr2+Imrm8f//exaWpwmloD2xt2/zzz0U1IUuXrrGwmEKIVH4w3dKy/969h9SEbNq0y9o6jJB3TAiff9jNzS8nJ0f9J0BRVFxc3I4dO27cuCESidRPrKgclq6qVFkl4nyyzLx58zw9PTMyMuQj48aNmzlzpvooNEI2kpKS2raNcHT0c3Bo0aBBu0uXLrMMFAqFLM9devXqVcvateebmZ0jZD+PF2xn99PkySznwv6NyGSytWu31KsX6OLi16RJyIkTf7MMpNEIuajsjZCm6ezs7P79x9nb+zg4hNvb+wwbNjU3N1d9yNGjfzs7+1hZ/WBouNrOrl/Llp0/fPigPiQ9PX3AgAmurn4uLn7+/j3v37+vfnqKoubOXWJv38zaeoaNzRRHR9/VqzeW+F6ioq40btzB0dGvenW/4cOnZ2dnlxjCqJhLV1WqrBJxboRSqTQsLKxRo0Zv375lRpYuXTphwgT1UVwb4ePHjx88eMApsWvXrpVYD0pOnTrFaS2MpumjR49ymr6wsPD06dOcQpKTk2+wO41T7u7du8+fP2c5sUQiOXr06Pc9ey5bsCAxMZH9XI4fP87pC5GiqGPHStiiVfL58+fz589zCnnx4sWdO3c4hcTExLx584ZTSGRkZEFBAacQrouKWCw+yW7TXC49PT0qKopTSCkq6/r161wr6++//+ZUWVKpdOvWreyXrvz8/MjIyJkzZyYkJLCfS3Jy8q1bt9hPn5OTs23btkOHDnFaObt48WKJG4JKyqey/vnnH04hVaay2CjN5RNCofD777+3sbFZtGjR+vXrXV1dS1zD4toIFy1aNH/+fE5Z9erV68iRI5xC6tat++TJE/bTi8ViPp/PaRbx8fFNmjThFLJr165Bg9hedceYPn36ihUrOIUEBwdzLQwnJ6fU1FT202dkZNja2nKaxZUrVwIDAzmF/P777xMnTuQUMnz48C1btnAKad68+e3btzmFmJqactpce/HihYeHB6dZnDx5Mjw8nFNIKSorIiLi8OHDnELq1avHaR1LIpEYGBhwmkVVqixnZ+f379+znz4zM9PGxobTLK5evRoQEMAppMpUFhulubOMkZHRX3/9dejQoaSkpOjo6JMnTzZp0kTjRy7LLaqi0eX3Xgq6/HGV4l3g4yqfqCpApz6u0t90OyQkJCQkRIOpAAAAlD/caxQAAHQar3y2ZB8/fty6devQ0NCSJyWEEJKYmEhRlLe3N/tZ3L59u0aNGsw1jixduHChTZs2FhYWLKenafrEiRMRERHsZ/Hp06c7d+5w2nR+8+ZNenp6ixYt2IckJCQYGxvXq1ePfciNGzfq1avn6OjIPiQyMjI4OJj9dUVisfj8+fPdunVjP4uMjIzHjx+3a9eOfUhSUlJ+fr6Pjw/7kLi4OBsbG5YXbDGioqJ8fHxsbGzYh5w8ebJbt27sH59dUFBw48aNsLAw9rNITU19/fr1d999xz6kdJXl6upao0YN9iGoLFQWS+VQWcHBwWPGjFE/TTk1QuasMHt7e5bTM6dQW1lZsZ/Fx48fraysOF0pnJyc7OrqyukmBW/fvnVzc2M/vUwmS0tL49SemRtesP+sCCE5OTkGBgbsv3cIIWlpaXZ2dqruWVOsd+/eqbkGuVhcPy6JRJKRkeHi4sI+JD8/XyQS2drasg/JzMw0MTHhdB36+/fvnZyc2Nce4f7eaZpOSUnh1G9EIlFOTo6TkxP7kNJVlqWlJacLq1FZqCyWyqGyGjduXOKqTDk1QgAAgIoJxwgBAECnoRECAIBOQyMEAACdVrEaIaeH6VTkEIqiqCIPPNLsLKpSSMXMqnxCaJqWKTwhsixmUZVCUFlcp+d6FkjFfCOlC2GvAjXC9PR0Pz+/ffv2sQ9JSEjw8vL677//2IecO3fO29v7zZs37ENWr17dvn37vLw8ltNTFDVixIhRo0axr9iCgoKQkJDly5ezzyo5OblRo0anT59mH3L79m0vL6+HDx+yDzly5IiPj09aWhr7kAULFnTt2lUoFLKcXiaTDRkyZPr06ewr9tOnT999993GjRvZZ5WUlNSgQYMrV66wD7lx44aXl9eTJ8pPT1Vj+/btrVq1ys7OZjk9TdNTp07t378/+wcEikSinj17zp07l31W5VNZ//zzT8OGDXWzsuLj4xs2bFgBK2vo0KHjx4/XwcriTLN3bPsW7du3X7FihZWV1aFD6h53IpeXl9e0adMlS5bY2Nj8999/bEKePn3aokWLKVOm1K5d+927d2xC/v777x49enTr1q1t27Ysb3C3ZMmSqVOnNmnSZMSIESzvpTtw4MAlS5Y4OTmtXr2azfQymax169YrVqywtLQ8c+YMm5DMzMwmTZosXLjQwcGB5W2X4+LiAgICRo8e7e
3t/fHjRzYhe/bsGTRoUIcOHTp37szyKQc//PDD3Llz69WrN23aNDXP31EUHh7+66+/2trabtu2jc30IpHIz89v+fLllpaWV65cYROSnJzs4+Mzd+7c6tWrs7yh+dWrV0NCQgYNGtSsWTOWt11et27dmDFjWrdu3bdvX5a37R87duzChQvd3NwWLFjAZnqapoOCgpjKOnjwIJvp8/PzuVbWs2fPWrRoMXXqVE6V1b179+7duwcGBupaZcXHxzOV1bBhQ5aVtXfv3kGDBgUHB4eFhbF8KIS8sqZOnVrWlWVlZVV2lXXt2jV5ZbF/pgcnFagRFhYW0jR969Yt9r2QCfnrr7/YVywTMnv2bJYVK5FIRCKRSCRi3wuZWTDlwbJimZDExET2FcuEXLhwgX3FMiHr169nX7GFhYUURY0bN45lLxSJRFKptLCwkH0vZLJ6//49+17IhMTHx9va2m7dupXF+/i/kOPHj7PvhUzI0qVLWVasTCYTCARSqXTw4MEse6FAIKAoKjc3t3Xr1n369GHTC5msXr16xb4XKlYWy17IhOzevdvGxiYmJoZ9yJw5c7hWFvteWPUqa/z48Q0bNmTzcA95ZbHvhUxWqamp7HuhvLLs7Oy4Vhb7Xsi1siiKUqyssuiFFaIRRkVFKS7T//77r5WV1d69e1VNT1HU5ctfPavv4MGD6iu2sLDw33//VRz58ccf3dzcXr9+rSokNTX10aNH8h/FYnGPHj38/f3VPA0rISEhPT1d/mN2dnazZs2GDx+upmJv3Lih2C2ePn3q4uKybNkyVdPTNK303i9evGhpaXnq1ClV00ulUqUFdOvWreorNjc3V3HFgqKoiRMnenl5qXn6xOvXr1+8eCH/kanYTp06qanYO3fuKHaLDx8+NGjQQH0vvHr1qmK3uHfvnp2d3YYNG1RNTxf5uCIjI62trZUGFYnF4uvXryuOrFy50snJSc3jFDIzM+/evSv/USqVDh061MfHJysrS1XIkydPkpOT5T/m5+cHBgb27t1bzXMlo6OjFbvF27dvPTw85syZo2p6WnOVpeb5AIWFhUrPNmIq69WrV6pCiq2s7777rspUlpqHQ33+/FmpsiZNmlRiZSUlJcl/LHVljR07VrOVdenSJcUfy7OyMjMz1SRWCtpvhDKZLDg4eOTIkewrNisrq169er///rvioPpe+PjxY2dn57NnzyoOqu+FkZGRLi4uDx8+lI+UWLGrVq3y9vbmVLHDhg3r0qUL+4oViUStW7dmjqjJqa/Y1NRUd3f37du3Kw6q74V37txxcnK6du2afKTEij106JCbmxunil2wYEHz5s05VWzPnj2V9iKqr9j8/Hxmt5XiYGRkpJWVlaqKffXqVfXq1ZUePKS+Yq9du+bk5BQbGysfKbEXbtu2rU6dOkq9sG3bthEREap64dSpU5X2STC9cPbs2cVOX+rKWrNmjeKg+l7IVFZkZKTioPpeePbsWa6VtXr1aqV9EmwqS2mfRHlWlqpeGBcX5+joePXqVfkIU1n16tVTVVmHDx+uWbNm0crq2LGjqspauHChtipLTS9kKktpb5/6yrp+/bqqytJsL9RyIzx48OD8+fOZPWlKi6Cqil2wYMG+ffuYPWk7duxQerVie+HgwYOjo6Pj4+OdnZ2LXXtV6oVCoTAsLOzNmzfHjx93cXFR3M+jqmLfvn3brVu3z58/L1mypFGjRorlp6pir1+/PnLkSLFYPGjQoIiICMVfqarYLVu2/Prrr8yetEWLFin+SlXFzpgx49SpU69evXJ3d1d6oGWxvZCiqIiIiPv379+8edPJyenevXuKvyq2F37+/Dk0NDQtLe2vv/5yc3NTXA9Q1QufPHkSERFRWFg4e/bsli1bMue2MVRV7NmzZ6dMmSIUCrt16zZixAjFX6mq2JUrV27cuJHZk6a02qSqF44dO/bSpUuJiYnVq1dXekRwsRUrlUq7du369OnT8+fPOzk5PXv2TPFXxfbC9PT0zp07Z2VlrVu3ztPTU3FBUtUL4+PjBw4cKBKJxo0bFxwcrPjJqOqFpausvXv3qqmsor2wxMpS6oWlq6zw8PDPnz8vXbpUqbJycnLUVJZIJBo0aFCvXr0Uf/Xs2bMSK0vpy73UlaXUC5nKunfv3q1btxwdHYtWVtFeKK+s3bt316xZs2hlFe2FT58+ZSprzpw5XCure/fuw4cPV/xViZXVtGlTpdUmVb1QsbKUnteoqrLCw8OfPn164cIFVZWlwV6o5UaYnZ3t5+fHVOzjx4+VfltsxT579qxGjRpMLyz6NMtie2FUVJSTk1N0dPTjx4+Z3dOKiu2FW7ZsqVWr1ps3b+Lj45WWm2IrlqKoyZMnM4NxcXFF32bRihUIBKGhoUOHDhWLxYpVwSi2FzKLMlOxRfetF1uxCQkJLi4up06dev36ddHlptheePr0aWdn53v37j148EDpOeNFe+HChQtjYmJWrlxZv379tLS0ou+92F4ok8mGDRvWsWPHwsLCoiHFVmxeXl5AQMDEiROFQmHRDdliK/bt27e1a9dmKrbodn+xvTAmJsbJyenSpUvPnz9nbsupSKliN2/efOrUqf3797u6uj59+vTevXuK3zu06l74008/MYNF33uxvVAsFvfu3btnz54ikSg+Pl4ppNheyFTWvHnz2FfW8+fPa9SowfRCVZWl1AvZVJZSL9yyZYuHhwenypoyZYqqyiq2F8orSyQSFa2sYnuhYmUpfucySl1ZSr2wxMoq2gtXrlzp5eWlprKUeqFMJhs+fHhISEjpKqvohmzpKqtoL2Qq6+LFi2wqi3HgwIESK0tTvVD7u0azs7NDQkJSUlKK/e369etr1aolFAqTkpKYTS6app89exYcHKy0JMmNHTs2NDSUpukLFy5MnjyZ+ZNHRUUNHDhQVQ5t2rT54YcflAa3bNmitG4oV1BQ4OjouGfPHsVBpmKPHDlSbEhSUpKxsbHiNj5N0wKBoEePHkVrlXH69GkbGxvF1UCapj98+BAUFKRqh9vSpUu9vb2VFpqEhISwsDBVe5D69+/fr1+/orMeM2ZMsdPLZLJGjRotWbKEpuljx44FBQUxK+krV65ctWpVsSHZ2dlWVlZK3yMymWzEiBGqnuV97949ExMTpW/wvLy8sLAwVUfX9+7d6+zsXHR7on379qpOxJg9e3br1q2Vvo5jYmJ69uypag9SWFjY6NGjaZq+fft2kyZNmL1PBw4cUNrqkhOLxR4eHuvWrVMa/+mnn1SdmJeammpmZhYVFUXTdP/+/Zlj22KxeMCAATdv3iw25Pr16+bm5m/evGE+Iua7iaksxb2viuSVpTj4/Plz9ZXVqVMnpcESK2vWrFlKg1u2bFF1mo9iZU2ePJnZKKcoaurUqUo7q+WSkpJMTEw4VdaZM2eYysrMzAwLC2N6D1NZqr5YS1dZffv2VRo8ffo0s/wUxVTW4sWLlcZLrKyTJ08qvc7IkSO5Vlbnzp1VVda+fftUVVZeXl6xIXPmzCm2snr06FFiZSkqsbLWrl1b7G+50n4jVEMkEgUGBjJrl
FKpdODAgW3atCm6NqEoOTm5bdu2zDTp6emNGjUaNWqU+rPLrly5MnDgQMVFPCMjo+h6t6I///yT6QRyFy9eVPM3pml6/Pjxx44dUxwp8byvsLAwZp3u5cuXzIrCtm3bXr58qWr6/Pz87777rui6vBrPnj3r2LFjQUGBfEQoFKo/u+zkyZPyNcqZM2fOmTPn/fv3I0eOVBOyePFiZnkVCoXMxnp8fLzS3iQlQ4YMUarkZcuWFf2OkJPJZEFBQUU3fdTIysry9/fPyMhgHxIfH9+tWzemT/z5558DBgzIzc0dMmSIqs5B0/Rff/2ltI5V4t/9hx9+2LVrF/P/I0eOWFpaKh6sLVavXr1u3LjB/H/hwoU1a9ZUs5zQNC0SiQICAhSnkUqlPXr0UDr9QVFKSkrbtm0/ffqkOKj+vVy9elWpskq0du1aeWWdP3+ezXmb31JZNE3PmTOnbt266qumdJUVEhLCqbJOnTo1ZswYltc5MJYsWcK1ExStLPWKrSz1Z/V/Y2WxtHv37qLrWKVWoRshTdOKiwXLXkhR1Pnz55m1PJa9UGnhO3z4cOPGjdUUcNGFtUWLFmpOFSsasmPHjtq1a6taW1cKWbx4ZwFiOAAAIABJREFUcZ06dX7++ecST5tWnItMJjt16tTmzZvVnBlbNLFx48ZNmDBBfTXKf/vixQtbW1tPT09m86XE6W/fvm1pablmzRpvb2/1p00rJZCVleXl5aX0Law+5L///jt8+LCak/HoIh/X5cuXT5w4of6yB3lIRkaGm5ubm5vbgQMH2Gclk8l8fHyU9iWoD2HTC5VC2PRCpZBz58716tWLfZlkZGSEhITMmDFDzfRKIR8+fNi+ffudO3fUTy+Tyfbu3csEsumFSm9k586d7CuLMXfu3BJ7oWKIRCI5fPjwyZMn1V/u8i2VRdN0cnLytm3b7t+/z34WSUlJmzZtOnfunJq/o1JIXFzc+vXro6Oj2c9FKpW6uLhwCmGDa0gpZqFGeTfCo0ePTpo0qRTvQSAQzJw5s23btgYGBurPtKZpeubMmfLjWCx7oSKKogIDA9evX69qAplMJt9JlZqa6u/v37ZtW7bvhKZ37drVpk0bpX2e6gUFBRkaGqr/alMklUpDQ0O7d+8+btw4W1vb3bt3lxiSkpLSv39/5imvJVYsIzs7u3HjxjY2NkrnVqixevVqQsimTZtYTk/TdFRUVOvWrSdNmsRyeoqiRo0a5e3tHRQUZGpqunnz5hJDCgoK2rVr5+/v36JFCzs7O6XTZIolEonatGnj6Oj4888/s0wsOjq6c+fO3t7e+vr66nuh3LFjxzp06FC9enULCwul63+KRVHUmjVr2rdvb2lp6e7urn4dSG7ZsmXNmzdneYkhTdOFhYW+vr4sr7lmvH79umbNmuHh4ba2tvPnz1ezdGVnZ/v6+spXK1luFzK4VpZMJlu6dGn79u3NzMzUX8OgGBIcHBwQEODl5dWuXTs217QxleXv78++spiTSrp162ZlZaX+kg+5yMjIOnXqTJ8+vVmzZh06dGCT2Jo1a5o0aTJ9+vRatWqNGjVKzaU7Sn755ZcBAwaon2bs2LFqrjwpSiaThYWFqdqbXdbKtRHeuXOHOTBbil44Y8aM8ePHS6XS1NTU0NDQEnuh4h4Plr0wMzOzffv2GzZskEqlcXFxdnZ2qg4YPH/+3NnZWV42M2bM4PP5T548UfXKeXl5it+trq6uzK7O6OjoAQMGTJo0SXEXSrFWr149derUOnXqsNw/s379+lGjRtE0nZub26JFi3379qmfXiaTtWjRgvl2fvDgQd26ddlULEVRT548uXPnjo2Nzc6dO9kkduTIkQ0bNlhaWrLfP8N0KU9PT5b3Hzl48KD8yM3NmzetrKxOnDihPmT+/PnyHZh79uwxNzdnc1l0YmLiixcvatSo8csvv5Q4cVpaWo0aNZjv95MnT1pZWZXYC2NjY+vVq/f+/XuRSLRq1SorK6sSe+GuXbs6dOiQl5eXm5s7fvx4lr2QeRd9+vQpcUrG3r17Bw8eTNN0QUHBb7/9Fh4err5RicXiefPmMfvTkpOTvby8pkyZombpkslkT58+lfdL9r2wRo0aTGXdvn2bTWX9+eef3bt3LywszM7OHjp0aIm9kKKoq1evDhkyhKZpkUg0ZMgQPz8/9S2HqSxmTZRlZYnF4gkTJpw7d46m6RcvXri7u//vf/9TMz1N00Kh0M3N7c2bNzRN79ixo8SvR5qmk5KS6tWrx9TU9OnThw4dyn5TIT093cLCQtWJHSKRKCEhQX4yEZsXzMzMfPXqlfxkIpZpaFD5NUKhUNi4cePFixfn5eV169aN+ZvJRUdHq99B7OXlJd9LIBQKfX19Nd4Ls7Oz7ezsOnTo0KRJk6tXr44ePZq5TV+x8vPzExMT5fvKZ8yY4enpqWrJuHv3rrW1tXwfmre397x583r16uXs7LxkyZLAwEClI46qzJgxo06dOsxcVqxYoWYDcciQIUePHs3NzfX392e64L1799TsjUxNTbWxsZH/+PLlSxMTkwkTJqiaPjMzc9++fSdOnGC+aO7evWtra6t+O+/atWu9e/dmzi08e/aspaUlU+pRUVGqzoOQY3ph165d2RxFmDp1qmJn2r17d+PGjdWHtG/fXvGMg1mzZjFf9MXKz88/fPjwoUOHmJ2ob968cXd3L3G78MiRI926dZP/ePTo0RK3CxcvXqx4fHHp0qUlbhf26dNH8VzQ/v37c+qFLLduz5w54+XltXz58po1a4aFha1cudLS0lLpgMWBAwfkKy7BwcG1a9eWr8OlpaWV2AuZgh09ejRTsCx7YaNGjebNmxcREcGysjp16vT3338z/6coqnPnzup74erVqz09PeW3XGFu5qm+FypV1qtXr0xNTdVUFkVRLVu29PDwkH96zAKmvhc+fvzYz8+Ppuk9e/YEBgYyX4yqTtxjHDp06Pvvv6dpeu7cuUwXFIlEavrW9u3bGzRosGnTJiax77//XtW5hPfv33d0dLxx40ZCQkKvXr0U/8oURSldU884fvy4m5vby5cvT58+PW3aNMVfCQQClvc2+hbl0QjT0tKYrwxm4V65cqXSBCKRyNPTU/39orp27aoYuHHjRjc3txUrVqiftVIv9Pb2vnjxYtHJ8vPzmb/Wpk2bBg0adPv27ebNm7dr187AwEDN6klMTIyNjY38e2fevHnyLlVUdHS0tbU1M/GTJ0+GDBmyfPly5pyrFStW/Pjjj0VDZDLZ4sWLHR0dFdOePXt29erVu3Tp0q1bNzU3MFuyZEmvXr0CAgKYLkhRVJcuXZTOKVAkEomsrKwUz7cOCwvz8vJSuvyOkZCQUKtWrb59+3p7e9etW5c5kB4bG2tjY/PXX38V+/onTpxo0KCB4ilIZ8+etba2HjBggJeXl+LFwnKbNm1ycXGxs7ObNm2aQCAoKCgICgoKDQ0tepT0zZs3zAHdgwcPCgSCnTt3Kl5wlpaWZmlpqRQiPwOLORw1ffp0xUs5L1265O/vX+wbSU5Orl+/fo8ePVq1aiW/eI7pIuq/
c5krqeXdQiaTNWjQwMbGRs2xlgMHDvj5+cmPVX/69Kl69ep+fn5qVuaYLzX5jzdv3nRzcxs0aFDRKYVC4aRJk2xsbNzc3Ji9x8wXrtL1qUoKCwuZuf/+++9Dhw5lvtQkEomLi4tiM5DJZD179pSvJt6/f9/W1lbxezMtLa1x48bqN7uVeuHZs2c9PT2LPUZOURSzQqZUWb/99luxlSU3ceJExV3ukZGRbm5uarrU58+f/f39W7duLd+LyPRCpWvpFDGVpXhkNDQ0tH79+sVWFvM9GR0dXa1aNcXXfPPmTYMGDdSs+BYUFFSrVm3x4sXyLnjz5s1mzZoVu6rBLFH37t2rXr264rbgxo0bhw0bpjjl48ePmWs9f//996SkpEuXLvXp08fGxmbMmDEHDhxwcHBQ+gp6+vQp858bN244OjoqncpL0/Tdu3cNDQ0VG5s8ZOfOnW5ubkX3eB04cMDc3JzTgaRSKPNGmJaW5uvrKz+ilp6eXuzhX+bM76Lj2dnZzB8pOjrawsLiwoULzPikSZNOnjxZ9Bvh06dPU6ZMCQgImD17NrNAKPZCVWeaDB06tFWrVrGxsVKp1NfX99atWzKZbMeOHTVr1iy2ccoxN+WTH4GbMWNGmzZtVE2s2Avlbt68qXSHEbnZs2f37t07NTV106ZNenp68jMJL1y4sH///qLvXSaT3b59m1lws7KyatasGRoaKhaLJRLJrFmzunfvXmxVyHf/rlq1qlatWsxFVB8/fmzUqNGjR48sLS2LHjlo0aIFc3qhTCabPXu2h4cHU8CxsbHF3mxQIBC4uLgw7zEjI2Pbtm1MJTx79mzjxo3FXgpy/Phxf3//9PT0jx8/ytd4mO3Cohe6hIWFDRkyZPHixb1795ZIJGKxuFWrVuHh4cxX86+//qp0fr9IJKpVq9bSpUsHDRrErGinp6dXr1592rRpQqFQIpEMHTp0+fLlRbOiaToiIkJ+QHTdunU2NjbMvo0XL14cP3686PQSiUR+jk/Pnj07duzI9MIbN2707t17y5YtISEhSiFCoZCpBYFA0KRJkwkTJjDfXMyO9GK3iT9//sx8JaWkpDg4OMiv1tiyZcuyZcuKDZk2bdqECRNEIlFMTAyfz1fs6PKNJEUZGRmdO3d2cHCwt7ffuHGj4oc5YsQIpbV4mqYpisrKyho3bhyzAXH//n07OzvFwGJPRktISBg+fPiAAQOYW9Uo9cJi63fXrl2Ojo42NjahoaGKX6O3bt3y9PRUf9bMixcvrK2t5Wtvv/322/r164s9VJadnc18nzC9sFevXoq9UH1lrV69WrGyvL29mcoq+ncJDg5mdpMU/a4o9uNKSUmJjo5mfrVs2TJ9fX1mvSo+Pt7T07PodYQ0Ta9bt87Ly4vpKwMGDLC1tWU+otOnT3t6eqalpSlOvHTp0lq1av3www+KF7a+evVq9uzZDg4OPB5P8ZjL5cuXmzZtKj/XLDY2ttizGhXvrrdp06ZOnTrJv8quXr1a7NtUDCkjZd4IFy1a5OXlVYrLHsVicf/+/Qkhrq6uzNcrc1/Xzp07d+rUKSAgoOjyKpFIWrVqtXz58v/++69Pnz4+Pj5SqZS5CMnLy0vN0QKpVLpp0yZ7e/sRI0YcPXq0WbNmzN+m2FXvhISEsWPHTpgwgdm+UeqF6o/hKS7fzM2s69Wrp7hmIP/STExM9PHxEQqFL1++bNiw4ebNm42NjeW9UPEFmYNzQqGwY8eOtWrVMjExYTZNkpKSWrdubW5ubmNjM3bs2KJvPyUlxdfXlxDSqlWr169fUxS1aNEiU1PTdu3a1axZ89ChQ1Kp1Nzc/NOnT69fv5avxMlkMj6fL/+DUhQVEBCg/p6EHz58sLGxefTo0bJly6ytrUNCQiws/l973x0XxfW9jbFiUHYp0qUjTapIkS5gBwSxgYqFYsMSEQsalKCCaMTYEBtR7BWCJYJGRFG+RhGlKVUEqUvfvjPvH+eT+85vGwsiYMLzF2V3Z3Zm7n3uPec5zxklXD3o6uoKRwwLC4MVK5x/R0cHUdUJP1MoFCUlJWVlZbQ+bWxs9PT0FBcXV1VVdXV1RVzLYDDgc/Lz84cNG+bs7Iw+6uPHjxYWFiQSSV5efvny5UgQ2NDQQCwqUFdXJ5ofLly4cMuWLYK+xdOnT+Xk5AYPHuzn59fe3t7a2urt7S0tLe3i4qKmpvbu3bvs7GxjY2PiW86fPz9y5MgRI0Zs376dw+FUVFSYm5vr6ektWrRIRUWFeGiEnTt3/vDDD5KSkvB4ZGdnq6ioTJo0ycfHR0dHB9mSffr0Cc3XHA5HVlaWwWBQqVRXV1fgJ7gy1dXVfB/7GTNmAL+mp6ePHDkSpvVXr15BkBNdLjab7eTkBKlQJpM5e/ZsZM+bk5MjKyvLW1KJkJ2draGhceLEiX379klJScFmrra21sDAQNBFfvLkiampaX19PZvNnjRp0urVq3EBIwt98b1796qqqlpaWkKQGUy8HB0dZ8+ebWhoyKtMplKp8+bNU1FRIZPJK1eupNFo7e3tjo6OXM6IRPCOrF27dqGRdenSJTSyuN54/fp1LS0t+FiQKXDJ3JhM5rZt2+Cxv3XrlqysrIqKipGRUWlpKXw1cXFxBQUFLS0tLqMfQGJiop2dHRoOHR0dgYGBI0aMkJOTs7GxQV7BRUVFKKxqb28/fPhw3s0ojUbbtWvXpEmT4FcMwyZOnOjv7y+6+KO5uVlOTo43QNgn+OZEiGHYqlWrjI2Nu1RWguP48ePHly1bxmAwLl++LCkpCavUurq68+fP37hxg69w+fbt21DBWlVVZWJigqwNMAzju1NpaGiIi4uLjY2FJEpjY+Pq1aulpKRGjx4tSBGXnZ2tpaWVmJh48OBBaWlpSCy9fPlSSkpKkOguJSXF09PT19cXAgVELuTy48jJyVFXV4eoHZPJfP36NZvNNjU1hRIxX19fcXFxLkEjdGBISEhISkpauXIlhmHFxcXa2to//fQTvKCmpkaQxsTX1zc+Pp5Op4eHhysqKsLUVlVVlZKSAruc2NhYuJ6PHz8GAxF4o6WlZUxMDPqc8PDw8PBwvod48+YNOOtHRkaSSCRPT08wjwgKChLkbQ/rG0dHx7t37yIWxHF8+fLlqFQOkJmZCbHo1tbWkJAQc3NzPz8/4ory8+fPJSUlxJF54sQJR0fHjo6OoqKin376iddh5OPHj1xLmXfv3ikqKqJNko+PDzGYlpCQQAxFcsHAwCA7O7u+vn7mzJm2trawz3v//v0ff/zR3NzMZrO9vb2JdWD19fXjxo2rqKj4+PGjiYnJkiVLOBwOk8m8d+/e2bNn+a6xnj17Zm9v39LSkpmZqaioCPelra3t+vXr58+fJ0ZZbG1tEQkxmUxJScnPnz8jFuRwOBYWFoKS7g0NDerq6vj/HVmNjY1sNpuYO6iuri4tLb1165aioqIgLlRSUqqoqOB7lKlTpyJZ04cPH6SkpCApWFtbK2hPEBAQAAHYqKgo2KXBheXrdIPjeGho6Jw5c8rLy/fv3z906FDYxDc3N1+5cuXixYt83xISErJ
582YMwz59+oSCUm1tbba2toLE0n5+fidOnBBlZAGqqqoOHjwI/Ofs7Iy44fnz50pKSsSZk81moz5fxsbGpaWlGIaFh4erqKgAjTGZTKJxHdceWktLi9fVs729nav9xYsXL1xdXWEXcezYsbCwMA0NDd67RqVSR44ciYKWsH3n8rYVjoKCAgUFhbi4OBFf/+3QGznCbnDh7t27p06disyZ0tLSEBcKwYkTJ4KCgohjtaioSNBasrS0VEtLKzg42Nvbe/To0ai+++3bt66urlwmwghOTk5Q0XXjxg1LS0u0tnr58iXfhMfly5cnTJiQlpZ2/PjxUaNGAR8DF3J9nZMnT9bW1iYkJKirqyN1w9OnT11cXODn+fPn37lzhzc6BFxobW2N1oBVVVVELkRAcQ8ajbZmzRpiUC46OhqNWBzHm5ub/fz87O3ta2trbWxs8vLy0tLS5OXl4cSys7N//PFHkNfW1NQYGBgI6oG1detWPT09rmFWUlKipaVFnNYpFIqfn19jY+ONGzdAln3y5ElJScmFCxfCoHrw4IGhoSExBvDbb79RKJTDhw+jvCyYigEXYhh24sQJ4rUC4SWDwQgMDHRycoLdz4cPHxAX0mi0+Ph4rvXslClTMjIycnJyFBUVYR1TWlpKJpMjIiLodDpokbh8PQBlZWUbNmyYP38+/MpkMr28vBAX4jiel5cHOxh0xMzMTH9/f2QaAFUEwIV8ry2O47///ruPjw96kMA8jLhGIQKWRMnJyXDExYsXS0pKImqMiIjgyg8R0djYSCaTP3z4YGJiAvvj1tZWPT09rkDW5cuXYSV369YtlJzj4kIi2eTn54eEhKAvaGFhQRwU+/fv71TIunLlypiYmD179qBYZXh4OJcLNkJubq6ZmRmDwYAoy8GDB0eMGME3oE2EvLx8R0cHUXcGahroowSvETKyYmJi+I4sYmBj165dZDJ53Lhxd+7ceffunZSUFPpAXm5GXEi0UY2MjERcSISHhweRZiQlJYmvwTCMa+R++vQJ7TFiY2NR5HPr1q3AhRiG7dixAzaIb968kZeXJ66zv18u7CXVaFe5MD4+ftCgQUT/AuBCvoYvDAYD1lBFRUWSkpLGxsbAghwOZ/r06YKEed7e3ijscOfOHQkJCeG1q4CxY8dWV1cTWXD//v2CBDI4jmtqasJKCtQraCrPzs7mogfwcKqtrU1MTESrtvLycjKZfPny5bi4OAsLC97HCzIxf//9N5lMJrpNAhcSNWB1dXXS0tJg+Q+GhEOGDCFqd6Ojo5WUlNBM/ejRI5jmrl69qqSklJ+fTxSdZmZmampqkslk3ugN1+mFhIRADQCO41QqNS4uTlVVlSvzCooDHR0daEINb1y+fLmSklJkZOS6det0dHRQUh2wfft2MzMzCoVy+vRptGCiUCgWFhZTp06dM2fOvHnziGEDoCIvLy94WlD24sOHD8rKykuXLp00aRKvCg7iZk+fPi0pKUGT/vv3701NTSUlJSUlJQWxDrDyuHHjUB4ITmDGjBnwK51O55LPffjwAUrHiB9iZmYmJPT66NGjkSNHEvUgwIVJSUnoLxUVFQ4ODhAgbWlpQXLN5uZmCwsLCwuLmJgYHx8fR0dH3shBe3v7gQMHgMAg1AxFL2w2e8GCBcTNNGolASs5rtaVYCLKlQ2trKyk0WhTpkxBZB8VFWVqaopi+GfPnuUr88Fx/P79+3D058+f//jjj25ubjCynj9/rqmpKch4gUql/u9//2MymSjKMnv27JEjR/LVMSKoqak9ffoU6c7YbLaRkRFxGQcjC9bBaGQRlboxMTHKysrEkcUV0GptbdXQ0EhOTnZ2dp4yZYqbm5uQRUlDQwNwoYyMDPE6R0ZGWllZcb24pKTk48ePaOwsWLBg/vz5iL+vXr3KZUo+Z84cSLTj/8Rm0fy5Y8cOBQUFR0fHhQsXwguePn3KK/X6Trmw98onusGF0tLSxMDLu3fv+F5cBoOho6MDk35sbKyMjEx8fHx6ejqXmfqRI0eI05a+vj5xAKxfvz44OLjTs1qwYIGrqytiwaKiIg0NDUGGexwOR0JCgkqlorgNjuNr164lykPKy8tRRV1UVBTa/yHcvn3b3Nx87ty5RFX3u3fvnJycmpqagoODo6KicEKMFL2GQqFw7W8KCgo6OjpgCoARa2JiQkzfEs1xtm/fDnl7HMevXLnCt6q3rKyMrxCDGJgCN2HYF3I4nH379vHtzvPw4UMSiWRmZka8OCkpKRs3bjx06BCa2goLC5GLzfbt2z08PLg+h0ql/vLLL8S93aNHj2AiACrilaF/+fJl69atxL3I8ePH4+Pj4ee//vpr3bp1vCdcWVkpvEaNuEOFvzCZTCGt1/B/uJB4qSkUinC5x6NHjyQlJYmFkiUlJcQHkssnHVJuYJLHZDJPnTq1bt268+fP84oU6uvrzczMwsLCYGnS0NAwceJEY2PjsLCwiRMnBgYGosEYHR1NdGd9+vQpb6KIyWRyGXZPnTo1NDSUSqVOmTIFFkZMJnPy5MlmZmapqan37t3T1tbmm0hOSEgwMzNDisRjx46NGjUqICAgKChIW1ubbxqViPT09OnTp8PPXl5eycnJwt2a9uzZM2TIENg6YxgWGhrKayjYpZGFUF9f7+7uDs9zYmIicNL169fV1NQkJCS4HI58fX2vXLny999/GxoatrS0EGOk6DV8qzjodLqGhgboBurq6jQ1NadNm3b79u2YmBhNTU2u0hpIfyJpDBcXPnny5Pbt251mAbvhYVJQUKCoqNiHXNirBfUcDmfFihXGxsbQbpHrTnM4nBMnTqxcuRIRw6lTp6SkpISoKphMJlzr5ORkTU1NeKDv3Lkzc+ZMFxeXhIQE4j2DWQZoA8fx4OBgYo3L+fPnUSCLC+/fv4+Ojoa1D6jyli1b9vHjxwcPHujo6PB1Ibl//z6MimnTpllZWSEWTEtLMzMzQ89HeXm5oaEhMa0toqqIzWbPnz9fWloa8oLwR1F6ab548YJEIiUnJ+M4jmFYUFAQ36VJTEwMSI1EORlewHIBcSGHw5kwYQJvjBQhOjp68eLFjY2NixcvJnIhV1yosLBQX1+fONMJ6XwLSEtLMzY2RmsIJpPZaaHxxYsXlZSUOrU44IuCgoIlS5Z4enpCDri5uXnixIlce1MuPHv2bM2aNQcOHICnF55SIa6qOI7fvHkzODj43LlzcOszMjLIZLIQ0wAhXCjkKN7e3siUp7y8/MuXL0wm89KlS7/88gtxBVlfXy8rK3vo0CEhH0UEh8OB+bqpqWnixImhoaHE6ZLJZEZGRpqYmEyePJmvwzjoe7meWBihCQkJiAkqKiogToNh2JYtW8hksoODA8QnCwsLpaWlU1JSjhw5YmlpyXsR2tvbAwMDyWSyvr7+9evXgdjGjBkTGBhoa2s7e/ZsRJypqaloe/Ty5UsSiQTLKVDrdLroxzAMWpd4e3uXlJRYWVkBKVKpVGKzRkBGRgaJRNLW1kZRVjabvWjRIuH9n6Oion7//XcojIHi2ubm5i1btjg6Oq5atQoFYBsbGy
dOnAgBT+FcKCKAC4ODg0GfIYqC5v3793Jycr/99huHw+nUXLfH0dsWa8CFBgYG48ePJy5G2Gy2h4eHj4/P0aNHVVVVUd/tEydOyMjICOrwOXfuXFSbMXXqVERygkDkwoaGBlVV1SVLltTU1Hz58sXCwoKYFwwJCQENXmZmprKy8pIlS2RkZKDE6tOnTz4+PsrKys7OzoJyY/v374cgWGlpqYyMTEBAwMuXL6EBDXFPAN1wOi0nJ2L9+vVAyZAemzhxInE9AftC4TXXT548IZPJsIGGdkhmZmZcElziyOkSkO5/6tSpRC5cvnz5+vXr+dYL0mi0cePGwZ4PYqSmpqbV1dWHDx/mMu9PT0+XlJQEFhcRJ06ckJWVFW4Dy4VurGcB+fn56urq8fHxhw8fVlZWBgUd7AsFOTNcuHBBX1//8OHDHh4e5ubmEOmCp5RYaUDEtm3bbG1tjx8/bmFhMXv2bFBYwJXh6/gKri7Nzc1d5UISiVRTU1NQUODh4SEhISEhISGo9wWEtkRxf+ZwOEuWLEEldE1NTcLtm3Ecb29vX7duHdrjPnz40NramvgCvjGJvXv3gsFFYmIiNIyMjIyUlpaG3Mr58+f19fVnz57NV3/k7+8fFhbGYDBSUlKGDBkCA/bZs2dxcXFcmruWlhYrKysUS+QaWUuXLjU1NeVbjMFgMN6/fw/PfHt7+7Zt20gk0rRp0wwNDbnWTDk5OStXruRwOO3t7SYmJlzTBewLieF0IsrLyw0MDJDtg7q6upARfebMmaamJngxFxdmZWWRyeSukhOMI2tra3d3dxHN24ALra2tBfWa/nboA9NtDoezbds2Lm47dOgQhDFfvHhhaGioqamJum88fvxY0JQUHh5uZ2dnbm4FbExXAAAgAElEQVS+cOHCtLQ0MpksJF0HIHJhdXW1u7v70KFDpaSkUCgMAOXhZ8+eXbhwIWxBysvLNTU1hSRsiGhtbSWTySB1q6ys9Pf3NzY2XrJkCVHQBQAu7NQCDcdxNptdUVEBopWsrCyoeFuwYAHiQiaTWVdXBwltrvc2NzcfO3bszp078C+uEUvsl43QDS6k0+nq6upQtg9c6OTkVFVV9fr1a21tbb77yzdv3ujp6RFHMofDAcu6mTNn8vpfE89cRCQkJEhLS/OtqRKE7nHhokWLUGi6pqZGVVUVlLEUCoVvvK6iokJbW7uurq6trc3BwcHBwQE1Yf/w4QPvo4LjeHp6uo2NDYPB+PTpk5GRkYWFBWrC/uLFC74h+v379y9atAiUHUQu/PLly44dO4R8wbCwMCkpKRKJ9Msvv7S2tkZHRwupNBeRCy9evEgmk0Wx9ERgMpkBAQHIv/DTp0/i4uLE2SMuLo63rAjH8U2bNmlpac2dOxfJQ06fPo24UBDa2toUFRUxDCOqY4TEToVzId+R9ebNGx0dHTU1tWHDhm3duhXeWFxc7OHhgepSAKWlpTCOII1Kp9NhX4i4sKqqis1m8w2e//HHH1OnTiWGFjrlwmfPnikrK8NqlYsL8/PzhTuM80VdXd3GjRu71FPi/fv33Vh/fz36svvE9u3bUYwlJiamrq6utLRUT0+vuLg4IyNDTEyMt0oXwGAwIHTQ3t6uqalZWFh48uRJNTU1OTk5Ia3RPn/+DPeSK0Yq6CkHLlRVVUXLmcrKSuFcSAy7hYaGcjVSFwQRuRC01CDgRG3hQLYwYcKEnJwcT09Pvpm85uZmXV1dLy8vLS0tcFbERWMU4ELhYTqElJQUBwcHAwODoUOHAhfS6fSVK1cOHTpUVlaWt2M1wvr160eNGsVVqCTENKcfcuGbN28YDMbMmTOJNSHnz5+3t7cX/i4wRvDw8Dh+/DiTydTU1ERciHD27Fkk079582ZWVlZbW5uZmdmjR49qamqGDRuGuJAXMTExLi4uiHgwnr7KXGAwGKmpqenp6bBqKSgogEcaw7DZs2cL77MBXCikTBAgSqsHXrx69crV1RWYfsOGDXp6ejADPHr0SENDg1gGfvjwYVQDt2nTJjExMeJGGbhQSKa2sbFRWlq6trYWsSCDwZgwYQJfGvj8+bOXl5eNjc0PP/yAAuDCn08ajaajowN76/z8fF1dXWK/Pa5b7+LismnTJhqNNm3aNDRVAhfGx8ffuHFDR0dHUCueyspKLS0tR0dH4gNcUlLi6ekpZHDt379fEBf+u9E3RIhhWEVFRUZGhry8PDHfsGzZMkiutLW1WVtbc61YfX194Zl+/PixjIxMYGBgXV3dpUuXZs6cieN4U1PTunXrDhw4wHu4/Pz88ePHa2try8jIwOKRiwsRWCwWDLaCgoKamprs7GwSiUT0OqqsrLSysuI7j1RXV8vJyXl4eMBT/vnz51GjRom4+AUuJIr9iECpNRBwFhYWcnWnCgsLGzt2bExMDO9ekEajnT59Ggiyvb192rRpKGKZlpYmSGiOICIXFhQUqKurwyYG2gYhO7f29vZOc43r169HylJR0K+4sLy8XEVFJTs7+8yZM2pqaigtlJyc7Obm1ulR0tLSpk2bBj97eHiEhYUR94Ll5eUlJSVqamrEkrVff/0VxY7s7e03b94spPOwmJgY0RAANGt8XSIbGhosLS3d3NzMzMxcXFxQvq20tNTT03Px4sWdZnq+ERfevn2bSqUuWLAA7IVZLFZ4eLi4uLi0tLSRkREx7l1eXp6Xl6ekpETkQg0NDWKg6O7du8LVMW5ubkTrxA0bNgjqDTtp0iSYTz5+/Ghqaoq4kGtkxcfHoyXy69ev9fT00L/A1Jer7IHJZELar7m52dLSEqoYiS/43//+Z2xs7OjoyOXYzAXgQt4yKl5UV1ejxQSRC1tbWyMiIrqxF/zu0DdE+Pr1ayi5zcjIsLe3R7fZy8sLejHv3r1769at6PVUKjUvL+/58+fy8vLAhY2NjSEhIdLS0vv27bO2tkb6Rl7Q6fTx48fDjiQxMVFRUREJPtXV1bk2InFxcdbW1i9evDAwMIAlJ4qRivK9WltbDx06pKKiYmZmlpiY6Ofnt23bNhGviSAufP78uZGRERpIqampXbLpCQ4OVlVVRRsyOp1O5EJR8PHjR2VlZaBSQZRw9OhR4vY3ISEBVSvzxalTp8aNG2dmZoakRlu2bBHi1MqLXuNCQ0NDUCTx/e5lZWVRUVHgNo5hmK+vr6am5pUrV+7evaunp8c3aZecnOzr63vs2DH4wOvXr1tZWdHp9NzcXH19feKU19jYCLX8JSUlpqamKFAcERGxZMkSDMNu3rwJq0Ah2Lx589ixY4nkKui7TJ8+Hb7I+fPnyWQyVKdgGLZ06dLDhw+L6BiSm5s7ZsyYo0ePslgsIWEA4EJRbvfDhw8NDQ0rKiog+IGs9ul0OpcfWFVVlby8fFpaGjhQI7nTtm3b+DqPo/Vle3v75s2bHRwcNmzYUFNTU1VVpaOjM3ny5EOHDnl4eEyfPh0F927cuIGuJJVKHTx4MLostbW1Y8aM4RVGcTicuXPn2tjYwGnX19dz9VObPHkyUegE2mYUWmhubu60fpqIkydPmpiY2NnZgb8HcCFv20jkH
cPhcAIDA2VkZCQkJNzc3GB1QuTC/wh6mwiRDcHt27cVFRVzc3OJA+zt27dycnJjxoxxcnIiJjwyMzPl5eX//vvv58+fE0uLXr9+bWtrKy4urqurK2iOzs7OBictUB6Dkh42asTgNWz/WSzWjBkzBg0aRNQFQGsFIW3t0tPTg4KCIiIiILLBYDDOnj1raGgoISGBLIhEAfQMunr1anp6OjAEm83W19f/mkbMMJUT1e10On369OkiNngDABfu3r1bUMOwP//8U0NDA5Fre3u7np6elJQU39LMuLg4BweHgoKCxMTEH374AY359evXCxLu8gVUXKSkpCQnJwsvBUM4duzYmDFjcnNzjx49KkiBxQW4gMHBwcuWLePV6WzatGnEiBEorA1OHDY2Ni4uLnxpICMjQ1dX99ChQyYmJrNmzYJE75QpU6SlpRUUFJDKCUxbcBzPyclRUFBISUkhDpOGhgZ9fX1ZWVk9PT3e+R3DsAsXLuzYsQMlJrdv366qqiq8B0VNTY2npyeO46mpqRMnTmxoaLCwsAAuFOUqEQFcaG5uLtyFXJSlz5MnT4KCgpBMA7gQkQpCfX09XB+0VuaibcgXEsMzLBZLW1sbZDsLFy4MCQlJTU318/NTUFAoKChoa2v79ddfV61alZSURPyo7du3a2hoABdiGKaqqkpcggcGBmpraxP33+i0fX19ra2tIYy5fv16IyMjSOxRKBQ1NTXixu7cuXOysrJdDR0Dbt26ZWdn9/z58/PnzysoKMBeAhI6XOlb8PRISkq6efPmggULmExme3t7QECAlpYWSHiioqLAr+4/gl4lwpycHENDQyT2vX//Pu/9bm9vLyoqQg8fKgVNTU2Vl5eHRgdEgARZSBPd9+/fa2hogAYHEtEfP360tLQkvoZKperr60Nl0qpVq6ZPn46eWgB0huMbjk9MTDQzM/v9999Xr15NtJDAMOzevXuCCgYEAbjQ2NgYXZnuCThZLBZSzYDPL1Ei2I3GyGVlZerq6lyFPrm5uUFBQWw2G7qVenh4wPLl0qVLq1at2rdv37x587g+p7q6WkdHp6mpqb6+3tTUNDY2duTIkUipJGLHQQTgQgsLC9FNi6DszN3dXfQcPsRIYVnN4XDABLW0tPTcuXNsNnvx4sVaWlqdFnIAQkNDoZyuo6PDxcVl2rRpdDod2u+hHUxNTY2ZmRlacOTk5PDqO6AkkTdmxWazPT09Z82atXPnTjk5OXRhN2/erKmpKdzCn8Vi0el0NTU1IAwobxelpTMvcnNz+SYpuLBhwwYIiVMoFL7nFhAQMHjwYGS1j/+zYSKGHyoqKoyMjJDbQ1ZWFt8U4OnTp7kionl5eXJycr/++ivRKH/nzp1GRkZCBsiOHTtUVVVhFXX+/Hk5OTm4U2w229ramq9upbS0NCIigkQiAYUzmczg4OBRo0bNnDlTXV2d16dNeB8bQbh3756fnx9a/YBeHWISlZWVvAsa4EInJyeklscwbNasWSgU11XV9HeNXi2oNzc3FyI848WdO3cmTpyI9hmZmZmiB/SIx500aZKEhAQMj9bWVktLS2Is4vXr14WFhQ8ePJCXlwcuZLFYPj4+iAuzs7O/fPnCN1DO4XAUFBRgI0ismv8aZGVlca0PusqFVVVVxsbG4uLiOjo6kEHpht0DL3gr8EAy5+Pjw2Kxmpqa3N3dpaSkrK2t9fT0Kisr//jjD94kGYvFgt22i4sLBGdWrVo1bNgwZHHXVTx9+rSrNrapqaldUrLhhO9eWFhIIpEOHDgwfvx4YEQoCeAqoObFjRs3ZsyYMXHiRFQJTqVSXVxceKUu/v7+Tk5O3UvM7Nu3DxoyQ3hfRkYGrRGvXbvGe/cZDMaNGzcuXLgAJ19UVKSqqspisTAMc3Nz61LZSfcAXGhlZcU3uwGlcrq6usR5nMlkEi/17NmzZ82aJfqDzWazo6OjYdkBXKimpobezuFwlJWVhVflE7kwLi5OQkJi2rRpRkZGfLNxMHiTkpLu3btna2uLtrPQzVRQZyURuTApKQldt5UrVw4aNIgYR921a5dwkxDo90Ss7UlOTnZ0dBR+0H8lenVHWFFRoaGhIWKNCIvF0tPTEyQc5QtU1VdQUGBvby8vLz9v3ryKiorKykoTExNjY+OVK1dqa2tzNQ+7cuWKsrIycCEK0wEXmpubx8XFaWpqovpWDofj7u6O8kzQdhz/vyy4c+fOHtdZdYkLV6xYER8fz+FwDh8+LCUlBTMvdFYSMYQoOohciON4fn5+RkYGjUajUqm2trbEDoi3bt1CO5uCggLUdWHt2rWnTp3qRgiur5CUlCQmJkbUm3TKhZmZmbq6usePH7exsbGwsEBhfyqVumzZMq51z9eo9X766afm5ubi4mJQX1+6dGnQoEGC4iVtbW0WFhYLFixYsGCBgoIC1Ck5OjpaWVlNmjSJ6Mr0TbFp0yauTpnPnz/ft29fSkoKBBuEX9umpiYLCwt/f39RuJDNZjMYjKCgIPBex//hQtQMFcMwXV3dTpukE7mwqqrq4sWLRLMx4oYyPDwcCQXAOoc3tMsXnXIhhUJ59eqVgoICcCGGYQEBAerq6ih1unv3bt7sIBeeP38uKSmJWmpHREQQPeX/O+jtHGGXuFB01ROO4xQKRUFBISYmhsPhGBgY3Lx5My8vb+3atTIyMkVFRXQ6/ebNm0ePHkWxVg6HgxwvL168qKKiwmVRyOFwYmJilixZwpVcOXv2rJSUFCJdNTU1b29vxIKpqaldyguKDiFciNJdDAZj6dKldnZ26F/nzp2TlpYGd56uboME4fXr18SWyC0tLUZGRvPnz0fq0KysrPHjx+/bt4/4rlOnTsnIyAAX1tbWksnka9euXbp0SVdXV7iKr7/h7t27v/32G4lEIvYbYbPZq1ev5luj9vLly5CQEHAaYjAYnp6ednZ2gmz5AF+pXJ87dy7EPD59+jRp0iRBs+FPP/0EabyqqipdXV1IbkFnY1GctL4RTp8+ra+vDwazDg4ODQ0NPciFv/32G/jNBgUFoZ0QcKGvr29qauqqVatmzZrF+0YKhRISEjJ58uSdO3dCAJ/IhUQwGAxra2tkwb97926iJdvbt2+HDRvG66TIF0K4sLa2VklJKSsr69WrV9ra2qjKJSAgQElJ6eDBg3v27OHb6BTDsKNHj3p7ex86dAj1uRw9evSECRNcXFycnJxEDPL/y9AHqtFvx4WFhYUKCgphYWHE1FR4eDhvs1xwMCFWBIoikUJxDCIXPnjwYPDgwUeOHPny5Qu47xNLYnsWxcXFysrKUPWBNqktLS1jxoxBRo5hYWFiYmJElUpiYqKMjEwPPt+lpaVjx44ltjK/e/fuiBEjQkJC4NeOjg6+a+rTp0+jHWpycrK+vr6zszOXm3Z/xtu3b318fGA6zszMRFyYk5MjqG8XjuMrV64cMmQIMiUBB+oe5EIOh3Pq1Km1a9ciZYetrS3UXEdERAixW7K2tn7z5k11dTXqKfHkyZO+nQchwADhARqN5u3tDSWzbDY7PDxcSMa9Uy6ESZ/ovU5M+QMXmpub
JyUl8V7wjo6O8ePH//LLL1euXHF0dNTX14dnYNu2bVpaWsR8TVlZWVlZGTSiAi4sLi6WlJRECpr8/PyFCxeK4u8P2LhxI6RRa2pqIAWDcskPHz4Ebw3iAhe4cOTIkUeOHOF7KxcvXuzt7X39+nUbGxs3Nzf4NMgXCtHe/+vRN+UTPcuFFArF398fphXgQmNjY7SepVKpP/74I1elamVlpaKiInFb0ylYLJahoSFyhyJy4V9//WVtbS0tLT1z5kxBQf+eAnChp6eni4sL2oFVVFR0dHSkpqbCr7z9w3pcCV1aWqqqqoq48PLly5GRkXy/e2pqKjH0Fx8fLyUl1akzcj/Es2fPtLS0iJrAzMxMaWlpPz8/TU1N1LiYF7DqMjAwQBMTk8lcs2aNcDdtXGQunDdvnq+v78mTJy0tLWfOnAmhDklJyXHjxtnZ2fHqj5qbmydNmlRcXLx48eK1a9eamprCl4K2DF0qL+kRvHnzBkZrY2OjnJycnp4eGrwdHR1ycnLCvWAQhHBhVlYW6n3BZDKRkzgReXl5KEDKhdOnTyOxOpvN9vLymjNnDvyKMr6ACxcuQBu1mzdvoiXItWvXRo8evWXLlosXL5qamnapHAL/hwvNzc0fPXpEoVAsLS3RIjgtLY3X9w5KRfX09LjKS3Acv3z5sru7O4Zhubm5BgYGDg4OqCVZXl7ef6FeUBD6zFmmrKxMTU0Nnjzhlvy4UC6sra2l0+mrV69GS2zgQtQqlkKhkEgk3ukAauqFK7wRkpOTk5KSoMAIKSe5YqS9huLi4kWLFnG5Qr9//15aWhop0MLDw/nGbb4G2dnZAQEB4eHhMKGXlpZqaGgsWLDgyJEjampqgvbBoaGhXNXT06ZNQ21rvhdgGKajowML+fb29gsXLkCos6ys7Ndff+Vrh/bmzZuoqKgLFy4wGAwRBTW8aG1tnTRpEjh4FRUV8VoTpKenQ3UQSG9Qsqe6ujorK4uvlUFMTAy04CgoKJCQkABnZBqN5ufnt3bt2i6d3tcDwzAnJ6c1a9YA+d25c2fYsGFEl6UpU6aI7i7b2Nhoamq6dOlSDodTUFAAn1lXV9dVi1ouHDx4kKuP7vDhw7kGIGLrkydPqqurc91o0FfPmTOne6exZcsWaEUwf/78WbNmdRq1hn2hiYkJkdswDEtOTi4oKACn2bdv39bV1Q0dOtTZ2bl7FvP/JvSlxRpwITQs7TR9BdUwYMZNdPDbtGmTu7s7cCFSwQAXTpky5cCBAxYWFoK6e4jOhUVFRSoqKhcuXKiqqiIa7/YVFxLR0dEBwRnoPoGmwrCwME1NzW7obPnizp07Ojo68fHxy5cvR7TX2Ni4YcMGHx8fvvshtL7ZuHEjkQvBk7P/q2MaGxsnT54M4TgWi/Xjjz/m5uYeP35cXl7ezs6ORCKhNim8uHr1qr6+fmRkpKur64QJE6AF1ddwoZub2/jx46urq6FUEQlNjxw5sn79eiqV6urqCrf+y5cvQmpeFy9ebGJigrYUmZmZGhoacnJyUlJS27Zt6+U9AbQVbGtrs7OzQ4nMO3fujB49+vjx49CyUVVVtUuPCnDhjBkzTExM0Ba8G14KRHz8+FFCQgKVM7a2tv7444/ERcb27dsXL16MfhUSHvhKQB/sTjuH4DiOYdjz58/z8vJgfouKikI27hEREciL0d7ePjY29r+8FwT0JRHiOF5WVjZ37lxBdnlcAC60sbGBxuUwobBYrLlz50JZGPHhAC708PAQpQOcoDxKW1sbiv4VFRVpa2tzhVhxHD9z5oxwS8lvjZ07dyKnGC4u7CmGbmhogA7DYAE8bdo0YsUkX1CpVE1NTXT1Nm7cqK6ufvfu3cTERAMDg/5fovThw4eWlpa9e/caGBjATT969OioUaMcHR1BHxgREUHMkhJBp9OVlZWhGu/w4cNubm6gBuJwOFu2bOG7fRSO1tbWefPmlZSUQBNBPz8/KEDEcfzly5fy8vIuLi5omgsODhYS88/IyJCQkOA685KSkj7ZE0BVT2RkZFtbG7EZGewL1dTU5s+fD871XUJjY6OPjw+XFldELkTXoba2ds2aNYGBgZDDPnv2rISExN69e6Gd4a5du9Bbvnz5Iisr26mxXE9BdC7EcfzTp08aGhr29vbEAPuOHTt8fX1ZLNbhw4eJ/P1fRh8TYVdRWVkZGhoKlUBIi8VisU6fPs37WBQWFoqS/hXEhW1tbba2tkQRhKBUTV+Z0n7+/Hn69Omurq7i4uKCuLDbyM3N9ff3h6ViRUUFBF0XLVq0d+9eDMNsbGyEcGFsbKyBgYGTk9PQoUPRnHvy5EldXV0nJ6fvQh3z888/29jYtLS07Nmzh3eDVV9fb2BggKK7LBZr2bJlSF2MikOOHDkyZcoUYMETJ058vQ7z6NGjfn5+DAbDz88PiR5XrFihrKz84sWL+vr67du329nZ8cZXmpubV69eDWY3GRkZkpKSXWr+1bNobW1ds2YN7Kjq6upQJzwi7t69SyaTeV1avgadcmF+fj5UUrFYrPHjx2/ZsmXjxo1kMhnK0p88eTJ79mwHB4fjx49z3cr8/HxRTFZ7Cl3iwp9//nn48OGhoaHoLxQKZcKECUOHDrWzs+tqfOLfiu+MCAFv374dMmRIDwYki4qKFBUVY2JiKBQK0sTT6fSZM2ei5Xw/hL29PRh/1NfXOzk5IS589erVmTNnvuaTP3z4AEJ/VCCI43hubu6ECRPg58DAwE2bNiE7DyJevnypp6cHZ/L27VslJSVBO6d+CAzD4EthGLZu3TroeUR8AYvF+v333zU1Nbm6hfz8889jx46FCZ1Op0tKSm7YsAGx4MOHD3mly109Mfwfq1jgQmQjwGazIyMjVVRUZGVl165dy7u3o9FoZmZme/fuRU8ySF5Faf7V4+jo6LC1tSU6fgmaze/evUsikXqZC6OjoxUUFK5evRoUFAR/uXXrFolE4msbS0T/5ML29vaSkpKSkhINDQ3U5BXMZoXbDP3X8F0SIf6PEF9I8/quArhQQ0ODaBXdn7mQyWT+8MMPaJpubm5WVFR0dXUV0mNFdMyYMWPVqlUMBsPLywtNAVlZWRoaGk1NTR8+fNDR0SFmH+/fv49+PXXqFNEyFDpqfRdcCF2KkCYLwzBUaUpEbGwsX/n7vn37EBceOXJk8ODBkESEVkHdCPEh5OTkoA4V0NyqS2Y6+/bt4/WNhG4+367URxAwDFu5cmWn3dsBd+7cEZLv7B6OHTuGejWjwvaWlhZU1B8dHT1y5EiiXf7t27f7IRdWV1fr6uquW7cOwzDeLjcsFmvt2rVoJQcx0k2bNrFYrDVr1ghq+/yfxfdKhPi34UJeZXN/5sJx48YRA1yLFi2ytrYWbqokHEwmE/Jh7e3t9vb2vB4TK1asGDlypKysLNc6PSAgwMXFBbjw3bt3JBKJmKExNTUl1lr0W7x9+3bEiBF8ya9TJCcnu7u7jx49Gtlbnzx5csyYMeLi4tAtsttnVV9fLyMj01XNPRHz5s3jKnMEAuies/PXo0tc+C0AXLh9+3ZoX4phGOgPED1ER0d
LSkqibsA4jt++fbvTnmV4H3HhokWLDAwMuIosAwICAgMDiSqYT58+mZqajho1KiAgYEAdw4XvmAjxb8CFfEGn02fMmNFPuJDNZufn58NDn5KSgmJHX7580dPTq66u5i2aFBFQa4ySi+3t7ciNl4jPnz8T94JpaWkLFixgMBjLli2D9gU4jq9Zs8bIyAi6rN29e9fT0xPqDvuk93SXcO/evW6kpjIyMqARRG1tbWBgILHVg/CqeRFx8OBBOTk5Xsd5EbF3715ig1Ymk2lubt5Vf/OeRZ9zYUJCgoODA9ydOXPmXLt2raKiQlNTEz3zBw4c6N41h+Ybx44do9FoQnTFPYXq6mp7e3s4T+RBU1VVNXbsWA6HA61Ipk+fHhERARFR3uLCAeDfOxHi/zEurK6utrS0tLCwGDNmTEBAAI1Gu3z5MolE0tPTk5eXh+ymkpKS8HadgnDx4kUpKamuvpfBYLi7u8+dO5fBYCAVDIvF2rJli7i4uLa29rhx4yD+VlZW1m1n7d6EiFxI1PSHhITs378f/erv799p26OuoqtcSKPRrl27BuOivb1dV1d3/vz5X758aWpqWrhwYZcsfL8RoO67D7kQrQygl++1a9eqq6uJKeGv5MLx48dzNT/6poDNKDg6USgUGRmZPXv2mJmZmZiYxMfHa2pqIsONAfDiuydCHMcTEhJkZGTevHnDZDJ7toScCCqV6ubmNnXq1NzcXBE9cXoc06dPP3fuHI7jOTk5ZDIZ8hx0Oj07Oxtmk4MHD7q7u3f783fu3ImyXMJBoVC8vLxgWcBgMHx9fXlzYM3Nze/fv++RnGUvo1MupNFoGhoaqDh63759Xl5e6L/5+fnQU7Nnz0p0LmxtbYXurDIyMqDPrK2t9fLyGjFihKSk5M6dO/tJZKxvufDChQsoRJGXlxcYGMj7mpiYmG43okI9sHoBMTExxcXFKSkpiAsfP37s7++flJQEfO/t7f010fV/Pf4NRIj/w4VTp07l6izRs4CyZeQc3cugUqmysrI4jpeVlY0fPx6q2ZBuk06n+/j4ODs7oz7m3YMoXMhgMMzNzWNjY7/mQP0cQrjwwYMHmZmZb968gZa5OI7X1NTIy8ujzNCff/65evVqLg/3HoEoXAjWo9Aco7a2dsKECYsWLYJaBcf287IAAAUgSURBVDqd3ldW2oLA4XBWrFhhbGyclZXl7+/fa8e9f/8+iUTqq81oj+Ps2bOqqqrFxcX37t3j3QwcO3bMysqqr6q8vgv8S4gQx/GEhAQh/sI9BSqV2u1UzVeCzWZLSUmlp6cbGRkBC7a0tKipqSG5/Pv373ukUB24UPjeOj4+Xk5ODhmU/CuRnJxMIpHS0tLa2tqIsffHjx/LyckBF966dQv+mJ2draSk5Obmtn79+rFjx367K7N//34FBQWIQqNA/cOHD5GLSlBQkIGBAeoK1NTUROTCfgjgQjKZ3Jvry46ODmdnZ+RL8J2iqKjo4sWL8PPZs2d5nT2am5sdHBxcXFz+NZT/jfDvIcL/ArZt24Z6b9JoNA8PD2JqqgcxwIUA4MIJEyYkJibSaDSkG3z8+LGysjLXno9CocTHx+/fv797OVrRAVwYHR2NylQCAwMjIyPhZ7BlJwb6gAv5Sp/6CTgczrdLagjC986FRUVFOjo6xNUDXz/FvLy8/hYG6IcYIML+iLS0NBicnz9/dnJyGjJkiImJyf3798HBRFpaes6cOVpaWlu3bv12j/iOHTvU1dXLy8tramoEdQf9j3DhuXPnaDTalClTiB1u+9Yr9eDBg+bm5ugcOBwOnU4PDQ2FfSFwIaJGvOdaUf7L8F1zYWFhoaKi4qFDh/r6RP4NGCDCfgcMw1xdXWfMmAFS1TNnzjQ3N587d05SUhJ0oW/fvr148WIvlEIDF5qZmQkpJf4vcCGO43V1dfr6+qtWreo/i2tI+fj5+YFLJ4ZhwcHByNQb2jgTLTEHwBcDXDgAfIAI+ydgcM6YMYOoPExLSxs9erSIBuU9hV27dt2/f1/4aw4fPowUHP2HJ3octbW1hoaGAQEB/eQ7UqnUDx8+oIQlzsOFxcXFKioqqGPzAASho6PD0dERGff322QqXwxwYY9ggAj7ETAMmzNnDuyugAtHjBhBtAS0srLqn12kgQsvX77s5ubWT3jiW6BfceHjx48VFRXfvXv3+PFjcEjBebiwl5dN3y8QF4aEhIjiINOvMMCFX48BIuwvaGlp+fTp0/Xr12F2w//hQmdnZ/C/YLFYqDdsP8R/JEbaH7gQFbfcuHGDtwcIh8NZunSpqanp9xjr60PAcFuxYkX/7xHGiwEu/EoMwnFcbAD9ADdv3gwNDX306NGrV68ePnx44sQJMTExKpU6a9as8vJyLy+v7OxsCwuL2NjYvj5TgWhubiaRSH19Ft8cdXV1kydPtra2jo+PHzRoUC8fPTk5OSoqKiMjY/jw4WJiYn/99ZelpaW4uDjxNWAXPnXq1F4+t+8ddDp9+PDhvX9PewT5+fnOzs7btm3T0NCg0Wg+Pj59fUbfEwaIsO9RUVGhqqoqJiYWHx8fExPz9OlTRUVF9F/gwvr6+lu3bmlqavbdaQ7g/6O2ttbZ2dnJyWnz5s03b95cv3597xyXyWQaGBj4+Pjs2bOnd444gO8IwIXKysr379+XkZHp69P5njBAhH2MpKSkhISE9PT0wYMHi4mJ3bt3b/LkycOGDSO+hkqlpqenz5o1q4/OcQB8AFxIoVCuXr1qZ2fXa8etqKhwcnLy9/ffuXNnrx10AN8LCgsLZWRkBliwqxggwr5EW1sblAP22pZiAD2I2trasrIyKyurXj7uABcOYAA9iyF9fQL/aYwaNerPP/90dXUdNmzYqlWr+vp0BtA1yMnJycnJ9f5xVVVVHz9+7OTkJCYmNsCFAxjA1+P/AfR8Sk7KDz4ZAAAAAElFTkSuQmCC", + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", 
+ "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "masked_toks = toks\n", + "# entr = -log.(-entropies[masks])\n", + "scatter(entropies, markercolor=colors, xticks = (1:size(masked_toks)[1], masked_toks), rot=45)" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "b22d9957-5a84-4eeb-95d8-5b067870b5d7", + "metadata": {}, + "outputs": [ + { + "ename": "LoadError", + "evalue": "MethodError: no method matching ~(::Vector{Int64})\n\n\u001b[0mClosest candidates are:\n\u001b[0m ~(\u001b[91m::TimeZones.Class\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mTimeZones\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\TimeZones\\F3BhS\\src\\\u001b[39m\u001b[90m\u001b[4mclass.jl:52\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Py\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mPythonCall\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\src\\\u001b[39m\u001b[90m\u001b[4mPy.jl:379\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Missing\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[90mBase\u001b[39m \u001b[90m\u001b[4mmissing.jl:101\u001b[24m\u001b[39m\n\u001b[0m ...\n", + "output_type": "error", + "traceback": [ + "MethodError: no method matching ~(::Vector{Int64})\n\n\u001b[0mClosest candidates are:\n\u001b[0m ~(\u001b[91m::TimeZones.Class\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mTimeZones\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\TimeZones\\F3BhS\\src\\\u001b[39m\u001b[90m\u001b[4mclass.jl:52\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Py\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mPythonCall\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\src\\\u001b[39m\u001b[90m\u001b[4mPy.jl:379\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Missing\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[90mBase\u001b[39m \u001b[90m\u001b[4mmissing.jl:101\u001b[24m\u001b[39m\n\u001b[0m ...\n", + "", + 
"Stacktrace:", + " [1] top-level scope", + " @ In[154]:1" + ] + } + ], + "source": [ + "[1, 2, 3][~[2]]" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "41f12b10-ce78-4d5d-99bb-7cbde4742ec2", + "id": "c052df85-cd7e-4fbf-80f7-9eda15700c1b", "metadata": {}, "outputs": [], "source": [] From 2126dc5de10c9d1244750ef209fc1e931b45686c Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Tue, 2 Apr 2024 18:10:45 +0200 Subject: [PATCH 8/9] add comments --- dev/notebooks/RELITC.ipynb | 540 ++++++++----------------------------- 1 file changed, 110 insertions(+), 430 deletions(-) diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb index 5b7bf8bc1..806f41bca 100644 --- a/dev/notebooks/RELITC.ipynb +++ b/dev/notebooks/RELITC.ipynb @@ -1,29 +1,28 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "e7aa29e0-5b18-477d-bee4-9d9b42aedc7e", + "metadata": {}, + "source": [ + "## RELITC\n", + "[Relevance-based Infilling for Natural Language Counterfactuals](https://dl.acm.org/doi/10.1145/3583780.3615029) (RELITC) is a Language Model counterfactual explanation method. It uses LM feature attributions to identify tokens in the original text which contribute the most to the LM classification. Once idendified, the tokens are masked and a Conditional Masked LM (CMLM), like BERT is used to fill the masks, creating a new text that should be classified to the target class. RELITC additionally tries to quantify the uncertainty of the CMLM to guide the infilling process. The only parameter of the method, $K$, percentage of masked tokens is established through beam search." + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "a521610f-12dc-4e6e-80e7-b51328794d51", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n", - "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", - "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Manifest.toml`\n" - ] - } - ], + "outputs": [], "source": [ - "using Pkg\n", - "Pkg.add(\"CUDNN_jll\")" + "# using Pkg\n", + "# Pkg.add(\"CUDNN_jll\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "0de19327-1494-4f9c-a799-d5551fac5826", "metadata": {}, "outputs": [], @@ -42,61 +41,16 @@ "id": "b4e29646-4837-4121-a9fe-6426a352811e", "metadata": {}, "source": [ - "### Load data" + "### Load data\n", + "The data used here is from the [Trillion Dollar Words](https://aclanthology.org/2023.acl-long.368/) dataset using the [TrillionDollarWords.jl](https://github.com/pat-alt/TrillionDollarWords.jl) package." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "a9c6c853-d63f-4f48-b188-0d12d9a11be0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
10×7 DataFrame
Row │ sentence │ year │ label │ seed │ sentence_splitting │ event_type │ split
    │ String │ Int64 │ String7 │ Int64 │ Bool │ String31 │ String7
  1 │ remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
  2 │ A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  3 │ inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  4 │ They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  5 │ In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
  6 │ In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  7 │ In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
  8 │ Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
  9 │ Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
 10 │ Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
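The RELITC description above mentions that the masking fraction $K$ is chosen via beam search, but no such search appears in this notebook. Below is a minimal greedy stand-in, an editor's sketch rather than the paper's procedure: it assumes only the `get_top_k_idx` helper defined later in the notebook plus a hypothetical `fill_and_classify(idx_to_mask)` callback that masks the given word indices, runs the CMLM infilling, and returns the edited text together with the label the classifier assigns to it.

# Editor's sketch (not part of the original notebook or the RELITC paper):
# greedily grow the masking fraction until the classifier reaches the target class.
function search_mask_fraction(word_attributions, target_label, fill_and_classify;
                              fractions = 0.1:0.05:0.5)
    for frac in fractions
        k = max(1, round(Int, frac * length(word_attributions)))
        idx_to_mask = get_top_k_idx(word_attributions, k)
        text, label = fill_and_classify(idx_to_mask)   # hypothetical callback, see lead-in
        if label == target_label
            return text, frac    # smallest tried fraction that flips the prediction
        end
    end
    return nothing, nothing      # no counterfactual found for the tried fractions
end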
" - ], - "text/latex": [ - "\\begin{tabular}{r|cc}\n", - "\t& sentence & \\\\\n", - "\t\\hline\n", - "\t& String & \\\\\n", - "\t\\hline\n", - "\t1 & remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. & $\\dots$ \\\\\n", - "\t2 & A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. & $\\dots$ \\\\\n", - "\t3 & inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. & $\\dots$ \\\\\n", - "\t4 & They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. & $\\dots$ \\\\\n", - "\t5 & In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. & $\\dots$ \\\\\n", - "\t6 & In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. & $\\dots$ \\\\\n", - "\t7 & In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. & $\\dots$ \\\\\n", - "\t8 & Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. & $\\dots$ \\\\\n", - "\t9 & Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. & $\\dots$ \\\\\n", - "\t10 & Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. 
& $\\dots$ \\\\\n", - "\\end{tabular}\n" - ], - "text/plain": [ - "\u001b[1m10×7 DataFrame\u001b[0m\n", - "\u001b[1m Row \u001b[0m│\u001b[1m sentence \u001b[0m\u001b[1m year \u001b[0m\u001b[1m label \u001b[0m\u001b[1m seed \u001b[0m\u001b[1m sentence_spli\u001b[0m ⋯\n", - " │\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Bool \u001b[0m ⋯\n", - "─────┼──────────────────────────────────────────────────────────────────────────\n", - " 1 │ remained well below their levels… 1996 hawkish 5768 ⋯\n", - " 2 │ A few participants also noted th… 1996 neutral 5768\n", - " 3 │ inflation was projected to pick … 1996 neutral 5768\n", - " 4 │ They noted that the realization … 1996 neutral 5768\n", - " 5 │ In the view of one member, howev… 1996 hawkish 5768 ⋯\n", - " 6 │ In the circumstances, most membe… 1996 neutral 5768\n", - " 7 │ In the staff forecast prepared f… 1996 dovish 5768\n", - " 8 │ Housing starts and the demand fo… 1996 dovish 5768\n", - " 9 │ Pressures on resources would ris… 1996 hawkish 5768 ⋯\n", - " 10 │ Price inflation had picked up a … 1996 neutral 5768\n", - "\u001b[36m 3 columns omitted\u001b[0m" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "n = 10\n", "data = load_training_sentences()\n", @@ -108,67 +62,41 @@ "id": "c716f911-920d-468b-92e8-8ca639367303", "metadata": {}, "source": [ - "### Get attributions" + "### Get attributions\n", + "The feature attributions are computed using the transformers-interpret Python library and loaded to Julia using PythonCall.\n", + "\n", + "The `scorer` outputs a per-token score of the degree of contribution to a specified class." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "e00a7c85-2a74-41bf-ad75-899f3317dac9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg 
\u001b[22m\u001b[39m\u001b[0mFound dependencies: C:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\CondaPkg.toml\n", - "\u001b[32m\u001b[1m CondaPkg \u001b[22m\u001b[39m\u001b[0mDependencies already up to date\n" - ] - } - ], + "outputs": [], "source": [ - "using CondaPkg\n", - "CondaPkg.add(\"pytorch\")\n", - "CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", - "CondaPkg.add(\"transformers-interpret\")\n", - "CondaPkg.add(\"cuDNN\")" + "# Install necessary dependencies\n", + "# using CondaPkg\n", + "# CondaPkg.add(\"pytorch\")\n", + "# CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", + "# CondaPkg.add(\"transformers-interpret\")\n", + "# CondaPkg.add(\"cuDNN\")" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "2a605169-6109-41fb-973b-7d5a044e71f1", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Python: " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "using PythonCall\n", "\n", "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", "transformers = PythonCall.pyimport(\"transformers\")\n", "\n", - "# load pre-trained classifier and corresponding tokenizer\n", - "model = transformers.RobertaForSequenceClassification.from_pretrained(\"model\", local_files_only=true)\n", + "# Load pre-trained classifier and corresponding tokenizer\n", + "model = transformers.RobertaForSequenceClassification.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")\n", "tokenizer = transformers.AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")\n", "\n", "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")" @@ -176,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "161a51e5-e3cb-4ca7-bbe2-4e95bfbfc1a9", "metadata": {}, "outputs": [ @@ -186,12 +114,14 @@ "get_attributions (generic function with 1 method)" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Returns a list of tuples containing the token at the first position \n", + "# and attribution score at second\n", "function get_attributions(text, scorer)\n", " attribs = scorer(text, index=0, internal_batch_size=1)\n", " attributions = pyconvert(Array{Tuple{String, Float64}}, attribs)\n", @@ -201,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "409785d3-9953-426f-937f-b5ebaf9a6ea7", "metadata": {}, "outputs": [ @@ -237,7 +167,7 @@ " (\"\", 0.0)" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -251,61 +181,25 @@ "id": "b25039fe-329f-46e4-b308-33bc42c734ea", "metadata": {}, "source": [ - "### Mask the word attributions" + "### Mask the word attributions\n", + "This step prepares the text for the CMLM. The $K$ tokens with the highest attribution score are masked." 
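A toy illustration of the masking step just described (made-up words and scores, not taken from the dataset): with $K = 2$, the two highest-scoring words are replaced by the mask token before the text is handed to the CMLM.

toy_words  = ["rates", "will", "rise", "sharply"]
toy_scores = [0.10, 0.02, 0.45, 0.30]            # made-up attribution scores
k = 2
top = sortperm(toy_scores; rev = true)[1:k]       # word indices 3 and 4
masked = [i in top ? "[MASK]" : w for (i, w) in enumerate(toy_words)]
join(masked, " ")                                 # "rates will [MASK] [MASK]"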
] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "48278c99-7368-4eaa-b26f-1b91674fc514", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mfuse_unk is unsupported, the tokenization result might be slightly different in some cases.\n", - "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n", - "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mmatch token `` require to match with space on either side but that is not implemented here, the tokenization result might be slightly different in some cases.\n", - "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n" - ] - }, - { - "data": { - "text/plain": [ - "BaselineModel(GPT2TextEncoder(\n", - "├─ TextTokenizer(MatchTokenization(CodeNormalizer(BPETokenization(GPT2Tokenization, bpe = CachedBPE(BPE(50000 merges))), codemap = CodeMap{UInt8 => UInt16}(3 code-ranges)), 5 patterns)),\n", - "├─ vocab = Vocab{String, SizedArray}(size = 50265, unk = , unki = 4),\n", - "├─ codemap = CodeMap{UInt8 => UInt16}(3 code-ranges),\n", - "├─ startsym = ,\n", - "├─ endsym = ,\n", - "├─ padsym = ,\n", - "├─ trunc = 256,\n", - "└─ process = Pipelines:\n", - " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", - " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", - " ╰─ target[(token, segment)] := SequenceTemplate{String}(: Input: : (: Input: :)...)(target.token)\n", - " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(256))(target.token)\n", - " ╰─ target[token] := TextEncodeBase.trunc_or_pad(256, , tail, tail)(target.token)\n", - " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", - " ╰─ target := (target.token, target.attention_mask)\n", - "), HGFRobertaForSequenceClassification(HGFRobertaModel(Chain(CompositeEmbedding(token = Embed(1024, 50265), position = ApplyEmbed(.+, FixedLenPositionEmbed(1024, 514), Transformers.HuggingFace.roberta_pe_indices(1,)), segment = ApplyEmbed(.+, Embed(1024, 1), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(1024, ϵ = 1.0e-5))), Transformer<24>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 16, p = nothing), Fork<3>(Dense(W = (1024, 1024), b = true)), Dense(W = (1024, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (1024, 4096), b = true), Dense(W = (4096, 1024), b = true))), LayerNorm(1024, ϵ = 1.0e-5))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(DropoutLayer(Transformers.HuggingFace.FirstTokenPooler()), DropoutLayer(Dense(σ = NNlib.tanh_fast, W = (1024, 1024), b = true)), Dense(W = (1024, 3), b = true)))), Transformers.HuggingFace.HGFConfig{:roberta, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Dict{Symbol, Any}}(:use_cache => true, :torch_dtype => \"float32\", :vocab_size => 50265, :output_hidden_states => true, :hidden_act => \"gelu\", :num_hidden_layers => 24, :num_attention_heads => 16, :classifier_dropout => nothing, :type_vocab_size => 1, :intermediate_size => 4096…))" - ] - }, - "execution_count": 8, - 
"metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cls = TrillionDollarWords.load_model(; output_hidden_states=true)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "08a37453-82d2-4319-a719-a21d6685c88f", "metadata": {}, "outputs": [ @@ -315,12 +209,14 @@ "group_into_words (generic function with 1 method)" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# The tokenizer returns tokens instead of words\n", + "# the tokens are grouped into words and max pooling is used to get the word attribution\n", "function group_into_words(text, attributions, cls_tkr)\n", " toks = decode(cls_tkr, encode(cls_tkr, text).token)\n", " word_attributions = []\n", @@ -342,7 +238,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "02b63bfd-134c-44ce-b2de-67ee99067745", "metadata": {}, "outputs": [ @@ -378,7 +274,7 @@ " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -390,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", "metadata": {}, "outputs": [ @@ -400,12 +296,13 @@ "get_top_k_idx (generic function with 2 methods)" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Returns a list of indexes of words with the highest attribution scores\n", "function get_top_k_idx(attributions, k=10)\n", " sorted = sort(attributions, by = x -> -maximum(x[3]))\n", " idx_to_mask = []\n", @@ -418,40 +315,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "ee80bad8-3641-43d1-8d6d-6dcd87e40f15", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "12-element Vector{Any}:\n", - " 4\n", - " 12\n", - " 19\n", - " 13\n", - " 14\n", - " 16\n", - " 15\n", - " 31\n", - " 32\n", - " 21\n", - " 9\n", - " 29" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "idx_to_mask = get_top_k_idx(word_attributions)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "d2071233-269a-49f6-b9d3-5e87f5be6d9e", "metadata": {}, "outputs": [ @@ -461,12 +335,13 @@ "mask_toks_at_idx (generic function with 1 method)" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Masks tokens (here words) at corresponding indexes and forms them into a string\n", "function mask_toks_at_idx(toks, idx_to_mask)\n", " masked_text = Vector{Char}()\n", " for (i, token) in enumerate(toks)\n", @@ -485,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 13, "id": "04662dad-be82-4888-9c6f-c26093347e80", "metadata": {}, "outputs": [ @@ -495,7 +370,7 @@ "\"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\"" ] }, - "execution_count": 32, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -507,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 26, "id": "a596f684-1b82-4823-987b-adc633545977", "metadata": { "scrolled": true @@ -536,12 +411,13 @@ "), 
HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" ] }, - "execution_count": 15, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Loads the CMLM model from HuggingFace\n", "function load_model(; kwrgs...)\n", " model_name = \"karoldobiczek/relitc-FOMC-CMLM\"\n", " tkr = Transformers.load_tokenizer(model_name)\n", @@ -555,7 +431,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 27, "id": "af3721c6-c528-4256-bb22-a4476a1e4568", "metadata": {}, "outputs": [ @@ -591,7 +467,7 @@ " \"[SEP]\"" ] }, - "execution_count": 33, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -602,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 28, "id": "6ee6f417-25b9-4775-b0fd-165750e0584f", "metadata": {}, "outputs": [ @@ -612,12 +488,14 @@ "get_idx_cmlm (generic function with 1 method)" ] }, - "execution_count": 34, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Since the CMLM model and the classifier models' tokenizers differ\n", + "# we have to create a different masking for the CMLM tokenizer\n", "function get_idx_cmlm(cmlm_decoded)\n", " idx_to_mask = []\n", " for (i, tok) in enumerate(cmlm_decoded)\n", @@ -631,33 +509,10 @@ }, { "cell_type": "code", - "execution_count": 178, + "execution_count": null, "id": "af757265-243c-450e-8af4-adcd11b03485", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "12-element Vector{Any}:\n", - " 3\n", - " 8\n", - " 11\n", - " 12\n", - " 13\n", - " 14\n", - " 15\n", - " 18\n", - " 20\n", - " 28\n", - " 30\n", - " 31" - ] - }, - "execution_count": 178, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "idx_to_mask = get_idx_cmlm(cmlm_decoded)" ] @@ -672,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 30, "id": "ff76df2f-f5e5-40bf-9a37-a517ac17acc7", "metadata": {}, "outputs": [ @@ -682,12 +537,13 @@ "merge_tokens (generic function with 2 methods)" ] }, - "execution_count": 36, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Merges a token list into a string, masking at specified indexes\n", "function merge_tokens(tokens, idx_to_mask=[])\n", " merged_text = Vector{Char}()\n", " for (i, token) in 
enumerate(tokens)\n", @@ -704,7 +560,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 31, "id": "7263a950-f7d8-4b02-a071-5314e7ad2559", "metadata": {}, "outputs": [ @@ -714,12 +570,13 @@ "group_into_words (generic function with 3 methods)" ] }, - "execution_count": 37, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Merges the CMLM output token list into a string\n", "function group_into_words(cmlm_out, delim=\"##\")\n", " word_list = []\n", " for token in cmlm_out\n", @@ -735,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 32, "id": "1ce0fbff-a40c-4e95-9489-c1f36478b29d", "metadata": {}, "outputs": [ @@ -745,12 +602,16 @@ "left_to_right_filling (generic function with 1 method)" ] }, - "execution_count": 122, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Recursively fills in the tokens\n", + "# The function selects the lowest index from mask_position and uses the CMLM\n", + "# to fill in the predicted token at the given position\n", + "# Once the mask_position list is empty, the merged string is returned\n", "function left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", " if length(mask_positions) == 0\n", " return merge_tokens(tokens)\n", @@ -774,35 +635,17 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 33, "id": "2d3bd657-d656-462c-97b9-58b5dd923394", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[CLS] remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier 
increases in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations [MASK] [SEP]\n" - ] - }, { "data": { "text/plain": [ "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations . [SEP]\"" ] }, - "execution_count": 39, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -813,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 176, + "execution_count": 34, "id": "c2a089f0-e814-43ba-8da6-e22e1f087a7c", "metadata": {}, "outputs": [ @@ -823,12 +666,16 @@ "uncertainty_filling (generic function with 1 method)" ] }, - "execution_count": 176, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Recursively fills in the tokens using CMLM uncertainty\n", + "# The function selects the masked token with the lowest logit entropy\n", + "# and fills in the predicted token at the given position\n", + "# Once the mask_position list is empty, the merged string is returned\n", "function uncertainty_filling(tokens, mask_positions, model, tokenizer)\n", " if length(mask_positions) == 0\n", " return merge_tokens(tokens)\n", @@ -859,7 +706,7 @@ }, { "cell_type": "code", - "execution_count": 179, + "execution_count": 35, "id": "67ff3d94-59e9-4235-9285-4aeb3ba841ed", "metadata": {}, "outputs": [ @@ -869,7 +716,7 @@ "\"[CLS] remained well below their levels at the beginning of august to help alleviate the weaker demand that earlier gains in oil prices had been holding down on price stability . [SEP]\"" ] }, - "execution_count": 179, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -879,99 +726,19 @@ ] }, { - "cell_type": "code", - "execution_count": 42, - "id": "b014982f-8df6-4f10-b675-b60822dcfea2", + "cell_type": "markdown", + "id": "f3db316c-2cce-4163-9856-e12d69d577b4", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "merge_truncated_words (generic function with 4 methods)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "function merge_truncated_words(tokens, in_word=\"##\", between_word=\"\", separators=[\".\", \"de\"])\n", - " decoded = Vector{Char}()\n", - " for token in tokens\n", - " token = string(token)\n", - " if token in separators\n", - " continue\n", - " else\n", - " if startswith(token, in_word)\n", - " if length(decoded) == 0\n", - " append!(decoded, token)\n", - " else\n", - " last = pop!(decoded)\n", - " new_token = last * chop(token, head=2, tail=0)\n", - " append!(decoded, new_token)\n", - " end\n", - " else\n", - " append!(decoded, \" \" * token)\n", - " end\n", - " end\n", - " end\n", - " return decoded\n", - "end" + "### Putting it all together" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "id": "6c6cc184-c8f7-4201-8886-f5da43785223", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
10×7 DataFrame
Row │ sentence │ year │ label │ seed │ sentence_splitting │ event_type │ split
    │ String │ Int64 │ String7 │ Int64 │ Bool │ String31 │ String7
  1 │ remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
  2 │ A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  3 │ inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  4 │ They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  5 │ In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
  6 │ In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  7 │ In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
  8 │ Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
  9 │ Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
 10 │ Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
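`uncertainty_filling`, defined earlier, ranks the masked positions by the entropy of the CMLM's predictive distribution and fills the most certain position first. A tiny worked example of that quantity, an editor's sketch using the `entropy` function exported by StatsBase (already imported at the top of the notebook):

p_confident = [0.90, 0.05, 0.03, 0.02]   # CMLM is fairly sure about this position
p_uniform   = [0.25, 0.25, 0.25, 0.25]   # CMLM is maximally uncertain
entropy(p_confident)                      # ≈ 0.43 nats → this position is filled first
entropy(p_uniform)                        # ≈ 1.39 nats (log(4)) → filled last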
" - ], - "text/latex": [ - "\\begin{tabular}{r|cc}\n", - "\t& sentence & \\\\\n", - "\t\\hline\n", - "\t& String & \\\\\n", - "\t\\hline\n", - "\t1 & remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. & $\\dots$ \\\\\n", - "\t2 & A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. & $\\dots$ \\\\\n", - "\t3 & inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. & $\\dots$ \\\\\n", - "\t4 & They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. & $\\dots$ \\\\\n", - "\t5 & In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. & $\\dots$ \\\\\n", - "\t6 & In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. & $\\dots$ \\\\\n", - "\t7 & In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. & $\\dots$ \\\\\n", - "\t8 & Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. & $\\dots$ \\\\\n", - "\t9 & Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. & $\\dots$ \\\\\n", - "\t10 & Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. 
& $\\dots$ \\\\\n", - "\\end{tabular}\n" - ], - "text/plain": [ - "\u001b[1m10×7 DataFrame\u001b[0m\n", - "\u001b[1m Row \u001b[0m│\u001b[1m sentence \u001b[0m\u001b[1m year \u001b[0m\u001b[1m label \u001b[0m\u001b[1m seed \u001b[0m\u001b[1m sentence_spli\u001b[0m ⋯\n", - " │\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Bool \u001b[0m ⋯\n", - "─────┼──────────────────────────────────────────────────────────────────────────\n", - " 1 │ remained well below their levels… 1996 hawkish 5768 ⋯\n", - " 2 │ A few participants also noted th… 1996 neutral 5768\n", - " 3 │ inflation was projected to pick … 1996 neutral 5768\n", - " 4 │ They noted that the realization … 1996 neutral 5768\n", - " 5 │ In the view of one member, howev… 1996 hawkish 5768 ⋯\n", - " 6 │ In the circumstances, most membe… 1996 neutral 5768\n", - " 7 │ In the staff forecast prepared f… 1996 dovish 5768\n", - " 8 │ Housing starts and the demand fo… 1996 dovish 5768\n", - " 9 │ Pressures on resources would ris… 1996 hawkish 5768 ⋯\n", - " 10 │ Price inflation had picked up a … 1996 neutral 5768\n", - "\u001b[36m 3 columns omitted\u001b[0m" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "n = 10\n", "data = load_training_sentences()\n", @@ -980,31 +747,10 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "6a14a2e5-4dc6-4fb4-94ee-01cf6526550c", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mfuse_unk is unsupported, the tokenization result might be slightly different in some cases.\n", - "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n", - "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mmatch token `` require to match with space on either side but that is not implemented here, the tokenization result might be slightly different in some cases.\n", - "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Transformers.HuggingFace C:\\Users\\drobi\\.julia\\packages\\Transformers\\lD5nW\\src\\huggingface\\tokenizer\\utils.jl:42\u001b[39m\n" - ] - }, - { - "data": { - "text/plain": [ - "Python: " - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cmlm_tkr, cmlm_model = load_model()\n", "cls = TrillionDollarWords.load_model(; output_hidden_states=true)\n", @@ -1023,39 +769,10 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "id": "5fb7e883-bad7-47fe-b44b-930a961225ee", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[CLS] remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down 
[MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down [MASK] price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price [MASK] [MASK] [SEP]\n", - "[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations [MASK] [SEP]\n" - ] - }, - { - "data": { - "text/plain": [ - "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations . 
[SEP]\"" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "attributions = get_attributions(texts[1, :].sentence, scorer)\n", "\n", @@ -1074,34 +791,21 @@ ] }, { - "cell_type": "code", - "execution_count": 107, - "id": "7c0fde75-655e-48d1-8e26-c0553ff510c7", + "cell_type": "markdown", + "id": "e944e49b-253c-4850-af4b-d483d870af84", "metadata": {}, - "outputs": [], "source": [ - "using Plots" + "### Detour: Visualizing CMLM uncertainty through entropy" ] }, { "cell_type": "code", - "execution_count": 52, - "id": "079dd980-cfe5-4fbe-aad7-b26500f0b646", + "execution_count": 107, + "id": "7c0fde75-655e-48d1-8e26-c0553ff510c7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.2599115233639782" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "entropy([.2, .3, .5, .1])" + "using Plots" ] }, { @@ -1451,37 +1155,13 @@ ], "source": [ "masked_toks = toks\n", - "# entr = -log.(-entropies[masks])\n", "scatter(entropies, markercolor=colors, xticks = (1:size(masked_toks)[1], masked_toks), rot=45)" ] }, - { - "cell_type": "code", - "execution_count": 154, - "id": "b22d9957-5a84-4eeb-95d8-5b067870b5d7", - "metadata": {}, - "outputs": [ - { - "ename": "LoadError", - "evalue": "MethodError: no method matching ~(::Vector{Int64})\n\n\u001b[0mClosest candidates are:\n\u001b[0m ~(\u001b[91m::TimeZones.Class\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mTimeZones\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\TimeZones\\F3BhS\\src\\\u001b[39m\u001b[90m\u001b[4mclass.jl:52\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Py\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mPythonCall\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\src\\\u001b[39m\u001b[90m\u001b[4mPy.jl:379\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Missing\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[90mBase\u001b[39m \u001b[90m\u001b[4mmissing.jl:101\u001b[24m\u001b[39m\n\u001b[0m ...\n", - "output_type": "error", - "traceback": [ - "MethodError: no method matching ~(::Vector{Int64})\n\n\u001b[0mClosest candidates are:\n\u001b[0m ~(\u001b[91m::TimeZones.Class\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mTimeZones\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\TimeZones\\F3BhS\\src\\\u001b[39m\u001b[90m\u001b[4mclass.jl:52\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Py\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[32mPythonCall\u001b[39m \u001b[90mC:\\Users\\drobi\\.julia\\packages\\PythonCall\\wXfah\\src\\\u001b[39m\u001b[90m\u001b[4mPy.jl:379\u001b[24m\u001b[39m\n\u001b[0m ~(\u001b[91m::Missing\u001b[39m)\n\u001b[0m\u001b[90m @\u001b[39m \u001b[90mBase\u001b[39m \u001b[90m\u001b[4mmissing.jl:101\u001b[24m\u001b[39m\n\u001b[0m ...\n", - "", - "Stacktrace:", - " [1] top-level scope", - " @ In[154]:1" - ] - } - ], - "source": [ - "[1, 2, 3][~[2]]" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "c052df85-cd7e-4fbf-80f7-9eda15700c1b", + "id": "c564a9bd-ff84-40c3-b4bd-3c301bb8cfa7", "metadata": {}, "outputs": [], "source": [] From 26077fd330e4aa55c2f8f7258b35641ac743e509 Mon Sep 17 00:00:00 2001 From: Karol Dobiczek Date: Wed, 10 Jul 2024 11:54:53 +0200 Subject: [PATCH 9/9] add RELITC with vizualization --- dev/notebooks/RELITC_with_viz.ipynb | 1370 +++++++++++++++++++++++++++ 1 file changed, 1370 insertions(+) create mode 100644 dev/notebooks/RELITC_with_viz.ipynb diff --git 
a/dev/notebooks/RELITC_with_viz.ipynb b/dev/notebooks/RELITC_with_viz.ipynb new file mode 100644 index 000000000..a11b07150 --- /dev/null +++ b/dev/notebooks/RELITC_with_viz.ipynb @@ -0,0 +1,1370 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e7aa29e0-5b18-477d-bee4-9d9b42aedc7e", + "metadata": {}, + "source": [ + "## RELITC\n", + "[Relevance-based Infilling for Natural Language Counterfactuals](https://dl.acm.org/doi/10.1145/3583780.3615029) (RELITC) is a Language Model counterfactual explanation method. It uses LM feature attributions to identify tokens in the original text which contribute the most to the LM classification. Once idendified, the tokens are masked and a Conditional Masked LM (CMLM), like BERT is used to fill the masks, creating a new text that should be classified to the target class. RELITC additionally tries to quantify the uncertainty of the CMLM to guide the infilling process. The only parameter of the method, $K$, percentage of masked tokens is established through beam search." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a521610f-12dc-4e6e-80e7-b51328794d51", + "metadata": {}, + "outputs": [], + "source": [ + "# using Pkg\n", + "# Pkg.add(\"CUDNN_jll\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0de19327-1494-4f9c-a799-d5551fac5826", + "metadata": {}, + "outputs": [], + "source": [ + "using DataFrames\n", + "using Transformers\n", + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace\n", + "using TrillionDollarWords\n", + "using StatsBase\n", + "using Flux" + ] + }, + { + "cell_type": "markdown", + "id": "b4e29646-4837-4121-a9fe-6426a352811e", + "metadata": {}, + "source": [ + "### Load data\n", + "The data used here is from the [Trillion Dollar Words](https://aclanthology.org/2023.acl-long.368/) dataset using the [TrillionDollarWords.jl](https://github.com/pat-alt/TrillionDollarWords.jl) package." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a9c6c853-d63f-4f48-b188-0d12d9a11be0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
10×7 DataFrame
Row │ sentence │ year │ label │ seed │ sentence_splitting │ event_type │ split
    │ String │ Int64 │ String7 │ Int64 │ Bool │ String31 │ String7
  1 │ remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
  2 │ A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  3 │ inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  4 │ They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  5 │ In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
  6 │ In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
  7 │ In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
  8 │ Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. │ 1996 │ dovish │ 5768 │ true │ meeting minutes │ test
  9 │ Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. │ 1996 │ hawkish │ 5768 │ true │ meeting minutes │ test
 10 │ Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. │ 1996 │ neutral │ 5768 │ true │ meeting minutes │ test
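A small helper, sketched here for illustration only, for reading a predicted label off the Python classifier that the next cells load via PythonCall. `model`, `tokenizer` and `torch` are the Python objects created below; the dovish/hawkish/neutral ordering of the output logits is an assumption and should be checked against the model card, and the call assumes the model and the input tensors live on the same device.

# Editor's sketch, not part of the original notebook:
function predict_label(text; labels = ["dovish", "hawkish", "neutral"])   # label order assumed
    toks = tokenizer(text)
    out  = model(input_ids = torch.Tensor(toks.input_ids).int().unsqueeze(0),
                 attention_mask = torch.Tensor(toks.attention_mask).unsqueeze(0))
    logits = pyconvert(Vector{Float64}, out.logits.detach().cpu().flatten().tolist())
    return labels[argmax(logits)]
end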
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& sentence & \\\\\n", + "\t\\hline\n", + "\t& String & \\\\\n", + "\t\\hline\n", + "\t1 & remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. & $\\dots$ \\\\\n", + "\t2 & A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. & $\\dots$ \\\\\n", + "\t3 & inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. & $\\dots$ \\\\\n", + "\t4 & They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. & $\\dots$ \\\\\n", + "\t5 & In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. & $\\dots$ \\\\\n", + "\t6 & In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. & $\\dots$ \\\\\n", + "\t7 & In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. & $\\dots$ \\\\\n", + "\t8 & Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. & $\\dots$ \\\\\n", + "\t9 & Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. & $\\dots$ \\\\\n", + "\t10 & Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. 
& $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m10×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m sentence \u001b[0m\u001b[1m year \u001b[0m\u001b[1m label \u001b[0m\u001b[1m seed \u001b[0m\u001b[1m sentence_spli\u001b[0m ⋯\n", + " │\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Bool \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ remained well below their levels… 1996 hawkish 5768 ⋯\n", + " 2 │ A few participants also noted th… 1996 neutral 5768\n", + " 3 │ inflation was projected to pick … 1996 neutral 5768\n", + " 4 │ They noted that the realization … 1996 neutral 5768\n", + " 5 │ In the view of one member, howev… 1996 hawkish 5768 ⋯\n", + " 6 │ In the circumstances, most membe… 1996 neutral 5768\n", + " 7 │ In the staff forecast prepared f… 1996 dovish 5768\n", + " 8 │ Housing starts and the demand fo… 1996 dovish 5768\n", + " 9 │ Pressures on resources would ris… 1996 hawkish 5768 ⋯\n", + " 10 │ Price inflation had picked up a … 1996 neutral 5768\n", + "\u001b[36m 3 columns omitted\u001b[0m" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[90m Pkg.update() complete \u001b[39m\n" + ] + } + ], + "source": [ + "n = 10\n", + "data = load_training_sentences()\n", + "texts = filter(:split => n -> n == \"test\", data)[1:n, :]" + ] + }, + { + "cell_type": "markdown", + "id": "c716f911-920d-468b-92e8-8ca639367303", + "metadata": {}, + "source": [ + "### Get attributions\n", + "The feature attributions are computed using the transformers-interpret Python library and loaded to Julia using PythonCall.\n", + "\n", + "The `scorer` outputs a per-token score of the degree of contribution to a specified class." 
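Once `get_attributions` below has converted the scorer output into (token, score) tuples, the tokens that push the prediction hardest towards the chosen class can be read off by sorting on the score. A quick sketch with made-up values:

example_attribs = [("rates", 0.41), ("fell", -0.22), ("inflation", 0.35), (".", 0.02)]
sort(example_attribs; by = last, rev = true)[1:2]   # [("rates", 0.41), ("inflation", 0.35)]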
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e00a7c85-2a74-41bf-ad75-899f3317dac9", + "metadata": {}, + "outputs": [], + "source": [ + "# Install necessary dependencies\n", + "# using CondaPkg\n", + "# CondaPkg.add(\"pytorch\")\n", + "# CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", + "# CondaPkg.add(\"transformers-interpret\")\n", + "# CondaPkg.add(\"cuDNN\")" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ff425216-c8a3-49d7-ba21-b89da26e7b4d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "SYSTEM: caught exception of type :MethodError while trying to print a failed Task notice; giving up\n" + ] + }, + { + "data": { + "text/plain": [ + "Python: " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "using PythonCall\n", + "\n", + "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", + "transformers = PythonCall.pyimport(\"transformers\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a605169-6109-41fb-973b-7d5a044e71f1", + "metadata": {}, + "outputs": [], + "source": [ + "# Load pre-trained classifier and corresponding tokenizer\n", + "# classifier = \"gtfintechlab/FOMC-RoBERTa\"\n", + "classifier = \"karoldobiczek/roberta-base_fomc\"\n", + "\n", + "println(\"loading\")\n", + "model = transformers.RobertaForSequenceClassification.from_pretrained(classifier).cuda()\n", + "println(\"model done\")\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(classifier)\n", + "println(\"tok done\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d7692016-104c-402c-87c2-2f461d004aee", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scorer done\n" + ] + } + ], + "source": [ + "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")\n", + "println(\"scorer done\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f5eb6b90-e0e4-4d11-9136-e363e29cf1a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python: " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch = PythonCall.pyimport(\"torch\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5841a543-b2c7-40d6-aa6e-a4e602937fa9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python: SequenceClassifierOutput(loss=None, logits=tensor([[ 0.8820, -0.5574, -0.7120]], grad_fn=), hidden_states=None, attentions=None)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toks = tokenizer(texts[1, :].sentence)\n", + "model(input_ids=torch.Tensor(toks.input_ids).int().unsqueeze(0), attention_mask=torch.Tensor(toks.attention_mask).unsqueeze(0))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "267d0d79-e8b4-41f6-8290-daf5a2d0f256", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for i in 1:n\n", + " scorer(texts[i, :].sentence)\n", + " scorer.visualize(\"fomc_roberta_viz_\" * string(i) * \".html\")\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "161a51e5-e3cb-4ca7-bbe2-4e95bfbfc1a9", + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "get_attributions (generic function with 1 method)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Returns a list of tuples containing the token at the first position \n", + "# and attribution score at second\n", + "function get_attributions(text, scorer)\n", + " \n", + " attribs = scorer(text, internal_batch_size=1)\n", + " attributions = pyconvert(Array{Tuple{String, Float64}}, attribs)\n", + " return attributions\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "409785d3-9953-426f-937f-b5ebaf9a6ea7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "34-element Vector{Tuple{String, Float64}}:\n", + " (\"\", 0.0)\n", + " (\"rem\", -0.09617849663936606)\n", + " (\"ained\", -0.29702715615956665)\n", + " (\"well\", 0.25551209840018674)\n", + " (\"below\", -0.4015987185377847)\n", + " (\"their\", -0.013975036770146217)\n", + " (\"levels\", -0.13794157805498133)\n", + " (\"at\", -0.002103001077563767)\n", + " (\"the\", 0.14926277592717438)\n", + " (\"beginning\", 0.08228264530334284)\n", + " (\"of\", 0.08006335674570937)\n", + " (\"the\", 0.24284418735078794)\n", + " (\"year\", 0.20003154332566433)\n", + " ⋮\n", + " (\"oil\", -0.06976564966318043)\n", + " (\"prices\", 0.011989646035101107)\n", + " (\"had\", -0.15355389676950998)\n", + " (\"been\", -0.10492307025848874)\n", + " (\"holding\", -0.07771222018418246)\n", + " (\"down\", -0.024919108109520634)\n", + " (\"consumer\", 0.09788758630638592)\n", + " (\"price\", 0.011979387105458176)\n", + " (\"inflation\", -0.09257150340664654)\n", + " (\".\", 0.15492799445950947)\n", + " (\"\", -0.3214473479860127)\n", + " (\"\", 0.0)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attributions = get_attributions(texts[1, :].sentence, scorer)" + ] + }, + { + "cell_type": "markdown", + "id": "b25039fe-329f-46e4-b308-33bc42c734ea", + "metadata": {}, + "source": [ + "### Mask the word attributions\n", + "This step prepares the text for the CMLM. The $K$ tokens with the highest attribution score are masked." 
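The masking idea in isolation, as a hypothetical stand-alone helper operating on per-word scores (the notebook's own `group_into_words` / `get_top_k_idx` / `mask_toks_at_idx` implementation below additionally max-pools sub-word tokens into words first):

```julia
# Replace the k highest-attribution words with the CMLM mask token.
function mask_top_k(words::Vector{String}, scores::Vector{Float64}, k::Integer)
    k   = min(k, length(words))
    top = partialsortperm(scores, 1:k; rev = true)   # indices of the k largest scores
    masked = copy(words)
    masked[top] .= "[MASK]"
    return join(masked, " ")
end

words  = ["remained", "well", "below", "their", "levels"]
scores = [-0.10, 0.26, -0.40, -0.01, -0.14]
mask_top_k(words, scores, 2)    # "remained [MASK] below [MASK] levels"
```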
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48278c99-7368-4eaa-b26f-1b91674fc514", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "08a37453-82d2-4319-a719-a21d6685c88f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "group_into_words (generic function with 1 method)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The tokenizer returns tokens instead of words\n", + "# the tokens are grouped into words and max pooling is used to get the word attribution\n", + "function group_into_words(text, attributions, cls_tkr)\n", + " toks = decode(cls_tkr, encode(cls_tkr, text).token)\n", + " word_attributions = []\n", + " for (i, (dec_tok, attrib)) in enumerate(zip(toks, attributions))\n", + " if startswith(dec_tok, \"<\")\n", + " continue\n", + " elseif length(word_attributions) == 0 || startswith(dec_tok, \" \")\n", + " push!(word_attributions, ([i], [attrib[1]], [attrib[2]]))\n", + " else \n", + " last_processed = last(word_attributions)\n", + " push!(last_processed[1], i)\n", + " push!(last_processed[2], attrib[1])\n", + " push!(last_processed[3], attrib[2])\n", + " end\n", + " end\n", + " return word_attributions\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "02b63bfd-134c-44ce-b2de-67ee99067745", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "28-element Vector{Any}:\n", + " ([2, 3], [\"rem\", \"ained\"], [-0.09617849663936606, -0.29702715615956665])\n", + " ([4], [\"well\"], [0.25551209840018674])\n", + " ([5], [\"below\"], [-0.4015987185377847])\n", + " ([6], [\"their\"], [-0.013975036770146217])\n", + " ([7], [\"levels\"], [-0.13794157805498133])\n", + " ([8], [\"at\"], [-0.002103001077563767])\n", + " ([9], [\"the\"], [0.14926277592717438])\n", + " ([10], [\"beginning\"], [0.08228264530334284])\n", + " ([11], [\"of\"], [0.08006335674570937])\n", + " ([12], [\"the\"], [0.24284418735078794])\n", + " ([13, 14], [\"year\", \",\"], [0.20003154332566433, 0.13104142887853437])\n", + " ([15], [\"and\"], [0.17795123590947837])\n", + " ([16], [\"that\"], [0.177958452206897])\n", + " ⋮\n", + " ([20], [\"earlier\"], [-0.2643532357406369])\n", + " ([21], [\"declines\"], [0.14990056209456956])\n", + " ([22], [\"in\"], [-0.3258371366392156])\n", + " ([23], [\"oil\"], [-0.06976564966318043])\n", + " ([24], [\"prices\"], [0.011989646035101107])\n", + " ([25], [\"had\"], [-0.15355389676950998])\n", + " ([26], [\"been\"], [-0.10492307025848874])\n", + " ([27], [\"holding\"], [-0.07771222018418246])\n", + " ([28], [\"down\"], [-0.024919108109520634])\n", + " ([29], [\"consumer\"], [0.09788758630638592])\n", + " ([30], [\"price\"], [0.011979387105458176])\n", + " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = texts[1, :].sentence\n", + "word_attributions = group_into_words(text, attributions, cls.tkr)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "get_top_k_idx (generic function with 2 methods)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
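+    "# `attributions` entries are (token_indices, tokens, scores) triples from `group_into_words`;\n",
+    "# words are ranked by their maximum (max-pooled) token score and the token indices\n",
+    "# of the k highest-ranked words are collected for masking.\n",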
"# Returns a list of indexes of words with the highest attribution scores\n", + "function get_top_k_idx(attributions, k=10)\n", + " sorted = sort(attributions, by = x -> -maximum(x[3]))\n", + " idx_to_mask = []\n", + " for row in first(sorted, k)\n", + " append!(idx_to_mask, row[1])\n", + " end\n", + " return idx_to_mask\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee80bad8-3641-43d1-8d6d-6dcd87e40f15", + "metadata": {}, + "outputs": [], + "source": [ + "idx_to_mask = get_top_k_idx(word_attributions)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d2071233-269a-49f6-b9d3-5e87f5be6d9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mask_toks_at_idx (generic function with 1 method)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Masks tokens (here words) at corresponding indexes and forms them into a string\n", + "function mask_toks_at_idx(toks, idx_to_mask)\n", + " masked_text = Vector{Char}()\n", + " for (i, token) in enumerate(toks)\n", + " if startswith(token, \"<\")\n", + " continue\n", + " elseif i in idx_to_mask\n", + " append!(masked_text, \" [MASK]\")\n", + " else\n", + " append!(masked_text, token)\n", + " end\n", + " end\n", + " \n", + " return String(masked_text)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "04662dad-be82-4888-9c6f-c26093347e80", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\"" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toks = decode(cls.tkr, encode(cls.tkr, text).token)\n", + "masked_text = mask_toks_at_idx(toks, idx_to_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a596f684-1b82-4823-987b-adc633545977", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(BertTextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(WordPieceTokenization(bert_uncased_tokenizer, WordPiece(vocab_size = 30522, unk = [UNK], max_char = 100)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 30522, unk = [UNK], unki = 101),\n", + "├─ startsym = [CLS],\n", + "├─ endsym = [SEP],\n", + "├─ padsym = [PAD],\n", + "├─ trunc = 512,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}([CLS]: Input[1]: [SEP]: (Input[2]: [SEP]:)...)(target.token)\n", + " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(512))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_and_pad(512, [PAD], head, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target[segment] := TextEncodeBase.trunc_and_pad(512, 1, head, tail)(target.segment)\n", + " ╰─ target[segment] := TextEncodeBase.nested2batch(target.segment)\n", + " ╰─ target := (target.token, target.segment, target.attention_mask)\n", + "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = 
ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Loads the CMLM model from HuggingFace\n", + "function load_model(; kwrgs...)\n", + " model_name = \"karoldobiczek/relitc-FOMC-CMLM\"\n", + " tkr = Transformers.load_tokenizer(model_name)\n", + " cfg = Transformers.HuggingFace.HGFConfig(Transformers.load_config(model_name); kwrgs...)\n", + " mod = Transformers.load_model(model_name, \"ForMaskedLM\"; config = cfg)\n", + "\n", + " return tkr, mod, cfg\n", + "end\n", + "cmlm_tkr, cmlm_model = load_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "af3721c6-c528-4256-bb22-a4476a1e4568", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "32-element Vector{String}:\n", + " \"[CLS]\"\n", + " \"remained\"\n", + " \"[MASK]\"\n", + " \"below\"\n", + " \"their\"\n", + " \"levels\"\n", + " \"at\"\n", + " \"[MASK]\"\n", + " \"beginning\"\n", + " \"of\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " ⋮\n", + " \"in\"\n", + " \"oil\"\n", + " \"prices\"\n", + " \"had\"\n", + " \"been\"\n", + " \"holding\"\n", + " \"down\"\n", + " \"[MASK]\"\n", + " \"price\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " \"[SEP]\"" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cmlm_decoded = decode(cmlm_tkr, encode(cmlm_tkr, masked_text).token)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "6ee6f417-25b9-4775-b0fd-165750e0584f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "get_idx_cmlm (generic function with 1 method)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Since the CMLM model and the classifier models' tokenizers differ\n", + "# we have to create a different masking for the CMLM tokenizer\n", + "function get_idx_cmlm(cmlm_decoded)\n", + " idx_to_mask = []\n", + " for (i, tok) in enumerate(cmlm_decoded)\n", + " if tok == \"[MASK]\"\n", + " push!(idx_to_mask, i)\n", + " end\n", + " end\n", + " return idx_to_mask\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af757265-243c-450e-8af4-adcd11b03485", + "metadata": {}, + "outputs": [], + "source": [ + "idx_to_mask = get_idx_cmlm(cmlm_decoded)" + ] + }, + { + "cell_type": "markdown", + "id": "1faeb671-dead-4ba1-867f-39eaf990a507", + "metadata": {}, + "source": [ + "### Fill in masks" + 
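The two fillers defined below differ only in how they pick the next mask to fill: the lowest index versus the lowest predictive entropy. A minimal sketch of that shared loop, with the CMLM call hidden behind a hypothetical `predict(tokens, i) -> (predicted_token, entropy)` that is not part of the notebook:

```julia
# Greedy mask filling. `by_entropy = false` gives the left-to-right order,
# `by_entropy = true` the entropy-based order; each pass scores the remaining
# masks, fills the chosen one, and repeats until none are left.
function fill_masks(tokens::Vector{String}, mask_positions, predict; by_entropy::Bool = false)
    tokens = copy(tokens)
    todo   = sort(collect(Int, mask_positions))
    while !isempty(todo)
        cands = [(predict(tokens, i)..., i) for i in todo]    # (token, entropy, position)
        pick  = by_entropy ? cands[argmin([c[2] for c in cands])] : first(cands)
        tokens[pick[3]] = pick[1]
        filter!(!=(pick[3]), todo)
    end
    return join(tokens, " ")
end
```

Wiring `predict` to the CMLM (encode the partially filled text, take the argmax token and the entropy of the softmaxed logits at position `i`) gives the recursive `left_to_right_filling` and `uncertainty_filling` variants defined in the following cells.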
] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "ff76df2f-f5e5-40bf-9a37-a517ac17acc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "merge_tokens (generic function with 2 methods)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merges a token list into a string, masking at specified indexes\n", + "function merge_tokens(tokens, idx_to_mask=[])\n", + " merged_text = Vector{Char}()\n", + " for (i, token) in enumerate(tokens)\n", + " if i in idx_to_mask\n", + " append!(merged_text, \" [MASK]\")\n", + " else\n", + " append!(merged_text, \" \" * token)\n", + " end\n", + " end\n", + " \n", + " return chop(String(merged_text), head=1, tail=0)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "7263a950-f7d8-4b02-a071-5314e7ad2559", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "group_into_words (generic function with 3 methods)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merges the CMLM output token list into a string\n", + "function group_into_words(cmlm_out, delim=\"##\")\n", + " word_list = []\n", + " for token in cmlm_out\n", + " if startswith(delim, token) && length(word_list) != 0\n", + " last(word_list) = last(word_list) * chop(token, head=2, tail=0)\n", + " else \n", + " push(word_list, token)\n", + " end\n", + " end\n", + " return word_list\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "1ce0fbff-a40c-4e95-9489-c1f36478b29d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "left_to_right_filling (generic function with 1 method)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Recursively fills in the tokens\n", + "# The function selects the lowest index from mask_position and uses the CMLM\n", + "# to fill in the predicted token at the given position\n", + "# Once the mask_position list is empty, the merged string is returned\n", + "function left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + " if length(mask_positions) == 0\n", + " return merge_tokens(tokens)\n", + " end\n", + "\n", + " masked_text = merge_tokens(tokens, mask_positions)\n", + " # println(masked_text)\n", + " \n", + " out = decode(cmlm_tkr, cmlm_model(encode(cmlm_tkr, masked_text)).logit)\n", + " \n", + " mask_positions = sort(mask_positions)\n", + " next_position = popfirst!(mask_positions)\n", + "\n", + " next_token = out[next_position+1]\n", + "\n", + " tokens[next_position] = next_token\n", + "\n", + " return left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "2d3bd657-d656-462c-97b9-58b5dd923394", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations . 
[SEP]\"" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left_to_right_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "c2a089f0-e814-43ba-8da6-e22e1f087a7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "uncertainty_filling (generic function with 1 method)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Recursively fills in the tokens using CMLM uncertainty\n", + "# The function selects the masked token with the lowest logit entropy\n", + "# and fills in the predicted token at the given position\n", + "# Once the mask_position list is empty, the merged string is returned\n", + "function uncertainty_filling(tokens, mask_positions, model, tokenizer)\n", + " if length(mask_positions) == 0\n", + " return merge_tokens(tokens)\n", + " end\n", + "\n", + " masked_text = merge_tokens(tokens, mask_positions)\n", + " # println(masked_text)\n", + "\n", + " logits = cmlm_model(encode(cmlm_tkr, masked_text)).logit\n", + " out = decode(cmlm_tkr, logits)\n", + "\n", + " probs = softmax(logits[:, mask_positions, :], dims=1)\n", + " \n", + " entrs = []\n", + " for i in 1:length(mask_positions)\n", + " push!(entrs, entropy(probs[:, i]))\n", + " end\n", + " \n", + " next_position = mask_positions[argmin(entrs)]\n", + " filter!(x -> x != next_position, mask_positions)\n", + " \n", + " next_token = out[next_position+1]\n", + "\n", + " tokens[next_position] = next_token\n", + " return uncertainty_filling(tokens, mask_positions, model, tokenizer)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "67ff3d94-59e9-4235-9285-4aeb3ba841ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"[CLS] remained well below their levels at the beginning of august to help alleviate the weaker demand that earlier gains in oil prices had been holding down on price stability . 
[SEP]\"" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uncertainty_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" + ] + }, + { + "cell_type": "markdown", + "id": "f3db316c-2cce-4163-9856-e12d69d577b4", + "metadata": {}, + "source": [ + "### Putting it all together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c6cc184-c8f7-4201-8886-f5da43785223", + "metadata": {}, + "outputs": [], + "source": [ + "n = 10\n", + "data = load_training_sentences()\n", + "texts = filter(:split => n -> n == \"test\", data)[1:n, :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a14a2e5-4dc6-4fb4-94ee-01cf6526550c", + "metadata": {}, + "outputs": [], + "source": [ + "cmlm_tkr, cmlm_model = load_model()\n", + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)\n", + "\n", + "using PythonCall\n", + "\n", + "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", + "transformers = PythonCall.pyimport(\"transformers\")\n", + "\n", + "# load pre-trained classifier and corresponding tokenizer\n", + "model = transformers.RobertaForSequenceClassification.from_pretrained(\"model\", local_files_only=true)\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")\n", + "\n", + "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fb7e883-bad7-47fe-b44b-930a961225ee", + "metadata": {}, + "outputs": [], + "source": [ + "attributions = get_attributions(texts[1, :].sentence, scorer)\n", + "\n", + "text = texts[1, :].sentence\n", + "word_attributions = group_into_words(text, attributions, cls.tkr)\n", + "idx_to_mask = get_top_k_idx(word_attributions)\n", + "\n", + "toks = decode(cls.tkr, encode(cls.tkr, text).token)\n", + "mask_toks_at_idx(toks, idx_to_mask)\n", + "\n", + "cmlm_decoded = decode(cmlm_tkr, encode(cmlm_tkr, masked_text).token)\n", + "\n", + "idx_to_mask = get_idx_cmlm(cmlm_decoded)\n", + "\n", + "left_to_right_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" + ] + }, + { + "cell_type": "markdown", + "id": "e944e49b-253c-4850-af4b-d483d870af84", + "metadata": {}, + "source": [ + "### Detour: Visualizing CMLM uncertainty through entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "7c0fde75-655e-48d1-8e26-c0553ff510c7", + "metadata": {}, + "outputs": [], + "source": [ + "using Plots" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "ca6bef83-20a1-4897-a4ba-b4f0bb24e3b7", + "metadata": {}, + "outputs": [], + "source": [ + "enc = encode(cmlm_tkr, \"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\")\n", + "out = cmlm_model(enc)\n", + "colors = []\n", + "masks = []\n", + "for i in 1:size(enc.token)[2]\n", + " if argmax(enc.token[:, i, :])[1] == 104\n", + " push!(masks, i)\n", + " push!(colors, :red)\n", + " else\n", + " push!(colors, :blue)\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "65837706-437e-433f-8c22-f510e1addc7e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\"well\"] 0.8925405\n", + "[\"the\"] 0.001739249\n", + "[\"august\"] 2.5585306\n", + "[\"quarter\"] 4.2470326\n", + 
"[\"to\"] 5.439382\n", + "[\"to\"] 5.78602\n", + "[\"of\"] 4.4509435\n", + "[\"and\"] 2.307382\n", + "[\"increases\"] 2.3528287\n", + "[\"the\"] 4.842994\n", + "[\"stability\"] 2.4602342\n", + "[\".\"] 2.0014744\n" + ] + } + ], + "source": [ + "entropies = []\n", + "toks = []\n", + "\n", + "probs = softmax(out.logit, dims=1)\n", + "\n", + "for i in 1:size(out.logit)[2]\n", + " row = out.logit[:, i, :]\n", + " tok = decode(cmlm_tkr, row)\n", + " push!(toks, tok)\n", + " entr = entropy(probs[:, i])\n", + " push!(entropies, entr)\n", + " if i in masks\n", + " println(string(tok) * \" \" * string(entr))\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "7570a204-98e7-41dd-9b16-7949c822078f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAGQCAIAAAD9V4nPAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOzdZ1wUZ/c38GuBpSO9igKKiooFsAdQERRUbNhb7L1rbMmtJrYYSxJ7N/Zeomhs2IMEQcWCBTsIShUp22eeF/N3n3VhlxlcWGB/348v5GLOztllzp7pw6NpmgAAAOgqPW0nAAAAoE1ohAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDTyqkR5uXlLVy4kP30NE1zvfcbRVEckyIymawChtA0zfW9VKWPqxTvnWsIRVFcP66KuaiQUn1cVWZRQWWVaUhVqqwSlVMjTE9P379/P/vpJRKJWCzmNAuhUMj1z1ZYWMhp+vIJoShKKBRyChGLxVKplFNIYWEh10W2Yn5cUqmU66IiEom41lLFfO8URQkEAk4hqCxOIagsTiEVtrJKhF2jAACg09AIAQBAp6ERAgCATkMjBAAAnVbhGuGrV69CQwd5eLSuVatNx44DX7x4oe2MAACgKjPQdgJfefDgQUjIiIyMtTTdhhCSlna7TZt+Fy/uaNq0qbZTAwCAqqlibRGOGfNjevpfTBckhNB064yMfWPG/KjdrAAAoAqrWI3wzZv3hHh/PVb/7dsP2skGAAB0QMVqhITwtJ0AAADolop1jNDFxe7jxyRC6iiMvXJ2ttFaQgAAlVNOTk5YWJj8zjgURenpcdjyYe6Pw+Nx2DgpRQj7rHg83t69e728vNi/OHsVqxFu3PhLePiQzMztX3aQPrazG7Vp0+9aTgsAoLL59OlTcnLy6dOntZ2IZowfP/79+/c60QhbtWp5+fKWMWPmvn2bRgjPzc1py5ZNOGUUAKAUjIyM/Pz8tJ2FZlSrVq3sXrxiNUJCSJMmTf77L1IsFtM0bWRkpO10AACgiqtoJ8sAAACUKzRCAADQaWiEAACg09AIAQBAnVI8oLhyQSMEAIBiZGVlzZo1x9m5lpGREZ/P9/BouGzZ8rJ4QLzWVbizRgEAQOuePHkSGBiSl+clEv1BSFNCJG/exC5ZsnrHjn3//hvl5OSk2dklJib+/fffjx498vb2njdvnmZfvETYIgQAgK+IRKJOnbrn5AwQiS4R0o2QmoTUJmSAQBCTktKoR4/+Gp9jdHT0u3fvCgoKbt26pfEXLxEaIQAAfOXw4cOZmXoy2fIi9382EIu33L//8MaNG6V75bi4uG3btsl/jImJ2blzJyFk1KhRmzZtCggIKH3S3wCNEAAAvnLu3GWhsJeKY2eWNB16+fLl0r2ym5vbzJkzMzIymB9//vlnsVhc2jQ1Bo0QAAC+kpLykaZdVf1WLK6RnJxWule2t7fv0qXL3r17CSHv3r2Ljo4eOHBgKbPUHDRCAAD4irV1NUJyVf1WXz/H1tay1C8+fvz4rVu30jS9devWfv36lelNRFlCIwQAgK+0bdvSxOSSil9ShoZRrVq1KvWLBwYG8vn8q1ev7tq1a+zYsaV+HQ1CIwQAgK8MGTKEkHhCThT9FY/3u6WlrGvXrt/y+mPHjh0+fLizs3MFeTgGGiEAAHzF0dFx9+7tfP4QHm+Zwj7Sj/r6MwwNFx47ts/Y2PhbXn/IkCEZGRljxoyRjxw5csTGxmbp0qW3b9+2sbGZPHnyt7w+V7igHgAAlPXp08fBwWH8+BnPni00MalN01KB4HWLFu22bo3x9vb+xhdPTU01NjZWPE2mb9++ffv2/caXLTU0QgAAKEbbtm0TE+Nfvnz59OlTfX39xo0bu7i4fPvLrl27dufOndOmTTM3N//2V9MINEIAAFCpdu3atWvX1uAL2tjYLFq0KDw8XIOv+Y3QCAEAoPwMHjxY2ykow8kyAACg09AIAQBAp6ERAgCATkMjBAAAnYaTZQAAqiAjI6N3797Z2NhoOxHNyMvL+/nnn8voxdEIAQCqIBcXl+zsbJlMxvyYn5/P6bo9iURCUZSRkRH7EKFQaGBgYGDAoa1wysra2pr9K3OCRggAUDUpPtjBwMDAwsKCfaxEIpHJZJxupSYQCPh8PqdGyDWrMoJjhAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADqNcyPMzs4ePHiwo6NjREREbm5uyQEAAAAVGLfLJ4RCYadOnXr27Dlz5swpU6b8+uuvy5cvZxMok8ny8vI2b97M/Mjj8QICAurVq6dmepqm5VfAsJwFE8U1hP305RMi+4JTCE3TenocVmuYWfB4PK4h7Kcvn5DSfVx6enpV4L1TFFW6RQWVxT4ElcUppAJWFpuUuDXCEydO2Nvbz58/nxAye/bsixcvPnz4sHbt2qampuoDxWKxUCiMi4uTjzg4ONSqVUvV9BKJhOvyJ5FI9PX1uYZIJBL205dPiEwm4xoikUg4vXF5VpwW2Yr5cTFXO+nr63MKIYRw+mavmO+doqhSfFyoLE6zQGVxCiEVr7L4fH6J74JbI8zPz09OTmZuH7Bnz547d+5cvnw5Jyfn0qVLjRo1UhNoYmJib2+/fft2ljPS09OjaZrTTQ0oijI2NuZarpwuFy2fEGZ1klMIj8fT09Pj8/nsQ6RSqbGxMddyLYePi0mM/fT6+voyjpf90jTN9bLfirmoUBRF0zSnEFSWzlYW15CqVFkl4rayM3jwYCMjo/r16zdt2jQnJycxMTExMbF3796TJ0/WbFoAAADlg1sjNDU1jY2Nj
YqKatmy5YwZM0xNTXk8Xr9+/VJSUsooPwAAgDLF+axRPT29WrVqFRYWXrx4kRAik8m2bNkyZMiQMsgNAACgzJXypts//PBDcHDwf//9l5+f36hRox9//FGzaQEAAJSPUjZCX1/fR48enT171tPTs3379prNCQAAoNyU/jFMLi4uo0eP1mAqAAAA5Q+3WAMAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEQIAgE6rOo0wLS3t8+fP2s4CAAAqmUrfCGma/vPPLc7OTdq0mVavXq+GDdvFxcVpOykAAKg0DLSdwLf6+edVa9a8ysuLIcSEEJKe/qJz54HXr++uX7++tlMDAIBKoHJvEUokkk2b9uXlrWW6ICGEEM+MjFXz56/WZloAAFB5VO5GmJyczOPVI4T/9fB3CQmPtJMQAABUNpW7EZqYmBCSX2S4wMjISAvZAABAJVS5G6Gzs7OZWTYhKYqDhob7e/bsqK2UAACgcqncjZAQ8tdfq+3tu/N4/xAiICTD1HRl7doHfvxxmrbzAgCAyqHSN8KAgO/u3v170KB/6tQJ8fX9/n//M0xIuGJmZqbtvAAAoHIozeUTsbGxe/fuXblypbGxscYTKgVXV9e9e9cWFhYaGxvr6VX61g4AAOWJc9u4ePFijx49AgMDDQ0NyyIhAACA8sR5i3DSpEk7d+4MDQ0lhGRlZVWrVo3P55cYVVBQkJqaGhQUJB/p37//gAEDVE0vFotpmhaLxewTEwgEEomE0xZhfn7RM061HyKTycRisUwmYx8iEon09PTY/CHkCgoKKIri8XjsQ8rn4yooKKBpmv30EolEJpNJJBL2IQKBgM/nGxhwWPgr5qJCUZRQKKQoin0IKktnK4trSJWpLGNj4xL/gtwaYV5e3suXL0NCQjIyMgYNGhQVFWViYrJs2bIpU6aoDzQ1NbWxsZk/f758pEmTJhYWFqqmZ8qV01UQ+vr6pdg1qiYHbYXIZDKRSGRqaso+xNDQkGu58ng8MzMzTuVKyuXj4vF45ubm7KdnypXTXnoDAwOu5Uoq5KJCUZSBgQGnI+KoLJ2tLK4hVayy1OOWsbm5uZWV1e3bt7du3RoSEvLPP/9cvHhxwIABDg4O/fv3VxPI4/GMjY2Dg4O/LVsAAAAN47aWx+Pxxo8fP3HixHv37v3www/6+vphYWGLFy/evn17GeUHAABQpjifLLNw4UILC4tHjx5FR0czI87OzriTC1RVRw8ebOPp2a527WbVq08eMiQ7O1vbGQGAhnE+WYbP50dGRg4cOLBHjx6rVq2ytraeN28etgihSvr1p5+erF8fmZtrQwgh5NihQ51u3br68CGno5gAUMGV5jpCKyurM2fOHDx4cPfu3Xp6ehs2bGjbtq3GMwPQrs+fPx/dujU2N1f/y0hvqfRDauq2deumz5unzcwAQKNKefm5vr7+4MGDL126dOHChY4dcWNPqIISEhICZDL9rwfDxOLb589rJyEAKBu4DwtA8Xg8XtEL9ChCeLh7EUDVgpIGKJ6Pj89NfX3p14NnjI0DunbVTkIAUDbQCAGKZ2ZmNnz69H5WVmmEEEIoQnYZGh5xdR05YYKWMwMAjSrNyTIAOmLKvHl1mzQZ+MMPeVlZekZGweHhl3/91cTERNt5AYAmoRECqBPauXNo5855eXkav6sTAFQQ2DUKAAA6DY0QAAB0Ghoh6JZPnz7dvn377t27QqFQ27kAQIWAY4SgKyiK+nnWrPP797eUSkV6encMDGYtXz5w2DBt5wUAWoZGCLpiyZw5kq1bbxcUMLtB8gnpN2OGnZNTx9BQLWcGAFqFXaOgEyiKOr5nz+IvXZAQYk7I+pyctQsWaDMtAKgA0AhBJ2RnZ7vweEo3DvUg5GNamnYSAoAKA40QdIK5uXkOTSsNFhJiYGiolXwAoOJAIwSdYGxs7FC7dszXg9uMjcP799dOQgBQYeBkGdAVaw8c6N2u3cCPH0OEQiEhBywtn3l5Hf/pJ23nBQBahkYIusLd3f3W06c7Nm5ceemSqZlZSN++a/r04fF42s4LALQMjRB0iLGx8cQZM0aMH8/n8w0MsPADACE4RggAADoOjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAEAVJxaLMzMztZ1FxYVGCABQZT1//rxrq1btatSY0KpVE2fn9StXUhSl7aQqHNx3GACgakpNTe3ftu32Dx98CSGECAiZ98svi96//+WPP7ScWQWDLUIAgKpp3dKl89PTfb/8aELImvz8s/v35+fnazOtigeNEACgarp/+3bg1ztC9QhpQciTJ0+0lVLFhEYIAFA1GRkbC4oMFvJ4RkZGWsimAkMjBAComoIjIg6bmCiO5BKSYGDQsGFDbaVUMaERAgBUTaMnTTrn6bnCzCyHEBkhMYR0trX9Zd06fX19badWseCsUQCAqsnIyOhSfPzGVav6HjyY+/lzA2/v7StX1q9fX9t5VThohAAAVRafz586b97UefPy8vIsLCy0nU4FhV2jAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCKBh586cCW/Zsl39+mF+fkcPHtR2OgBQApw1CqBJ8yZOTDlwYN2nT+6EpL5/v3j8+MunTm05fFjbeQGAStgiBNCYpKSkuCNH9nz65E4IIcSFkE25udlRUbGxsdpNDADUQCME0JhrV670ysnhfT3YNysrKjJSOwkBAAtohAAaIxIKjYs89dSYEFFBgVbyAQA20AgBNMa3efNr1tZKg1erVfNr21Yr+QAAG2iEABrTpk2brHr1NhsayrcKDxgY3K1Zs3OXLtpMCwDUQiOEyurt27eDw8Jaubt/5+HR/bvvHjx4oO2MCCHk8KVLb0eN8rG3D3Vw8HVwiBkw4NTNm7jZP0BFhssnoFJ69uzZwLZt/0hPD6BpQsjDDx/GBAWtOHYssF077SZmZma2fMOG5Rs2vH792t3dncfjlRwDAFqFLUKolBZNnrzh40emCxJCGhFyNCvrx3HjtJuVIjs7O3RBgEoBjRAqpeePH7f6esSVEGlOjlgs1k5CAFBpoRFCFULT9JdtRAAAltAIoVLy9PKK+3okjRB9KysjIyPtJAQAlRYaIVRKC9etG+vg8N+XH58R0sfW9peNG7WZEwBUTjhrFCqlBg0aHLp1a/7Ysa+fPCE0
7VCjxh+bNjVr1kzbeQFA5YNGCJVVnTp1jl65IpFIZDKZsbGxttMBgMoKu0YBAECnlaYRrl27NiYmRuOpAAAAlD/Ou0bT0tJmz55tZGR04cKFVq1alRzwhVQqffXqlfxHJycnU1NTrnMHAADQLM6N0NnZuVatWsOHD+/UqRP7Xpifn5+amtqhQwf5yMSJE8epvg+IWCymaVoikbBPrLCwUCqV6ulx2MYtKCjgeu+PcgiRyWQikYgq8jQfNUQikZ6eHp/P55QVTdOcEiu3j4vT9MwxQqlUyj5EIBDw+XwDAw4Lf8VcVCiKEgqFnC6dRGXpcmVxCqkylWVsbFxiSqU5Wcbd3b1nz54SiYTphQkJCb169bK3t1cTYm5uXrNmzRcvXrCcBVOunK4J09PTMzY25lSuNE2bm5uzn758QmQyGZ/P57S5zOfzuZYrIcTMzIzT8lQ+HxchhFNIKU6W0dfX51quFXNRoShKX1/fzMyMfQgqS2cri2tIVaqsEpWmETZv3jwmJmb+/PmEkHbt2nl6evbu3VuzaQEAAJSP0pwswzRCQohUKrW1tU1OTk5KStJ0YgAAAOWh9I1w8eLFJ0+efPDgwZw5c1auXKnxzAAAAMpBaXaNOjo6CoXCEydOXL582dbWdv78+ZwOqAIAAFQcpbyzzLlz58zMzGxtbf/vVbgcHQUAAKg4StnAatasqdk8AAAAtAK3WAMAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdJqBthMAAJKSknL69NnExDdt2jTu1aunsbGxtjMC0CHYIgTQsnXrdvj69p4yhWzY4D9q1Is6db6Li4vXdlIAOgRbhADa9PDhw59/3peVdZMQPiFEIOiSkjKkV6+Ily9j+Hy+trMD0AnYIgTQpm3bDmdlTWe64BfuBQWtY2JitJYTgI5BIwTQprdvPxBSU2mwsNAtLS1NK/kA6CA0QgBtqlevJo+XpDRoZva8Zk3l7ggAZQSNEECbxo4dZGu7ipDPCmP3ra0ftWjRQms5AegYnCwDoE21a9fevPmnSZPa5eeHFxa6Wlndc3B4GBm5T08PK6kA5QSNEEDLIiLCO3Zsd+3atZcvX/v69vf3X48uCFCe0AgBtM/CwiI8PDwvL8/CwkLbuQDoHKx4AgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEUKFIBKJflu4sF2DBu3q1x8QEpKQkKDtjABAV+DOMqB9AoGgo59fxNu3kYWFZoTcS0mZHBQ0Zd263gMHajs1AKj6sEUI2rd17dqer19PKyw0J4RHiC8hZ7Ozl8ycKZVKtZ0aAFR92CIE7bty6tQ6oVBxxIKQpjJZYmJi48aNtZUVALBE0/T58+f/i4oysbDo0KVLs2bNtJ0RN2iEoH0Ssdi4yKAxTYtEIi1kAwBcZGdn9+3Qof7r18G5uUJClq1fb9u27ebDh/X19bWdGlvYNQra5+vvf/XrmqEIucPjNWjQQFspAQBLU4cMmfrw4brc3O6E9CPkRGamw/nzm9as0XZeHKARgvZNmjfvVweHGB6P+bGAkIkWFj2GDTMzM9NuYgCgnkQieXLnTrhMpjg4u6DgyI4d2kqpFNAIQfucnJyO37q1MjCwpYNDkL19YPXqvsuW/bRihbbzAs4unj/fycenfb1633l6rl68WCwWazsjKFu5ubn2RZ4jbUlIYX6+VvIpHRwjhAqhVq1ax69dk0ql6enpLi4u2k6nakpNTY2JiaEoqk2bNmXxIa/6+eeYP//cnpNTgxABIet+/bXzyZMX7typRMeKgCtra+sPNE0TwlMYTCfE0tpaazlxhy1CqEAMDAyqVaum7SyqIJqm/zd1at+mTR99/33isGF9mzadP3EiRVEanEVWVtaRDRsO5+TUIIQQYkLI7MLC5klJRw4e1OBcoKLR19cPDA3dZGQkH6EImWNpOXLWLC1mxRUaIUDVt3bFioKdO29mZCzIz19QUHAzI0OyZ88fy5ZpcBaxsbEhIpHSpl+v/Pzrp05pcC5QAa3YsiUmODjc3n6toeEKE5MAe/s648cP/P57befFAXaNAlR9+zZtupGfL995xSNkcX6+/9atM376SVOzkEql/CKbmHxCJDhMWNUZGxvviYx88uRJdHS0g5nZ0cDASnd0A40QoOqjhUKTr0eMCeF9fRODb+Tn57fSxGTR16dIXDQxadmpkwbnAhVW/fr13d3d+Xy+gUHlayvYNQpQ9cn09ZU21mhCJBo9h8XFxcWva9eZ5uaFX0aO6esfd3EZOnKkBucCUBbQCAGqvuCuXXfy+Yoju/n8oLAwzc5l9fbtdRYvDnR1bWNv7+fkdK1//3P//WdsXPSuQQAVS+XbhgUArhb9/nvfBw/inj3r+ekTj5BTVlav69Q5unatZueip6c3btq0cdOmZWVl2draavbFAcoOGiFA1WdmZnY2Jub8+fPXTp+maTq8W7cwTW8OKjI0NCy7FwfQODRCAF0RGhoaFBRE07SRwlVfAIBjhAAAoB2yr29Sqi1ohAAAUK5omt7/118tPTw61q3r5+w8pk+f9PR0LeZTyl2jhYWFpqammk0FAAB0wdK5c99s3nzx82dLQgghZ06eDIuNvfrwobbusMh5i/Ddu3dBQUHm5uZubm6xsbFlkRMAAFRVnz9/PrVr19YvXZAQEi6TjU5L2/LHH9pKidsWYW5ubocOHcaPH79///41a9YMHjz4+fPnbALFYvGnT5+WLl0qHwkICGjZsqWa6Wma5pSbSCTi8Xh6RR4Ioj6E6+lt5RAik8lEIhGnG/aLRCI9PT1O91AWiUQGBgY8Hq/kSRVCyufj4n99xZt6EolEJpNxfSMURXE6OFExFxWKopi/I/sQVJYuVxankLKrrPj4eH+ZTGl56iSRzLx4UTRnDpu5cHojfD6/xKWX2xbhb7/95u/vP2PGDGdn52XLlr1///7du3csYymK+qRAs3e+BwCASkFPT09aZFBKiBYf18Vti9Dd3b1fv37M//l8fu3atdPT02vWrFlioKGhoY2NzcqVK1nOiMfjcT3JWyaTGRkZcVpvFYvFXM8jL4cQZn2K61z09PS4bkgZGRlxWt0rn4+LSYz99Hp6esyfnn0IRVFc74hYMRcViqIoiuIUgsrS2criGlJ2ldW8efOp+vpiQhQ3606ZmLTv0YPN7Erx3kvEbYtw9OjRjRs3lv9oamoqkUgIIW/fvj1//rxmMwMAgKrH1NR03Lx5EdbWbwkhhEgJ2WxkdKpmzRHjx2srpW+6fILP58tksrdv37Zv3z45OVlTOQEAQBU2bvr0GceOjfHx8XNyCvDw+DBp0qW7d7V4W9pvurOMnp7e69evhw0bNmvWrNGjR2sqJwAAqNraBwW1v3s3Ly/PwsJC27l8WyPU19cfO3bsH3/8MWbMGE0lBAA
AUJ6+qRF26dKlf//+6IIAAFB5fVMjnDlzpqbyAAAA0ArcaxQAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AI2aIoKiEh4Z9//nn06BFN09pOBwAANOObnj6hOxITEyMixmVluQsEtUxMTjk6pp04sbVOnTrazgsAAL4VGmHJCgoKOnYc/P79AUK8CCH5+SQjIyE4eMDz5/8aGRlpOzsAAPgm2DVaspMn/87OjmC64BdNcnI6nj9/Xms5AQCAhqARluzhw5cCQSOlwby8xg8fJmklH4DykZKSMnToVB+f0BYtwn/99U+xWKztjADKBHaNlszZ2cbA4INU+tWgoeEHFxc7LWUEUOZu3vy3d++pGRmLaXo5IXlPnuzZvTvozp3z5ubm2k4NQMOwRViynj27Wln9RYhIYazQ0nJ/ly5h2koJoKwNGzYzPf0UTYcRYkqIY2HhD69fD16y5A9t5wWgeWiEJXNzc/v559H29kE83hFCEni8g/b2QStXznR0dNR2agBlIi0tLT/fhhBXxUGRaNCpUxe1lRJA2cGuUVYmTBgeFtZu27YDCQnn/Py8xow55urqWnIYQOUkEAh4vKK7QE1FIlExUwNUcmiEbHl4eCxb9mNeXp6FhQWb6UUi0bJlfx448HdhocjV1XnlytmBgQFlnSSARtSoUYOmnxEiIYSvMBzdpIm31nICKDPYNVomxGJxs2YdV60yfPHicmpqXGzs7z16/Pr771u0nRcAK3w+f/z4wRYWkwkRfBl7YW8/a/nyWdpMC6BsYIuwTOzYsefVq5DCwmlfBjxzck4uX95yzJjBZmZm2swMgJ2FC2dZWW1ZsaIVRbkTkm9vT+3cub5+/frazgtA89AIy8SpU9cLC+d+PWYok7WLj48PDAzUTk4AXPB4vGnTxk2bNi4pKcnOzs7a2lrbGQGUFR1thEKh8NSpUwm3brnXr9+5W7caNWpo9vXFYgkhhkqDFMWXSCSanRFAWatevbqxsbG2swAoQ7p4jPD+/fv+des+HznSf8MG3tSpffz8tvz+u2ZnERTUjM+//PUYbWBwo2nTppqdEQAAfCOda4RSqSv7ZR0AACAASURBVHRkt25Hk5MXFBZ2IWSMTHYzI+PI0qX379/X4FymTh3j5LRdT09+M9ICC4up/foF2draanAuAADw7XSuEd65c6d5YaGHwgifkFlZWYe2aPKUzmrVqsXGnu3S5ZCjo5+9fYCra9tlyxqtXbtEg7MAAACN0LljhB8+fKghFCoN1iQk7c0bzc7Iycnp9Om/CCHp6ekODg6afXGtKCgoYHkNJQBAJaJzW4Q1atRIMjFRGnzO47l5eRU7/bczKTK7ykUsFv/vf786O/s0btzPycl30KCJ2dnZ2k4KAEBjdG6L0NfX97mNzd3MTN8vI3mE/GZru2fCBG2mVYH16jXyypWGAkEsc5ORI0eOxMeHP3x4jc/nlxgLAFDx6dwWoZ6e3v7z56fVrz/exmabnt4ic/O2Tk5zNmyoU6eOtlOriBITE2NjcwWCufJbbUmlfVNSOhw6dFS7iQEAaIrObRESQjw8PK4/fnzz5s37d+/61a49o23batWqaTupCuru3bu5uUFKgwUFwVFRp4YMGaiVlAAANEsXGyEhhMfjBQYG+vj44OwP9fh8vr5+0QcOCI2MsF9UyyiKevnyZXp6uo+Pj6mpqbbTAajEdG7XKHDi7+9vZnaGEEpx0MrqWM+eypuJUJ4uXozy8Gjp7/9Tjx673d0Dpk9fIJVKtZ0UQGWFRgjqVK9effjwTlZWAwl5Rwgh5JOFxTxf3+zQ0E5azkyH3bt3b+DAhe/eRaanH87M3JqREbt1K2/y5B+1nReUB4FAcPny5f3799+8eZOiqJIDgAU0QijBb7/9b//+YX5+46pXb9awYa/ly2tfunRE20nptAUL1mZlrSLE8cuAfmHhouPHLwsEAnVhUPlFRV2rWzegT5+LkybJevQ44OUV8OTJE20nVRXo6DFC4KRz59DOnUPZP5QYytSTJ08J8ft6jMfjNXj79q1XmV0OC1r34cOHAQNmZGRcIMSeEJKdTbKzE8PChjx/Hm1oqHyLf+AEW4QAlYy5uQUhOUWGs3Dyc9W2d++RnJzxTBf8okFubuD169e1llNVgUYIUMkMHBhuarrt67EX1tZ5Li4u2kkIysWTJ2+l0rpKg7m5Xq9fv9FGOlUKGiFAJTNt2timTWMsLWcQ8oCQdwYGe5yc+hw6tE7beUHZcnNz1NNLURo0N3/n5ORY7PTAHhohQCVjaGh469bp9etbdunye6tWU2bNSk5MvIJHXVZ5gwdH2NhsIETxmQHpZmZnO3TooLWcqgqcLANQ+fB4vMGD+w0c2EcgEJiZmWk7HSgPtWvXXr58wvz5gVlZoynKw9DwoY3N3gMH/sAC8O3QCAEAKodRowZ37hx0/PjfCQnnW7Vq0K/fdZzIrRFohAAAlYaLi8vkyeNxLZNm4RghAADoNDRCAADQaWiEAABQaeTm5mr8NdEIAQCgoissLJwy5UdHx6Z+fsMcHX2+/37qp0+fNPXiOFkGAAAqupCQfnFxncXiu4ToEUIfOLD/7t0e9+9H6evrf/uLY4sQAAD+v+zs7JEjZ9arF1CvXmBExOh3795pOyMSHR397Jm1WDz+S8/iSaWD373zPXv2nEZeH40QAAD+T3JycuPGwXv2tHn9+ua7d7dPnhzQrFmP+Pi72s3qzp272dntlQY/fw66fj1eI6+PXaNVAUVRenpYpwGoZHJycvbtO3T79uMGDdwHDYrw8PDQdkZkypRFqam/0XQw8yNNB2VkHB4xYnxCwmUtZmVkZKinJ5TJlIaFJiaaef4Uvj0rt+jo202ahLi7B9ao0bp1626PHj3SdkZfEQqFJU8EoJMuXoxq0CB4xgzq4MH+CxY4tWw5eM2azdpOisTG3qNppZuX1klLy5NIJNpJiBBCSFBQO2vrE4TQioO2tke7dg3SyOujEVYgGRkZgwZN8vBoVadOQGBgxMOHD9VPf/78pW7d5j14sCU9/c7Hj3ExMUvatx9x//79Emd07969/fv3nz59OisrS0O5fyU3N3fEiOlOTj4NG/Z0cfFZsGCFWCwuixlBFfD335HffderYcN2gYG9L1y4yCbk6dOnixevnDx5wc6dfwkEghKnf/XqVYcO/VxdW3p4BHp7B0VFXSkxRCqVbt26q3//yYMGTTl48AhN0yWGnDlzzssrsFattq6uzfv2HZuenq5m4oKCgu+/n/XhwwWpdCIh/jQ9OCPj2rJlhxITE0ucUZmiaR4hPKVBHo8vK7I5Vp7q1q3bp4+fldX3hCQTQgj5aGk5qV07i1atWmlmBnS5ePHiRe3atdlPLxKJhEIhp1kUFBTIZDJOIZ8/f+Y0fZmGpKSkVK/uo6//NyEUITQh921tW58/f0lNSJ06bQhJI4RW+Pfgu+96qAnJysoKb9Omn53dWgODn83N/Rwcdqxfr9k3IpFIGj
QI4PP3fnkjYhOTJZ07D2I5l7y8PJZTMsRisUAg4BRSWFgokUg4hVSoRUVOJpPl5+dzCqlolTV8+DQrq+8JeUEIRcgza+t+kyf/qD5kzpyldnbtCTlIyBUjo19dXX3v3bunZvoXL144OvryeNFfauSdtXXHgwePqQn58OFDnTqtzcx+ISSOkJhq1WY1bdpB/Ztas2aTlVUvQj4wc9HXP+3q6pOZmalq+sjIyGrV5n1dvDSPd3TOnF/Uv31G2S1dbdp0J+Th14llubu3YBNb1pV18uRpP7+w6tV9mzTpuGfPQYqiOM1LjdI0woyMjPj4eLFYzD4EjbBEAwdO5PEiv17+Pnh4tFQ1vUQisbdvoVRIhNDOzn5q5tIjIOCMvr58aiEhnWxsbt64ocE3cvjwEQuLuUpZ2dl1f/jwIZtwNEL2KnsjvHfvnq1t+NeLCmVnF/zs2TNVIVevXrO27vFlHYv599LdvZmaDHv2HMnjXfl6Lp9cXX3VJNalyxAe7x/FEEPDnePGzVY1vUgkcnBoTIhIMcTAYM/MmQtVhezatUtff12R+o0eMGCSmsTkym7piouLs7NrRcizLymlWlt3OnbsFJvYCltZJeK8a3Tnzp1eXl7dunVzc3PbunWrZjZLgZBbt/6j6dCvxxwLCsxzcnKKnZ7H4xFCFfebYgcJISQ9Pf3zs2ddFXZxGBGyJDt7x2+/lSZjFaKi4vLylB+QlpMTcudOnAbnAlXAuXNR2dl9vx7jZWf3uXAhSlXI5s1HcnJmfr3vrlZBQcOEhARVIXFx92m67ddjlhKJQ0ZGhqqQ2NgHSsUoFg+JjFSZ1dOnT2nah5CvTtyQSjtfuhStKsTDw6NatSdKgwYGiY0a1VIVUj78/Pz++Wdd48YTHRzaODj416nT58CBGRER3bWbVVnjdtZocnLy3Llz79+/7+rqeubMmREjRty/f3/Dhg08nvI+ZSX5+fnv3r1TPCdq0qRJY8eOVTU9s7nJ6fBsYWGhVCrldPJkQUFBiZmXWwhFUcUdstXPzc3l8/nFhjg4mGdkvCJEsXKiGzSom5+fX+z0z549q00pt8l6hLx68UJViCLW750mRPmIoL6+UCo1ZDkXFrP4/yQSiUwmk0ql7EMEAgGfzzcw4LDwV6hFRY6iKGbzjn1Ihaqs7OzPNK38vU9RZp8+pahaVFJSPhBSXWlQJHJ9+/ZtnTp1VM9K+SOiKIlAICh2LhKJhKaNiwwbSCS0qqwEAgFNF10BpWhaZUiTJk2srGbl5NwlxPfLWJq19fp+/Y5rtBhLE+Ll5fXvvycLCgqkUqmlpSUhhE1KpKJWlrGxcYkpcWuEiYmJrq6urq6uhJDw8PDo6Oh27do5Ozv/73//Ux9obm7u4uISFfX/V6mcnJxMTU1VTc+Uq5GREfvc9PT0jI2NOZUrTdPm5ubspy/TED+/JikpNwhRXHXNNTTMdHd3VxWyY8eKrl37ZWauI6QVIYTHu+joOGfz5qOqZufh4fG+yOeTTIhL9epsMmT5Rnr3Djly5MinT50VQy0tz3TsuJvlR8fpE2YaobFx0W8ulfT19bmWa4VaVOQoitLX1+f0XNYKVVlt2zbfti3q8+eeioPW1tcDAgaqCm/QoNa//z4h5KvLDIyMEhs0GKoqJDCw1YED/9B0V4WxdHPzvJo1a6pKzNBQRIiIEMVPKdPKykzVLHx9ffX1EwgREGIiH+TzI8PCAtV8DpcvH+7efeSHDzU/f25sbv7O3Dx67951tWqx2iIsh6XLyMioylQWqxdlLzs729zcPD4+Xj4SHx9vbGz8+PFj9YE4RliipKQkBwcfQv77sms+ucRD+jRNP3/+PCRkgIuLr6urX0TE6JSUFPXTt2/UKO7rgxKDraxOnzypwTdC03RQUIS5+XxCcpljDFZWQ0o8A0IOxwjZq+zHCKVSacOGgfr6h+UHCA0Mdvv5hag5CSIxMdHWtvWXRYs5weSSr2+Imrm8f//exaWpwmloD2xt2/zzz0U1IUuXrrGwmEKIVH4w3dKy/969h9SEbNq0y9o6jJB3TAiff9jNzS8nJ0f9J0BRVFxc3I4dO27cuCESidRPrKgclq6qVFkl4nyyzLx58zw9PTMyMuQj48aNmzlzpvooNEI2kpKS2raNcHT0c3Bo0aBBu0uXLrMMFAqFLM9devXqVcvateebmZ0jZD+PF2xn99PkySznwv6NyGSytWu31KsX6OLi16RJyIkTf7MMpNEIuajsjZCm6ezs7P79x9nb+zg4hNvb+wwbNjU3N1d9yNGjfzs7+1hZ/WBouNrOrl/Llp0/fPigPiQ9PX3AgAmurn4uLn7+/j3v37+vfnqKoubOXWJv38zaeoaNzRRHR9/VqzeW+F6ioq40btzB0dGvenW/4cOnZ2dnlxjCqJhLV1WqrBJxboRSqTQsLKxRo0Zv375lRpYuXTphwgT1UVwb4ePHjx88eMApsWvXrpVYD0pOnTrFaS2MpumjR49ymr6wsPD06dOcQpKTk2+wO41T7u7du8+fP2c5sUQiOXr06Pc9ey5bsCAxMZH9XI4fP87pC5GiqGPHStiiVfL58+fz589zCnnx4sWdO3c4hcTExLx584ZTSGRkZEFBAacQrouKWCw+yW7TXC49PT0qKopTSCkq6/r161wr6++//+ZUWVKpdOvWreyXrvz8/MjIyJkzZyYkJLCfS3Jy8q1bt9hPn5OTs23btkOHDnFaObt48WKJG4JKyqey/vnnH04hVaay2CjN5RNCofD777+3sbFZtGjR+vXrXV1dS1zD4toIFy1aNH/+fE5Z9erV68iRI5xC6tat++TJE/bTi8ViPp/PaRbx8fFNmjThFLJr165Bg9hedceYPn36ihUrOIUEBwdzLQwnJ6fU1FT202dkZNja2nKaxZUrVwIDAzmF/P777xMnTuQUMnz48C1btnAKad68+e3btzmFmJqactpce/HihYeHB6dZnDx5Mjw8nFNIKSorIiLi8OHDnELq1avHaR1LIpEYGBhwmkVVqixnZ+f379+znz4zM9PGxobTLK5evRoQEMAppMpUFhulubOMkZHRX3/9dejQoaSkpOjo6JMnTzZp0kTjRy7LLaqi0eX3Xgq6/HGV4l3g4yqfqCpApz6u0t90OyQkJCQkRIOpAAAAlD/caxQAAHQar3y2ZB8/fty6devQ0NCSJyWEEJKYmEhRlLe3N/tZ3L59u0aNGsw1jixduHChTZs2FhYWLKenafrEiRMRERHsZ/Hp06c7d+5w2nR+8+ZNenp6ixYt2IckJCQYGxvXq1ePfciNGzfq1avn6OjIPiQyMjI4OJj9dUVisfj8+fPdunVjP4uMjIzHjx+3a9eOfUhSUlJ+fr6Pjw/7kLi4OBsbG5YXbDGioqJ8fHxsbGzYh5w8ebJbt27sH59dUFBw48aNsLAw9rNITU19/fr1d999xz6kdJXl6upao0YN9iGoLFQWS+VQWcHBwWPGjFE/TTk1QuasMHt7e5bTM6dQW1lZsZ/Fx48fraysOF0pnJyc7OrqyukmBW/fvnVzc2M/vUwmS0tL4
9SemRtesP+sCCE5OTkGBgbsv3cIIWlpaXZ2dqruWVOsd+/eqbkGuVhcPy6JRJKRkeHi4sI+JD8/XyQS2drasg/JzMw0MTHhdB36+/fvnZyc2Nce4f7eaZpOSUnh1G9EIlFOTo6TkxP7kNJVlqWlJacLq1FZqCyWyqGyGjduXOKqTDk1QgAAgIoJxwgBAECnoRECAIBOQyMEAACdVrEaIaeH6VTkEIqiqCIPPNLsLKpSSMXMqnxCaJqWKTwhsixmUZVCUFlcp+d6FkjFfCOlC2GvAjXC9PR0Pz+/ffv2sQ9JSEjw8vL677//2IecO3fO29v7zZs37ENWr17dvn37vLw8ltNTFDVixIhRo0axr9iCgoKQkJDly5ezzyo5OblRo0anT59mH3L79m0vL6+HDx+yDzly5IiPj09aWhr7kAULFnTt2lUoFLKcXiaTDRkyZPr06ewr9tOnT999993GjRvZZ5WUlNSgQYMrV66wD7lx44aXl9eTJ8pPT1Vj+/btrVq1ys7OZjk9TdNTp07t378/+wcEikSinj17zp07l31W5VNZ//zzT8OGDXWzsuLj4xs2bFgBK2vo0KHjx4/XwcriTLN3bPsW7du3X7FihZWV1aFD6h53IpeXl9e0adMlS5bY2Nj8999/bEKePn3aokWLKVOm1K5d+927d2xC/v777x49enTr1q1t27Ysb3C3ZMmSqVOnNmnSZMSIESzvpTtw4MAlS5Y4OTmtXr2azfQymax169YrVqywtLQ8c+YMm5DMzMwmTZosXLjQwcGB5W2X4+LiAgICRo8e7e3t/fHjRzYhe/bsGTRoUIcOHTp37szyKQc//PDD3Llz69WrN23aNDXP31EUHh7+66+/2trabtu2jc30IpHIz89v+fLllpaWV65cYROSnJzs4+Mzd+7c6tWrs7yh+dWrV0NCQgYNGtSsWTOWt11et27dmDFjWrdu3bdvX5a37R87duzChQvd3NwWLFjAZnqapoOCgpjKOnjwIJvp8/PzuVbWs2fPWrRoMXXqVE6V1b179+7duwcGBupaZcXHxzOV1bBhQ5aVtXfv3kGDBgUHB4eFhbF8KIS8sqZOnVrWlWVlZVV2lXXt2jV5ZbF/pgcnFagRFhYW0jR969Yt9r2QCfnrr7/YVywTMnv2bJYVK5FIRCKRSCRi3wuZWTDlwbJimZDExET2FcuEXLhwgX3FMiHr169nX7GFhYUURY0bN45lLxSJRFKptLCwkH0vZLJ6//49+17IhMTHx9va2m7dupXF+/i/kOPHj7PvhUzI0qVLWVasTCYTCARSqXTw4MEse6FAIKAoKjc3t3Xr1n369GHTC5msXr16xb4XKlYWy17IhOzevdvGxiYmJoZ9yJw5c7hWFvteWPUqa/z48Q0bNmTzcA95ZbHvhUxWqamp7HuhvLLs7Oy4Vhb7Xsi1siiKUqyssuiFFaIRRkVFKS7T//77r5WV1d69e1VNT1HU5ctfPavv4MGD6iu2sLDw33//VRz58ccf3dzcXr9+rSokNTX10aNH8h/FYnGPHj38/f3VPA0rISEhPT1d/mN2dnazZs2GDx+upmJv3Lih2C2ePn3q4uKybNkyVdPTNK303i9evGhpaXnq1ClV00ulUqUFdOvWreorNjc3V3HFgqKoiRMnenl5qXn6xOvXr1+8eCH/kanYTp06qanYO3fuKHaLDx8+NGjQQH0vvHr1qmK3uHfvnp2d3YYNG1RNTxf5uCIjI62trZUGFYnF4uvXryuOrFy50snJSc3jFDIzM+/evSv/USqVDh061MfHJysrS1XIkydPkpOT5T/m5+cHBgb27t1bzXMlo6OjFbvF27dvPTw85syZo2p6WnOVpeb5AIWFhUrPNmIq69WrV6pCiq2s7777rspUlpqHQ33+/FmpsiZNmlRiZSUlJcl/LHVljR07VrOVdenSJcUfy7OyMjMz1SRWCtpvhDKZLDg4eOTIkewrNisrq169er///rvioPpe+PjxY2dn57NnzyoOqu+FkZGRLi4uDx8+lI+UWLGrVq3y9vbmVLHDhg3r0qUL+4oViUStW7dmjqjJqa/Y1NRUd3f37du3Kw6q74V37txxcnK6du2afKTEij106JCbmxunil2wYEHz5s05VWzPnj2V9iKqr9j8/Hxmt5XiYGRkpJWVlaqKffXqVfXq1ZUePKS+Yq9du+bk5BQbGysfKbEXbtu2rU6dOkq9sG3bthEREap64dSpU5X2STC9cPbs2cVOX+rKWrNmjeKg+l7IVFZkZKTioPpeePbsWa6VtXr1aqV9EmwqS2mfRHlWlqpeGBcX5+joePXqVfkIU1n16tVTVVmHDx+uWbNm0crq2LGjqspauHChtipLTS9kKktpb5/6yrp+/bqqytJsL9RyIzx48OD8+fOZPWlKi6Cqil2wYMG+ffuYPWk7duxQerVie+HgwYOjo6Pj4+OdnZ2LXXtV6oVCoTAsLOzNmzfHjx93cXFR3M+jqmLfvn3brVu3z58/L1mypFGjRorlp6pir1+/PnLkSLFYPGjQoIiICMVfqarYLVu2/Prrr8yetEWLFin+SlXFzpgx49SpU69evXJ3d1d6oGWxvZCiqIiIiPv379+8edPJyenevXuKvyq2F37+/Dk0NDQtLe2vv/5yc3NTXA9Q1QufPHkSERFRWFg4e/bsli1bMue2MVRV7NmzZ6dMmSIUCrt16zZixAjFX6mq2JUrV27cuJHZk6a02qSqF44dO/bSpUuJiYnVq1dXekRwsRUrlUq7du369OnT8+fPOzk5PXv2TPFXxfbC9PT0zp07Z2VlrVu3ztPTU3FBUtUL4+PjBw4cKBKJxo0bFxwcrPjJqOqFpausvXv3qqmsor2wxMpS6oWlq6zw8PDPnz8vXbpUqbJycnLUVJZIJBo0aFCvXr0Uf/Xs2bMSK0vpy73UlaXUC5nKunfv3q1btxwdHYtWVtFeKK+s3bt316xZs2hlFe2FT58+ZSprzpw5XCure/fuw4cPV/xViZXVtGlTpdUmVb1QsbKUnteoqrLCw8OfPn164cIFVZWlwV6o5UaYnZ3t5+fHVOzjx4+VfltsxT579qxGjRpMLyz6NMtie2FUVJSTk1N0dPTjx4+Z3dOKiu2FW7ZsqVWr1ps3b+Lj45WWm2IrlqKoyZMnM4NxcXFF32bRihUIBKGhoUOHDhWLxYpVwSi2FzKLMlOxRfetF1uxCQkJLi4up06dev36ddHlptheePr0aWdn53v37j148EDpOeNFe+HChQtjYmJWrlxZv379tLS0ou+92F4ok8mGDRvWsWPHwsLCoiHFVmxeXl5AQMDEiROFQmHRDdliK/bt27e1a9dmKrbodn+xvTAmJsbJyenSpUvPnz9nbsupSKliN2/efOrUqf3797u6uj59+vTevXuK3zu06l74008/MYNF33uxvVAsFvfu3btnz54ikSg+Pl4ppNheyFTWvHnz2FfW8+fPa9SowfRCVZWl1AvZVJZSL9yyZYuHhwenypoyZYqqyiq2F8orSyQSFa2sYnuhYmUpfucySl1Z
Sr2wxMoq2gtXrlzp5eWlprKUeqFMJhs+fHhISEjpKqvohmzpKqtoL2Qq6+LFi2wqi3HgwIESK0tTvVD7u0azs7NDQkJSUlKK/e369etr1aolFAqTkpKYTS6app89exYcHKy0JMmNHTs2NDSUpukLFy5MnjyZ+ZNHRUUNHDhQVQ5t2rT54YcflAa3bNmitG4oV1BQ4OjouGfPHsVBpmKPHDlSbEhSUpKxsbHiNj5N0wKBoEePHkVrlXH69GkbGxvF1UCapj98+BAUFKRqh9vSpUu9vb2VFpqEhISwsDBVe5D69+/fr1+/orMeM2ZMsdPLZLJGjRotWbKEpuljx44FBQUxK+krV65ctWpVsSHZ2dlWVlZK3yMymWzEiBGqnuV97949ExMTpW/wvLy8sLAwVUfX9+7d6+zsXHR7on379qpOxJg9e3br1q2Vvo5jYmJ69uypag9SWFjY6NGjaZq+fft2kyZNmL1PBw4cUNrqkhOLxR4eHuvWrVMa/+mnn1SdmJeammpmZhYVFUXTdP/+/Zlj22KxeMCAATdv3iw25Pr16+bm5m/evGE+Iua7iaksxb2viuSVpTj4/Plz9ZXVqVMnpcESK2vWrFlKg1u2bFF1mo9iZU2ePJnZKKcoaurUqUo7q+WSkpJMTEw4VdaZM2eYysrMzAwLC2N6D1NZqr5YS1dZffv2VRo8ffo0s/wUxVTW4sWLlcZLrKyTJ08qvc7IkSO5Vlbnzp1VVda+fftUVVZeXl6xIXPmzCm2snr06FFiZSkqsbLWrl1b7G+50n4jVEMkEgUGBjJrlFKpdODAgW3atCm6NqEoOTm5bdu2zDTp6emNGjUaNWqU+rPLrly5MnDgQMVFPCMjo+h6t6I///yT6QRyFy9eVPM3pml6/Pjxx44dUxwp8byvsLAwZp3u5cuXzIrCtm3bXr58qWr6/Pz87777rui6vBrPnj3r2LFjQUGBfEQoFKo/u+zkyZPyNcqZM2fOmTPn/fv3I0eOVBOyePFiZnkVCoXMxnp8fLzS3iQlQ4YMUarkZcuWFf2OkJPJZEFBQUU3fdTIysry9/fPyMhgHxIfH9+tWzemT/z5558DBgzIzc0dMmSIqs5B0/Rff/2ltI5V4t/9hx9+2LVrF/P/I0eOWFpaKh6sLVavXr1u3LjB/H/hwoU1a9ZUs5zQNC0SiQICAhSnkUqlPXr0UDr9QVFKSkrbtm0/ffqkOKj+vVy9elWpskq0du1aeWWdP3+ezXmb31JZNE3PmTOnbt266qumdJUVEhLCqbJOnTo1ZswYltc5MJYsWcK1ExStLPWKrSz1Z/V/Y2WxtHv37qLrWKVWoRshTdOKiwXLXkhR1Pnz55m1PJa9UGnhO3z4cOPGjdUUcNGFtUWLFmpOFSsasmPHjtq1a6taW1cKWbx4ZwFiOAAAIABJREFUcZ06dX7++ecST5tWnItMJjt16tTmzZvVnBlbNLFx48ZNmDBBfTXKf/vixQtbW1tPT09m86XE6W/fvm1pablmzRpvb2/1p00rJZCVleXl5aX0Law+5L///jt8+LCak/HoIh/X5cuXT5w4of6yB3lIRkaGm5ubm5vbgQMH2Gclk8l8fHyU9iWoD2HTC5VC2PRCpZBz58716tWLfZlkZGSEhITMmDFDzfRKIR8+fNi+ffudO3fUTy+Tyfbu3csEsumFSm9k586d7CuLMXfu3BJ7oWKIRCI5fPjwyZMn1V/u8i2VRdN0cnLytm3b7t+/z34WSUlJmzZtOnfunJq/o1JIXFzc+vXro6Oj2c9FKpW6uLhwCmGDa0gpZqFGeTfCo0ePTpo0qRTvQSAQzJw5s23btgYGBurPtKZpeubMmfLjWCx7oSKKogIDA9evX69qAplMJt9JlZqa6u/v37ZtW7bvhKZ37drVpk0bpX2e6gUFBRkaGqr/alMklUpDQ0O7d+8+btw4W1vb3bt3lxiSkpLSv39/5imvJVYsIzs7u3HjxjY2NkrnVqixevVqQsimTZtYTk/TdFRUVOvWrSdNmsRyeoqiRo0a5e3tHRQUZGpqunnz5hJDCgoK2rVr5+/v36JFCzs7O6XTZIolEonatGnj6Oj4888/s0wsOjq6c+fO3t7e+vr66nuh3LFjxzp06FC9enULCwul63+KRVHUmjVr2rdvb2lp6e7urn4dSG7ZsmXNmzdneYkhTdOFhYW+vr4sr7lmvH79umbNmuHh4ba2tvPnz1ezdGVnZ/v6+spXK1luFzK4VpZMJlu6dGn79u3NzMzUX8OgGBIcHBwQEODl5dWuXTs217QxleXv78++spiTSrp162ZlZaX+kg+5yMjIOnXqTJ8+vVmzZh06dGCT2Jo1a5o0aTJ9+vRatWqNGjVKzaU7Sn755ZcBAwaon2bs2LFqrjwpSiaThYWFqdqbXdbKtRHeuXOHOTBbil44Y8aM8ePHS6XS1NTU0NDQEnuh4h4Plr0wMzOzffv2GzZskEqlcXFxdnZ2qg4YPH/+3NnZWV42M2bM4PP5T548UfXKeXl5it+trq6uzK7O6OjoAQMGTJo0SXEXSrFWr149derUOnXqsNw/s379+lGjRtE0nZub26JFi3379qmfXiaTtWjRgvl2fvDgQd26ddlULEVRT548uXPnjo2Nzc6dO9kkduTIkQ0bNlhaWrLfP8N0KU9PT5b3Hzl48KD8yM3NmzetrKxOnDihPmT+/PnyHZh79uwxNzdnc1l0YmLiixcvatSo8csvv5Q4cVpaWo0aNZjv95MnT1pZWZXYC2NjY+vVq/f+/XuRSLRq1SorK6sSe+GuXbs6dOiQl5eXm5s7fvx4lr2QeRd9+vQpcUrG3r17Bw8eTNN0QUHBb7/9Fh4err5RicXiefPmMfvTkpOTvby8pkyZombpkslkT58+lfdL9r2wRo0aTGXdvn2bTWX9+eef3bt3LywszM7OHjp0aIm9kKKoq1evDhkyhKZpkUg0ZMgQPz8/9S2HqSxmTZRlZYnF4gkTJpw7d46m6RcvXri7u//vf/9TMz1N00Kh0M3N7c2bNzRN79ixo8SvR5qmk5KS6tWrx9TU9OnThw4dyn5TIT093cLCQtWJHSKRKCEhQX4yEZsXzMzMfPXqlfxkIpZpaFD5NUKhUNi4cePFixfn5eV169aN+ZvJRUdHq99B7OXlJd9LIBQKfX19Nd4Ls7Oz7ezsOnTo0KRJk6tXr44ePZq5TV+x8vPzExMT5fvKZ8yY4enpqWrJuHv3rrW1tXwfmre397x583r16uXs7LxkyZLAwEClI46qzJgxo06dOsxcVqxYoWYDcciQIUePHs3NzfX392e64L1799TsjUxNTbWxsZH/+PLlSxMTkwkTJqiaPjMzc9++fSdOnGC+aO7evWtra6t+O+/atWu9e/dmzi08e/aspaUlU+pRUVGqzoOQY3ph165d2RxFmDp1qmJn2r17d+PGjdWHtG/fXvGMg1mzZjFf9MXKz88/fPjwoUOHmJ2ob968cXd3L3G78MiRI926dZP/ePTo0RK3CxcvXqx4fHHp0qUlbhf26dNH8VzQ/v37c+q
FLLduz5w54+XltXz58po1a4aFha1cudLS0lLpgMWBAwfkKy7BwcG1a9eWr8OlpaWV2AuZgh09ejRTsCx7YaNGjebNmxcREcGysjp16vT3338z/6coqnPnzup74erVqz09PeW3XGFu5qm+FypV1qtXr0xNTdVUFkVRLVu29PDwkH96zAKmvhc+fvzYz8+Ppuk9e/YEBgYyX4yqTtxjHDp06Pvvv6dpeu7cuUwXFIlEavrW9u3bGzRosGnTJiax77//XtW5hPfv33d0dLxx40ZCQkKvXr0U/8oURSldU884fvy4m5vby5cvT58+PW3aNMVfCQQClvc2+hbl0QjT0tKYrwxm4V65cqXSBCKRyNPTU/39orp27aoYuHHjRjc3txUrVqiftVIv9Pb2vnjxYtHJ8vPzmb/Wpk2bBg0adPv27ebNm7dr187AwEDN6klMTIyNjY38e2fevHnyLlVUdHS0tbU1M/GTJ0+GDBmyfPly5pyrFStW/Pjjj0VDZDLZ4sWLHR0dFdOePXt29erVu3Tp0q1bNzU3MFuyZEmvXr0CAgKYLkhRVJcuXZTOKVAkEomsrKwUz7cOCwvz8vJSuvyOkZCQUKtWrb59+3p7e9etW5c5kB4bG2tjY/PXX38V+/onTpxo0KCB4ilIZ8+etba2HjBggJeXl+LFwnKbNm1ycXGxs7ObNm2aQCAoKCgICgoKDQ0tepT0zZs3zAHdgwcPCgSCnTt3Kl5wlpaWZmlpqRQiPwOLORw1ffp0xUs5L1265O/vX+wbSU5Orl+/fo8ePVq1aiW/eI7pIuq/c5krqeXdQiaTNWjQwMbGRs2xlgMHDvj5+cmPVX/69Kl69ep+fn5qVuaYLzX5jzdv3nRzcxs0aFDRKYVC4aRJk2xsbNzc3Ji9x8wXrtL1qUoKCwuZuf/+++9Dhw5lvtQkEomLi4tiM5DJZD179pSvJt6/f9/W1lbxezMtLa1x48bqN7uVeuHZs2c9PT2LPUZOURSzQqZUWb/99luxlSU3ceJExV3ukZGRbm5uarrU58+f/f39W7duLd+LyPRCpWvpFDGVpXhkNDQ0tH79+sVWFvM9GR0dXa1aNcXXfPPmTYMGDdSs+BYUFFSrVm3x4sXyLnjz5s1mzZoVu6rBLFH37t2rXr264rbgxo0bhw0bpjjl48ePmWs9f//996SkpEuXLvXp08fGxmbMmDEHDhxwcHBQ+gp6+vQp858bN244OjoqncpL0/Tdu3cNDQ0VG5s8ZOfOnW5ubkX3eB04cMDc3JzTgaRSKPNGmJaW5uvrKz+ilp6eXuzhX+bM76Lj2dnZzB8pOjrawsLiwoULzPikSZNOnjxZ9Bvh06dPU6ZMCQgImD17NrNAKPZCVWeaDB06tFWrVrGxsVKp1NfX99atWzKZbMeOHTVr1iy2ccoxN+WTH4GbMWNGmzZtVE2s2Avlbt68qXSHEbnZs2f37t07NTV106ZNenp68jMJL1y4sH///qLvXSaT3b59m1lws7KyatasGRoaKhaLJRLJrFmzunfvXmxVyHf/rlq1qlatWsxFVB8/fmzUqNGjR48sLS2LHjlo0aIFc3qhTCabPXu2h4cHU8CxsbHF3mxQIBC4uLgw7zEjI2Pbtm1MJTx79mzjxo3FXgpy/Phxf3//9PT0jx8/ytd4mO3Cohe6hIWFDRkyZPHixb1795ZIJGKxuFWrVuHh4cxX86+//qp0fr9IJKpVq9bSpUsHDRrErGinp6dXr1592rRpQqFQIpEMHTp0+fLlRbOiaToiIkJ+QHTdunU2NjbMvo0XL14cP3686PQSiUR+jk/Pnj07duzI9MIbN2707t17y5YtISEhSiFCoZCpBYFA0KRJkwkTJjDfXMyO9GK3iT9//sx8JaWkpDg4OMiv1tiyZcuyZcuKDZk2bdqECRNEIlFMTAyfz1fs6PKNJEUZGRmdO3d2cHCwt7ffuHGj4oc5YsQIpbV4mqYpisrKyho3bhyzAXH//n07OzvFwGJPRktISBg+fPiAAQOYW9Uo9cJi63fXrl2Ojo42NjahoaGKX6O3bt3y9PRUf9bMixcvrK2t5Wtvv/322/r164s9VJadnc18nzC9sFevXoq9UH1lrV69WrGyvL29mcoq+ncJDg5mdpMU/a4o9uNKSUmJjo5mfrVs2TJ9fX1mvSo+Pt7T07PodYQ0Ta9bt87Ly4vpKwMGDLC1tWU+otOnT3t6eqalpSlOvHTp0lq1av3www+KF7a+evVq9uzZDg4OPB5P8ZjL5cuXmzZtKj/XLDY2ttizGhXvrrdp06ZOnTrJv8quXr1a7NtUDCkjZd4IFy1a5OXlVYrLHsVicf/+/Qkhrq6uzNcrc1/Xzp07d+rUKSAgoOjyKpFIWrVqtXz58v/++69Pnz4+Pj5SqZS5CMnLy0vN0QKpVLpp0yZ7e/sRI0YcPXq0WbNmzN+m2FXvhISEsWPHTpgwgdm+UeqF6o/hKS7fzM2s69Wrp7hmIP/STExM9PHxEQqFL1++bNiw4ebNm42NjeW9UPEFmYNzQqGwY8eOtWrVMjExYTZNkpKSWrdubW5ubmNjM3bs2KJvPyUlxdfXlxDSqlWr169fUxS1aNEiU1PTdu3a1axZ89ChQ1Kp1Nzc/NOnT69fv5avxMlkMj6fL/+DUhQVEBCg/p6EHz58sLGxefTo0bJly6ytrUNCQiws/l973x0XxfW9jbFiUHYp0qUjTapIkS5gBwSxgYqFYsMSEQsalKCCaMTYEBtR7BWCJYJGRFG+RhGlKVUEqUvfvjPvH+eT+85vGwsiYMLzF2V3Z3Zm7n3uPec5zxklXD3o6uoKRwwLC4MVK5x/R0cHUdUJP1MoFCUlJWVlZbQ+bWxs9PT0FBcXV1VVdXV1RVzLYDDgc/Lz84cNG+bs7Iw+6uPHjxYWFiQSSV5efvny5UgQ2NDQQCwqUFdXJ5ofLly4cMuWLYK+xdOnT+Xk5AYPHuzn59fe3t7a2urt7S0tLe3i4qKmpvbu3bvs7GxjY2PiW86fPz9y5MgRI0Zs376dw+FUVFSYm5vr6ektWrRIRUWFeGiEnTt3/vDDD5KSkvB4ZGdnq6ioTJo0ycfHR0dHB9mSffr0Cc3XHA5HVlaWwWBQqVRXV1fgJ7gy1dXVfB/7GTNmAL+mp6ePHDkSpvVXr15BkBNdLjab7eTkBKlQJpM5e/ZsZM+bk5MjKyvLW1KJkJ2draGhceLEiX379klJScFmrra21sDAQNBFfvLkiampaX19PZvNnjRp0urVq3EBIwt98b1796qqqlpaWkKQGUy8HB0dZ8+ebWhoyKtMplKp8+bNU1FRIZPJK1eupNFo7e3tjo6OXM6IRPCOrF27dqGRdenSJTSyuN54/fp1LS0t+FiQKXDJ3JhM5rZt2+Cxv3XrlqysrIqKipGRUWlpKXw1cXFxBQUFLS0tLqMfQGJiop2dHRoOHR0dgYGBI0aMkJOTs7GxQV7BRUVFKKxqb28/fPhw3s0ojUbbtWvXpEmT4FcMwyZOnOjv7y+6+KO5uVlOTo43QNgn+OZEiGHYqlWrjI2Nu1
RWguP48ePHly1bxmAwLl++LCkpCavUurq68+fP37hxg69w+fbt21DBWlVVZWJigqwNMAzju1NpaGiIi4uLjY2FJEpjY+Pq1aulpKRGjx4tSBGXnZ2tpaWVmJh48OBBaWlpSCy9fPlSSkpKkOguJSXF09PT19cXAgVELuTy48jJyVFXV4eoHZPJfP36NZvNNjU1hRIxX19fcXFxLkEjdGBISEhISkpauXIlhmHFxcXa2to//fQTvKCmpkaQxsTX1zc+Pp5Op4eHhysqKsLUVlVVlZKSAruc2NhYuJ6PHz8GAxF4o6WlZUxMDPqc8PDw8PBwvod48+YNOOtHRkaSSCRPT08wjwgKChLkbQ/rG0dHx7t37yIWxHF8+fLlqFQOkJmZCbHo1tbWkJAQc3NzPz8/4ory8+fPJSUlxJF54sQJR0fHjo6OoqKin376iddh5OPHj1xLmXfv3ikqKqJNko+PDzGYlpCQQAxFcsHAwCA7O7u+vn7mzJm2trawz3v//v0ff/zR3NzMZrO9vb2JdWD19fXjxo2rqKj4+PGjiYnJkiVLOBwOk8m8d+/e2bNn+a6xnj17Zm9v39LSkpmZqaioCPelra3t+vXr58+fJ0ZZbG1tEQkxmUxJScnPnz8jFuRwOBYWFoKS7g0NDerq6vj/HVmNjY1sNpuYO6iuri4tLb1165aioqIgLlRSUqqoqOB7lKlTpyJZ04cPH6SkpCApWFtbK2hPEBAQAAHYqKgo2KXBheXrdIPjeGho6Jw5c8rLy/fv3z906FDYxDc3N1+5cuXixYt83xISErJ582YMwz59+oSCUm1tbba2toLE0n5+fidOnBBlZAGqqqoOHjwI/Ofs7Iy44fnz50pKSsSZk81moz5fxsbGpaWlGIaFh4erqKgAjTGZTKJxHdceWktLi9fVs729nav9xYsXL1xdXWEXcezYsbCwMA0NDd67RqVSR44ciYKWsH3n8rYVjoKCAgUFhbi4OBFf/+3QGznCbnDh7t27p06disyZ0tLSEBcKwYkTJ4KCgohjtaioSNBasrS0VEtLKzg42Nvbe/To0ai+++3bt66urlwmwghOTk5Q0XXjxg1LS0u0tnr58iXfhMfly5cnTJiQlpZ2/PjxUaNGAR8DF3J9nZMnT9bW1iYkJKirqyN1w9OnT11cXODn+fPn37lzhzc6BFxobW2N1oBVVVVELkRAcQ8ajbZmzRpiUC46OhqNWBzHm5ub/fz87O3ta2trbWxs8vLy0tLS5OXl4cSys7N//PFHkNfW1NQYGBgI6oG1detWPT09rmFWUlKipaVFnNYpFIqfn19jY+ONGzdAln3y5ElJScmFCxfCoHrw4IGhoSExBvDbb79RKJTDhw+jvCyYigEXYhh24sQJ4rUC4SWDwQgMDHRycoLdz4cPHxAX0mi0+Ph4rvXslClTMjIycnJyFBUVYR1TWlpKJpMjIiLodDpokbh8PQBlZWUbNmyYP38+/MpkMr28vBAX4jiel5cHOxh0xMzMTH9/f2QaAFUEwIV8ry2O47///ruPjw96kMA8jLhGIQKWRMnJyXDExYsXS0pKImqMiIjgyg8R0djYSCaTP3z4YGJiAvvj1tZWPT09rkDW5cuXYSV369YtlJzj4kIi2eTn54eEhKAvaGFhQRwU+/fv71TIunLlypiYmD179qBYZXh4OJcLNkJubq6ZmRmDwYAoy8GDB0eMGME3oE2EvLx8R0cHUXcGahroowSvETKyYmJi+I4sYmBj165dZDJ53Lhxd+7ceffunZSUFPpAXm5GXEi0UY2MjERcSISHhweRZiQlJYmvwTCMa+R++vQJ7TFiY2NR5HPr1q3AhRiG7dixAzaIb968kZeXJ66zv18u7CXVaFe5MD4+ftCgQUT/AuBCvoYvDAYD1lBFRUWSkpLGxsbAghwOZ/r06YKEed7e3ijscOfOHQkJCeG1q4CxY8dWV1cTWXD//v2CBDI4jmtqasJKCtQraCrPzs7mogfwcKqtrU1MTESrtvLycjKZfPny5bi4OAsLC97HCzIxf//9N5lMJrpNAhcSNWB1dXXS0tJg+Q+GhEOGDCFqd6Ojo5WUlNBM/ejRI5jmrl69qqSklJ+fTxSdZmZmampqkslk3ugN1+mFhIRADQCO41QqNS4uTlVVlSvzCooDHR0daEINb1y+fLmSklJkZOS6det0dHRQUh2wfft2MzMzCoVy+vRptGCiUCgWFhZTp06dM2fOvHnziGEDoCIvLy94WlD24sOHD8rKykuXLp00aRKvCg7iZk+fPi0pKUGT/vv3701NTSUlJSUlJQWxDrDyuHHjUB4ITmDGjBnwK51O55LPffjwAUrHiB9iZmYmJPT66NGjkSNHEvUgwIVJSUnoLxUVFQ4ODhAgbWlpQXLN5uZmCwsLCwuLmJgYHx8fR0dH3shBe3v7gQMHgMAg1AxFL2w2e8GCBcTNNGolASs5rtaVYCLKlQ2trKyk0WhTpkxBZB8VFWVqaopi+GfPnuUr88Fx/P79+3D058+f//jjj25ubjCynj9/rqmpKch4gUql/u9//2MymSjKMnv27JEjR/LVMSKoqak9ffoU6c7YbLaRkRFxGQcjC9bBaGQRlboxMTHKysrEkcUV0GptbdXQ0EhOTnZ2dp4yZYqbm5uQRUlDQwNwoYyMDPE6R0ZGWllZcb24pKTk48ePaOwsWLBg/vz5iL+vXr3KZUo+Z84cSLTj/8Rm0fy5Y8cOBQUFR0fHhQsXwguePn3KK/X6Trmw98onusGF0tLSxMDLu3fv+F5cBoOho6MDk35sbKyMjEx8fHx6ejqXmfqRI0eI05a+vj5xAKxfvz44OLjTs1qwYIGrqytiwaKiIg0NDUGGexwOR0JCgkqlorgNjuNr164lykPKy8tRRV1UVBTa/yHcvn3b3Nx87ty5RFX3u3fvnJycmpqagoODo6KicEKMFL2GQqFw7W8KCgo6OjpgCoARa2JiQkzfEs1xtm/fDnl7HMevXLnCt6q3rKyMrxCDGJgCN2HYF3I4nH379vHtzvPw4UMSiWRmZka8OCkpKRs3bjx06BCa2goLC5GLzfbt2z08PLg+h0ql/vLLL8S93aNHj2AiACrilaF/+fJl69atxL3I8ePH4+Pj4ee//vpr3bp1vCdcWVkpvEaNuEOFvzCZTCGt1/B/uJB4qSkUinC5x6NHjyQlJYmFkiUlJcQHkssnHVJuYJLHZDJPnTq1bt268+fP84oU6uvrzczMwsLCYGnS0NAwceJEY2PjsLCwiRMnBgYGosEYHR1NdGd9+vQpb6KIyWRyGXZPnTo1NDSUSqVOmTIFFkZMJnPy5MlmZmapqan37t3T1tbmm0hOSEgwMzNDisRjx46NGjUqICAgKChIW1ubbxqViPT09OnTp8PPXl5eycnJwt2a9uzZM2TIENg6YxgWGhrKayjYpZGFUF9f7+7uDs9zYmIicNL169fV1NQkJCS4HI58fX2vXLny999/GxoatrS0EGOk6DV8qzjodLqGhgboBurq6jQ1NadNm3b79u2YmBhNTU2u0hpIf
yJpDBcXPnny5Pbt251mAbvhYVJQUKCoqNiHXNirBfUcDmfFihXGxsbQbpHrTnM4nBMnTqxcuRIRw6lTp6SkpISoKphMJlzr5ORkTU1NeKDv3Lkzc+ZMFxeXhIQE4j2DWQZoA8fx4OBgYo3L+fPnUSCLC+/fv4+Ojoa1D6jyli1b9vHjxwcPHujo6PB1Ibl//z6MimnTpllZWSEWTEtLMzMzQ89HeXm5oaEhMa0toqqIzWbPnz9fWloa8oLwR1F6ab548YJEIiUnJ+M4jmFYUFAQ36VJTEwMSI1EORlewHIBcSGHw5kwYQJvjBQhOjp68eLFjY2NixcvJnIhV1yosLBQX1+fONMJ6XwLSEtLMzY2RmsIJpPZaaHxxYsXlZSUOrU44IuCgoIlS5Z4enpCDri5uXnixIlce1MuPHv2bM2aNQcOHICnF55SIa6qOI7fvHkzODj43LlzcOszMjLIZLIQ0wAhXCjkKN7e3siUp7y8/MuXL0wm89KlS7/88gtxBVlfXy8rK3vo0CEhH0UEh8OB+bqpqWnixImhoaHE6ZLJZEZGRpqYmEyePJmvwzjoe7meWBihCQkJiAkqKiogToNh2JYtW8hksoODA8QnCwsLpaWlU1JSjhw5YmlpyXsR2tvbAwMDyWSyvr7+9evXgdjGjBkTGBhoa2s7e/ZsRJypqaloe/Ty5UsSiQTLKVDrdLroxzAMWpd4e3uXlJRYWVkBKVKpVGKzRkBGRgaJRNLW1kZRVjabvWjRIuH9n6Oion7//XcojIHi2ubm5i1btjg6Oq5atQoFYBsbGydOnAgBT+FcKCKAC4ODg0GfIYqC5v3793Jycr/99huHw+nUXLfH0dsWa8CFBgYG48ePJy5G2Gy2h4eHj4/P0aNHVVVVUd/tEydOyMjICOrwOXfuXFSbMXXqVERygkDkwoaGBlVV1SVLltTU1Hz58sXCwoKYFwwJCQENXmZmprKy8pIlS2RkZKDE6tOnTz4+PsrKys7OzoJyY/v374cgWGlpqYyMTEBAwMuXL6EBDXFPAN1wOi0nJ2L9+vVAyZAemzhxInE9AftC4TXXT548IZPJsIGGdkhmZmZcElziyOkSkO5/6tSpRC5cvnz5+vXr+dYL0mi0cePGwZ4PYqSmpqbV1dWHDx/mMu9PT0+XlJQEFhcRJ06ckJWVFW4Dy4VurGcB+fn56urq8fHxhw8fVlZWBgUd7AsFOTNcuHBBX1//8OHDHh4e5ubmEOmCp5RYaUDEtm3bbG1tjx8/bmFhMXv2bFBYwJXh6/gKri7Nzc1d5UISiVRTU1NQUODh4SEhISEhISGo9wWEtkRxf+ZwOEuWLEEldE1NTcLtm3Ecb29vX7duHdrjPnz40NramvgCvjGJvXv3gsFFYmIiNIyMjIyUlpaG3Mr58+f19fVnz57NV3/k7+8fFhbGYDBSUlKGDBkCA/bZs2dxcXFcmruWlhYrKysUS+QaWUuXLjU1NeVbjMFgMN6/fw/PfHt7+7Zt20gk0rRp0wwNDbnWTDk5OStXruRwOO3t7SYmJlzTBewLieF0IsrLyw0MDJDtg7q6upARfebMmaamJngxFxdmZWWRyeSukhOMI2tra3d3dxHN24ALra2tBfWa/nboA9NtDoezbds2Lm47dOgQhDFfvHhhaGioqamJum88fvxY0JQUHh5uZ2dnbm4FbExXAAAgAElEQVS+cOHCtLQ0MpksJF0HIHJhdXW1u7v70KFDpaSkUCgMAOXhZ8+eXbhwIWxBysvLNTU1hSRsiGhtbSWTySB1q6ys9Pf3NzY2XrJkCVHQBQAu7NQCDcdxNptdUVEBopWsrCyoeFuwYAHiQiaTWVdXBwltrvc2NzcfO3bszp078C+uEUvsl43QDS6k0+nq6upQtg9c6OTkVFVV9fr1a21tbb77yzdv3ujp6RFHMofDAcu6mTNn8vpfE89cRCQkJEhLS/OtqRKE7nHhokWLUGi6pqZGVVUVlLEUCoVvvK6iokJbW7uurq6trc3BwcHBwQE1Yf/w4QPvo4LjeHp6uo2NDYPB+PTpk5GRkYWFBWrC/uLFC74h+v379y9atAiUHUQu/PLly44dO4R8wbCwMCkpKRKJ9Msvv7S2tkZHRwupNBeRCy9evEgmk0Wx9ERgMpkBAQHIv/DTp0/i4uLE2SMuLo63rAjH8U2bNmlpac2dOxfJQ06fPo24UBDa2toUFRUxDCOqY4TEToVzId+R9ebNGx0dHTU1tWHDhm3duhXeWFxc7OHhgepSAKWlpTCOII1Kp9NhX4i4sKqqis1m8w2e//HHH1OnTiWGFjrlwmfPnikrK8NqlYsL8/PzhTuM80VdXd3GjRu71FPi/fv33Vh/fz36svvE9u3bUYwlJiamrq6utLRUT0+vuLg4IyNDTEyMt0oXwGAwIHTQ3t6uqalZWFh48uRJNTU1OTk5Ia3RPn/+DPeSK0Yq6CkHLlRVVUXLmcrKSuFcSAy7hYaGcjVSFwQRuRC01CDgRG3hQLYwYcKEnJwcT09Pvpm85uZmXV1dLy8vLS0tcFbERWMU4ELhYTqElJQUBwcHAwODoUOHAhfS6fSVK1cOHTpUVlaWt2M1wvr160eNGsVVqCTENKcfcuGbN28YDMbMmTOJNSHnz5+3t7cX/i4wRvDw8Dh+/DiTydTU1ERciHD27Fkk079582ZWVlZbW5uZmdmjR49qamqGDRuGuJAXMTExLi4uiHgwnr7KXGAwGKmpqenp6bBqKSgogEcaw7DZs2cL77MBXCikTBAgSqsHXrx69crV1RWYfsOGDXp6ejADPHr0SENDg1gGfvjwYVQDt2nTJjExMeJGGbhQSKa2sbFRWlq6trYWsSCDwZgwYQJfGvj8+bOXl5eNjc0PP/yAAuDCn08ajaajowN76/z8fF1dXWK/Pa5b7+LismnTJhqNNm3aNDRVAhfGx8ffuHFDR0dHUCueyspKLS0tR0dH4gNcUlLi6ekpZHDt379fEBf+u9E3RIhhWEVFRUZGhry8PDHfsGzZMkiutLW1WVtbc61YfX194Zl+/PixjIxMYGBgXV3dpUuXZs6cieN4U1PTunXrDhw4wHu4/Pz88ePHa2try8jIwOKRiwsRWCwWDLaCgoKamprs7GwSiUT0OqqsrLSysuI7j1RXV8vJyXl4eMBT/vnz51GjRom4+AUuJIr9iECpNRBwFhYWcnWnCgsLGzt2bExMDO9ekEajnT59Ggiyvb192rRpKGKZlpYmSGiOICIXFhQUqKurwyYG2gYhO7f29vZOc43r169HylJR0K+4sLy8XEVFJTs7+8yZM2pqaigtlJyc7Obm1ulR0tLSpk2bBj97eHiEhYUR94Ll5eUlJSVqamrEkrVff/0VxY7s7e03b94spPOwmJgY0RAANGt8XSIbGhosLS3d3NzMzMxcXFxQvq20tNTT03Px4sWdZnq+ERfevn2bSqUuWLAA7IVZLFZ4eLi4uLi0tLSRkREx7l1eXp6Xl6ekpETkQg0NDWKg6O7du8LVMW5ubkTrxA0bNgjqDTtp0iSYTz5+/Ghqaoq4
kGtkxcfHoyXy69ev9fT00L/A1Jer7IHJZELar7m52dLSEqoYiS/43//+Z2xs7OjoyOXYzAXgQt4yKl5UV1ejxQSRC1tbWyMiIrqxF/zu0DdE+Pr1ayi5zcjIsLe3R7fZy8sLejHv3r1769at6PVUKjUvL+/58+fy8vLAhY2NjSEhIdLS0vv27bO2tkb6Rl7Q6fTx48fDjiQxMVFRUREJPtXV1bk2InFxcdbW1i9evDAwMIAlJ4qRivK9WltbDx06pKKiYmZmlpiY6Ofnt23bNhGviSAufP78uZGRERpIqampXbLpCQ4OVlVVRRsyOp1O5EJR8PHjR2VlZaBSQZRw9OhR4vY3ISEBVSvzxalTp8aNG2dmZoakRlu2bBHi1MqLXuNCQ0NDUCTx/e5lZWVRUVHgNo5hmK+vr6am5pUrV+7evaunp8c3aZecnOzr63vs2DH4wOvXr1tZWdHp9NzcXH19feKU19jYCLX8JSUlpqamKFAcERGxZMkSDMNu3rwJq0Ah2Lx589ixY4nkKui7TJ8+Hb7I+fPnyWQyVKdgGLZ06dLDhw+L6BiSm5s7ZsyYo0ePslgsIWEA4EJRbvfDhw8NDQ0rKiog+IGs9ul0OpcfWFVVlby8fFpaGjhQI7nTtm3b+DqPo/Vle3v75s2bHRwcNmzYUFNTU1VVpaOjM3ny5EOHDnl4eEyfPh0F927cuIGuJJVKHTx4MLostbW1Y8aM4RVGcTicuXPn2tjYwGnX19dz9VObPHkyUegE2mYUWmhubu60fpqIkydPmpiY2NnZgb8HcCFv20jkHcPhcAIDA2VkZCQkJNzc3GB1QuTC/wh6mwiRDcHt27cVFRVzc3OJA+zt27dycnJjxoxxcnIiJjwyMzPl5eX//vvv58+fE0uLXr9+bWtrKy4urqurK2iOzs7OBictUB6Dkh42asTgNWz/WSzWjBkzBg0aRNQFQGsFIW3t0tPTg4KCIiIiILLBYDDOnj1raGgoISGBLIhEAfQMunr1anp6OjAEm83W19f/mkbMMJUT1e10On369OkiNngDABfu3r1bUMOwP//8U0NDA5Fre3u7np6elJQU39LMuLg4BweHgoKCxMTEH374AY359evXCxLu8gVUXKSkpCQnJwsvBUM4duzYmDFjcnNzjx49KkiBxQW4gMHBwcuWLePV6WzatGnEiBEorA1OHDY2Ni4uLnxpICMjQ1dX99ChQyYmJrNmzYJE75QpU6SlpRUUFJDKCUxbcBzPyclRUFBISUkhDpOGhgZ9fX1ZWVk9PT3e+R3DsAsXLuzYsQMlJrdv366qqiq8B0VNTY2npyeO46mpqRMnTmxoaLCwsAAuFOUqEQFcaG5uLtyFXJSlz5MnT4KCgpBMA7gQkQpCfX09XB+0VuaibcgXEsMzLBZLW1sbZDsLFy4MCQlJTU318/NTUFAoKChoa2v79ddfV61alZSURPyo7du3a2hoABdiGKaqqkpcggcGBmpraxP33+i0fX19ra2tIYy5fv16IyMjSOxRKBQ1NTXixu7cuXOysrJdDR0Dbt26ZWdn9/z58/PnzysoKMBeAhI6XOlb8PRISkq6efPmggULmExme3t7QECAlpYWSHiioqLAr+4/gl4lwpycHENDQyT2vX//Pu/9bm9vLyoqQg8fKgVNTU2Vl5eHRgdEgARZSBPd9+/fa2hogAYHEtEfP360tLQkvoZKperr60Nl0qpVq6ZPn46eWgB0huMbjk9MTDQzM/v9999Xr15NtJDAMOzevXuCCgYEAbjQ2NgYXZnuCThZLBZSzYDPL1Ei2I3GyGVlZerq6lyFPrm5uUFBQWw2G7qVenh4wPLl0qVLq1at2rdv37x587g+p7q6WkdHp6mpqb6+3tTUNDY2duTIkUipJGLHQQTgQgsLC9FNi6DszN3dXfQcPsRIYVnN4XDABLW0tPTcuXNsNnvx4sVaWlqdFnIAQkNDoZyuo6PDxcVl2rRpdDod2u+hHUxNTY2ZmRlacOTk5PDqO6AkkTdmxWazPT09Z82atXPnTjk5OXRhN2/erKmpKdzCn8Vi0el0NTU1IAwobxelpTMvcnNz+SYpuLBhwwYIiVMoFL7nFhAQMHjwYGS1j/+zYSKGHyoqKoyMjJDbQ1ZWFt8U4OnTp7kionl5eXJycr/++ivRKH/nzp1GRkZCBsiOHTtUVVVhFXX+/Hk5OTm4U2w229ramq9upbS0NCIigkQiAYUzmczg4OBRo0bNnDlTXV2d16dNeB8bQbh3756fnx9a/YBeHWISlZWVvAsa4EInJyeklscwbNasWSgU11XV9HeNXi2oNzc3FyI848WdO3cmTpyI9hmZmZmiB/SIx500aZKEhAQMj9bWVktLS2Is4vXr14WFhQ8ePJCXlwcuZLFYPj4+iAuzs7O/fPnCN1DO4XAUFBRgI0ismv8aZGVlca0PusqFVVVVxsbG4uLiOjo6kEHpht0DL3gr8EAy5+Pjw2Kxmpqa3N3dpaSkrK2t9fT0Kisr//jjD94kGYvFgt22i4sLBGdWrVo1bNgwZHHXVTx9+rSrNrapqaldUrLhhO9eWFhIIpEOHDgwfvx4YEQoCeAqoObFjRs3ZsyYMXHiRFQJTqVSXVxceKUu/v7+Tk5O3UvM7Nu3DxoyQ3hfRkYGrRGvXbvGe/cZDMaNGzcuXLgAJ19UVKSqqspisTAMc3Nz61LZSfcAXGhlZcU3uwGlcrq6usR5nMlkEi/17NmzZ82aJfqDzWazo6OjYdkBXKimpobezuFwlJWVhVflE7kwLi5OQkJi2rRpRkZGfLNxMHiTkpLu3btna2uLtrPQzVRQZyURuTApKQldt5UrVw4aNIgYR921a5dwkxDo90Ss7UlOTnZ0dBR+0H8lenVHWFFRoaGhIWKNCIvF0tPTEyQc5QtU1VdQUGBvby8vLz9v3ryKiorKykoTExNjY+OVK1dqa2tzNQ+7cuWKsrIycCEK0wEXmpubx8XFaWpqovpWDofj7u6O8kzQdhz/vyy4c+fOHtdZdYkLV6xYER8fz+FwDh8+LCUlBTMvdFYSMYQoOohciON4fn5+RkYGjUajUqm2trbEDoi3bt1CO5uCggLUdWHt2rWnTp3qRgiur5CUlCQmJkbUm3TKhZmZmbq6usePH7exsbGwsEBhfyqVumzZMq51z9eo9X766afm5ubi4mJQX1+6dGnQoEGC4iVtbW0WFhYLFixYsGCBgoIC1Ck5OjpaWVlNmjSJ6Mr0TbFp0yauTpnPnz/ft29fSkoKBBuEX9umpiYLCwt/f39RuJDNZjMYjKCgIPBex//hQtQMFcMwXV3dTpukE7mwqqrq4sWLRLMx4oYyPDwcCQXAOoc3tMsXnXIhhUJ59eqVgoICcCGGYQEBAerq6ih1unv3bt7sIBeeP38uKSmJWmpHREQQPeX/O+jtHGGXuFB01ROO4xQKRUFBISYmhsPhGBgY3Lx5My8vb+3atTIyMkVFRXQ6/ebNm0ePHkWxVg6HgxwvL168qKKiwmVRyOFwYmJilixZwpVcOXv2rJSUFCJdNTU1b29
vxIKpqaldyguKDiFciNJdDAZj6dKldnZ26F/nzp2TlpYGd56uboME4fXr18SWyC0tLUZGRvPnz0fq0KysrPHjx+/bt4/4rlOnTsnIyAAX1tbWksnka9euXbp0SVdXV7iKr7/h7t27v/32G4lEIvYbYbPZq1ev5luj9vLly5CQEHAaYjAYnp6ednZ2gmz5AF+pXJ87dy7EPD59+jRp0iRBs+FPP/0EabyqqipdXV1IbkFnY1GctL4RTp8+ra+vDwazDg4ODQ0NPciFv/32G/jNBgUFoZ0QcKGvr29qauqqVatmzZrF+0YKhRISEjJ58uSdO3dCAJ/IhUQwGAxra2tkwb97926iJdvbt2+HDRvG66TIF0K4sLa2VklJKSsr69WrV9ra2qjKJSAgQElJ6eDBg3v27OHb6BTDsKNHj3p7ex86dAj1uRw9evSECRNcXFycnJxEDPL/y9AHqtFvx4WFhYUKCgphYWHE1FR4eDhvs1xwMCFWBIoikUJxDCIXPnjwYPDgwUeOHPny5Qu47xNLYnsWxcXFysrKUPWBNqktLS1jxoxBRo5hYWFiYmJElUpiYqKMjEwPPt+lpaVjx44ltjK/e/fuiBEjQkJC4NeOjg6+a+rTp0+jHWpycrK+vr6zszOXm3Z/xtu3b318fGA6zszMRFyYk5MjqG8XjuMrV64cMmQIMiUBB+oe5EIOh3Pq1Km1a9ciZYetrS3UXEdERAixW7K2tn7z5k11dTXqKfHkyZO+nQchwADhARqN5u3tDSWzbDY7PDxcSMa9Uy6ESZ/ovU5M+QMXmpubJyUl8V7wjo6O8ePH//LLL1euXHF0dNTX14dnYNu2bVpaWsR8TVlZWVlZGTSiAi4sLi6WlJRECpr8/PyFCxeK4u8P2LhxI6RRa2pqIAWDcskPHz4Ebw3iAhe4cOTIkUeOHOF7KxcvXuzt7X39+nUbGxs3Nzf4NMgXCtHe/+vRN+UTPcuFFArF398fphXgQmNjY7SepVKpP/74I1elamVlpaKiInFb0ylYLJahoSFyhyJy4V9//WVtbS0tLT1z5kxBQf+eAnChp6eni4sL2oFVVFR0dHSkpqbCr7z9w3pcCV1aWqqqqoq48PLly5GRkXy/e2pqKjH0Fx8fLyUl1akzcj/Es2fPtLS0iJrAzMxMaWlpPz8/TU1N1LiYF7DqMjAwQBMTk8lcs2aNcDdtXGQunDdvnq+v78mTJy0tLWfOnAmhDklJyXHjxtnZ2fHqj5qbmydNmlRcXLx48eK1a9eamprCl4K2DF0qL+kRvHnzBkZrY2OjnJycnp4eGrwdHR1ycnLCvWAQhHBhVlYW6n3BZDKRkzgReXl5KEDKhdOnTyOxOpvN9vLymjNnDvyKMr6ACxcuQBu1mzdvoiXItWvXRo8evWXLlosXL5qamnapHAL/hwvNzc0fPXpEoVAsLS3RIjgtLY3X9w5KRfX09LjKS3Acv3z5sru7O4Zhubm5BgYGDg4OqCVZXl7ef6FeUBD6zFmmrKxMTU0Nnjzhlvy4UC6sra2l0+mrV69GS2zgQtQqlkKhkEgk3ukAauqFK7wRkpOTk5KSoMAIKSe5YqS9huLi4kWLFnG5Qr9//15aWhop0MLDw/nGbb4G2dnZAQEB4eHhMKGXlpZqaGgsWLDgyJEjampqgvbBoaGhXNXT06ZNQ21rvhdgGKajowML+fb29gsXLkCos6ys7Ndff+Vrh/bmzZuoqKgLFy4wGAwRBTW8aG1tnTRpEjh4FRUV8VoTpKenQ3UQSG9Qsqe6ujorK4uvlUFMTAy04CgoKJCQkABnZBqN5ufnt3bt2i6d3tcDwzAnJ6c1a9YA+d25c2fYsGFEl6UpU6aI7i7b2Nhoamq6dOlSDodTUFAAn1lXV9dVi1ouHDx4kKuP7vDhw7kGIGLrkydPqqurc91o0FfPmTOne6exZcsWaEUwf/78WbNmdRq1hn2hiYkJkdswDEtOTi4oKACn2bdv39bV1Q0dOtTZ2bl7FvP/JvSlxRpwITQs7TR9BdUwYMZNdPDbtGmTu7s7cCFSwQAXTpky5cCBAxYWFoK6e4jOhUVFRSoqKhcuXKiqqiIa7/YVFxLR0dEBwRnoPoGmwrCwME1NzW7obPnizp07Ojo68fHxy5cvR7TX2Ni4YcMGHx8fvvshtL7ZuHEjkQvBk7P/q2MaGxsnT54M4TgWi/Xjjz/m5uYeP35cXl7ezs6ORCKhNim8uHr1qr6+fmRkpKur64QJE6AF1ddwoZub2/jx46urq6FUEQlNjxw5sn79eiqV6urqCrf+y5cvQmpeFy9ebGJigrYUmZmZGhoacnJyUlJS27Zt6+U9AbQVbGtrs7OzQ4nMO3fujB49+vjx49CyUVVVtUuPCnDhjBkzTExM0Ba8G14KRHz8+FFCQgKVM7a2tv7444/ERcb27dsXL16MfhUSHvhKQB/sTjuH4DiOYdjz58/z8vJgfouKikI27hEREciL0d7ePjY29r+8FwT0JRHiOF5WVjZ37lxBdnlcAC60sbGBxuUwobBYrLlz50JZGPHhAC708PAQpQOcoDxKW1sbiv4VFRVpa2tzhVhxHD9z5oxwS8lvjZ07dyKnGC4u7CmGbmhogA7DYAE8bdo0YsUkX1CpVE1NTXT1Nm7cqK6ufvfu3cTERAMDg/5fovThw4eWlpa9e/caGBjATT969OioUaMcHR1BHxgREUHMkhJBp9OVlZWhGu/w4cNubm6gBuJwOFu2bOG7fRSO1tbWefPmlZSUQBNBPz8/KEDEcfzly5fy8vIuLi5omgsODhYS88/IyJCQkOA685KSkj7ZE0BVT2RkZFtbG7EZGewL1dTU5s+fD871XUJjY6OPjw+XFldELkTXoba2ds2aNYGBgZDDPnv2rISExN69e6Gd4a5du9Bbvnz5Iisr26mxXE9BdC7EcfzTp08aGhr29vbEAPuOHTt8fX1ZLNbhw4eJ/P1fRh8TYVdRWVkZGhoKlUBIi8VisU6fPs37WBQWFoqS/hXEhW1tbba2tkQRhKBUTV+Z0n7+/Hn69Omurq7i4uKCuLDbyM3N9ff3h6ViRUUFBF0XLVq0d+9eDMNsbGyEcGFsbKyBgYGTk9PQoUPRnHvy5EldXV0nJ6fvQh3z888/29jYtLS07Nmzh3eDVV9fb2BggKK7LBZr2bJlSF2MikOOHDkyZcoUYMETJ058vQ7z6NGjfn5+DAbDz88PiR5XrFihrKz84sWL+vr67du329nZ8cZXmpubV69eDWY3GRkZkpKSXWr+1bNobW1ds2YN7Kjq6upQJzwi7t69SyaTeV1avgadcmF+fj5UUrFYrPHjx2/ZsmXjxo1kMhnK0p88eTJ79mwHB4fjx49z3cr8/HxRTFZ7Cl3iwp9//nn48OGhoaHoLxQKZcKECUOHDrWzs+tqfOLfiu+MCAFv374dMmRIDwYki4qKFBUVY2JiKBQK0sTT6fSZM2ei5Xw/hL29PRh/1NfXOzk5IS589erVmTNnvuaTP3z4AEJ/VCCI43hubu6ECRPg58DAwE2bNiE7DyJevnypp6
cHZ/L27VslJSVBO6d+CAzD4EthGLZu3TroeUR8AYvF+v333zU1Nbm6hfz8889jx46FCZ1Op0tKSm7YsAGx4MOHD3mly109Mfwfq1jgQmQjwGazIyMjVVRUZGVl165dy7u3o9FoZmZme/fuRU8ySF5Faf7V4+jo6LC1tSU6fgmaze/evUsikXqZC6OjoxUUFK5evRoUFAR/uXXrFolE4msbS0T/5ML29vaSkpKSkhINDQ3U5BXMZoXbDP3X8F0SIf6PEF9I8/quArhQQ0ODaBXdn7mQyWT+8MMPaJpubm5WVFR0dXUV0mNFdMyYMWPVqlUMBsPLywtNAVlZWRoaGk1NTR8+fNDR0SFmH+/fv49+PXXqFNEyFDpqfRdcCF2KkCYLwzBUaUpEbGwsX/n7vn37EBceOXJk8ODBkESEVkHdCPEh5OTkoA4V0NyqS2Y6+/bt4/WNhG4+367URxAwDFu5cmWn3dsBd+7cEZLv7B6OHTuGejWjwvaWlhZU1B8dHT1y5EiiXf7t27f7IRdWV1fr6uquW7cOwzDeLjcsFmvt2rVoJQcx0k2bNrFYrDVr1ghq+/yfxfdKhPi34UJeZXN/5sJx48YRA1yLFi2ytrYWbqokHEwmE/Jh7e3t9vb2vB4TK1asGDlypKysLNc6PSAgwMXFBbjw3bt3JBKJmKExNTUl1lr0W7x9+3bEiBF8ya9TJCcnu7u7jx49Gtlbnzx5csyYMeLi4tAtsttnVV9fLyMj01XNPRHz5s3jKnMEAuies/PXo0tc+C0AXLh9+3ZoX4phGOgPED1ER0dLSkqibsA4jt++fbvTnmV4H3HhokWLDAwMuIosAwICAgMDiSqYT58+mZqajho1KiAgYEAdw4XvmAjxb8CFfEGn02fMmNFPuJDNZufn58NDn5KSgmJHX7580dPTq66u5i2aFBFQa4ySi+3t7ciNl4jPnz8T94JpaWkLFixgMBjLli2D9gU4jq9Zs8bIyAi6rN29e9fT0xPqDvuk93SXcO/evW6kpjIyMqARRG1tbWBgILHVg/CqeRFx8OBBOTk5Xsd5EbF3715ig1Ymk2lubt5Vf/OeRZ9zYUJCgoODA9ydOXPmXLt2raKiQlNTEz3zBw4c6N41h+Ybx44do9FoQnTFPYXq6mp7e3s4T+RBU1VVNXbsWA6HA61Ipk+fHhERARFR3uLCAeDfOxHi/zEurK6utrS0tLCwGDNmTEBAAI1Gu3z5MolE0tPTk5eXh+ymkpKS8HadgnDx4kUpKamuvpfBYLi7u8+dO5fBYCAVDIvF2rJli7i4uLa29rhx4yD+VlZW1m1n7d6EiFxI1PSHhITs378f/erv799p26OuoqtcSKPRrl27BuOivb1dV1d3/vz5X758aWpqWrhwYZcsfL8RoO67D7kQrQygl++1a9eqq6uJKeGv5MLx48dzNT/6poDNKDg6USgUGRmZPXv2mJmZmZiYxMfHa2pqIsONAfDiuydCHMcTEhJkZGTevHnDZDJ7toScCCqV6ubmNnXq1NzcXBE9cXoc06dPP3fuHI7jOTk5ZDIZ8hx0Oj07Oxtmk4MHD7q7u3f783fu3ImyXMJBoVC8vLxgWcBgMHx9fXlzYM3Nze/fv++RnGUvo1MupNFoGhoaqDh63759Xl5e6L/5+fnQU7Nnz0p0LmxtbYXurDIyMqDPrK2t9fLyGjFihKSk5M6dO/tJZKxvufDChQsoRJGXlxcYGMj7mpiYmG43okI9sHoBMTExxcXFKSkpiAsfP37s7++flJQEfO/t7f010fV/Pf4NRIj/w4VTp07l6izRs4CyZeQc3cugUqmysrI4jpeVlY0fPx6q2ZBuk06n+/j4ODs7oz7m3YMoXMhgMMzNzWNjY7/mQP0cQrjwwYMHmZmZb968gZa5OI7X1NTIy8ujzNCff/65evVqLg/3HoEoXAjWo9Aco7a2dsKECYsWLYJaBcf287IAAAUgSURBVDqd3ldW2oLA4XBWrFhhbGyclZXl7+/fa8e9f/8+iUTqq81oj+Ps2bOqqqrFxcX37t3j3QwcO3bMysqqr6q8vgv8S4gQx/GEhAQh/sI9BSqV2u1UzVeCzWZLSUmlp6cbGRkBC7a0tKipqSG5/Pv373ukUB24UPjeOj4+Xk5ODhmU/CuRnJxMIpHS0tLa2tqIsffHjx/LyckBF966dQv+mJ2draSk5Obmtn79+rFjx367K7N//34FBQWIQqNA/cOHD5GLSlBQkIGBAeoK1NTUROTCfgjgQjKZ3Jvry46ODmdnZ+RL8J2iqKjo4sWL8PPZs2d5nT2am5sdHBxcXFz+NZT/jfDvIcL/ArZt24Z6b9JoNA8PD2JqqgcxwIUA4MIJEyYkJibSaDSkG3z8+LGysjLXno9CocTHx+/fv797OVrRAVwYHR2NylQCAwMjIyPhZ7BlJwb6gAv5Sp/6CTgczrdLagjC986FRUVFOjo6xNUDXz/FvLy8/hYG6IcYIML+iLS0NBicnz9/dnJyGjJkiImJyf3798HBRFpaes6cOVpaWlu3bv12j/iOHTvU1dXLy8tramoEdQf9j3DhuXPnaDTalClTiB1u+9Yr9eDBg+bm5ugcOBwOnU4PDQ2FfSFwIaJGvOdaUf7L8F1zYWFhoaKi4qFDh/r6RP4NGCDCfgcMw1xdXWfMmAFS1TNnzjQ3N587d05SUhJ0oW/fvr148WIvlEIDF5qZmQkpJf4vcCGO43V1dfr6+qtWreo/i2tI+fj5+YFLJ4ZhwcHByNQb2jgTLTEHwBcDXDgAfIAI+ydgcM6YMYOoPExLSxs9erSIBuU9hV27dt2/f1/4aw4fPowUHP2HJ3octbW1hoaGAQEB/eQ7UqnUDx8+oIQlzsOFxcXFKioqqGPzAASho6PD0dERGff322QqXwxwYY9ggAj7ETAMmzNnDuyugAtHjBhBtAS0srLqn12kgQsvX77s5ubWT3jiW6BfceHjx48VFRXfvXv3+PFjcEjBebiwl5dN3y8QF4aEhIjiINOvMMCFX48BIuwvaGlp+fTp0/Xr12F2w//hQmdnZ/C/YLFYqDdsP8R/JEbaH7gQFbfcuHGDtwcIh8NZunSpqanp9xjr60PAcFuxYkX/7xHGiwEu/EoMwnFcbAD9ADdv3gwNDX306NGrV68ePnx44sQJMTExKpU6a9as8vJyLy+v7OxsCwuL2NjYvj5TgWhubiaRSH19Ft8cdXV1kydPtra2jo+PHzRoUC8fPTk5OSoqKiMjY/jw4WJiYn/99ZelpaW4uDjxNWAXPnXq1F4+t+8ddDp9+PDhvX9PewT5+fnOzs7btm3T0NCg0Wg+Pj59fUbfEwaIsO9RUVGhqqoqJiYWHx8fExPz9OlTRUVF9F/gwvr6+lu3bmlqavbdaQ7g/6O2ttbZ2dnJyWnz5s03b95cv3597xyXyWQaGBj4+Pjs2bOnd444gO8IwIXKysr379+XkZHp69P5njBAhH2MpKSkhISE9PT0wYMHi4mJ3bt3b/LkycOGDSO+hkqlpqenz5o1q4/OcQB8AFxIoVCuXr1qZ2fXa
[... remaining base64-encoded PNG data elided; the output is the token-entropy scatter plot produced by the cell below ...]",
      "image/svg+xml": [
       "[SVG rendering of the entropy scatter plot elided]"
      ],
      "text/html": [
       "[HTML rendering of the entropy scatter plot elided]"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "masked_toks = toks\n",
    "scatter(entropies, markercolor=colors, xticks = (1:size(masked_toks)[1], masked_toks), rot=45)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c564a9bd-ff84-40c3-b4bd-3c301bb8cfa7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.10.2",
   "language": "julia",
   "name": "julia-1.10"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
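
Note on the final plotting cell: it relies on toks, entropies, and colors, which are built in earlier cells of the notebook that are not readable above. The following is a minimal, self-contained sketch of how such values could be produced; the stand-in classifier (predict_probs), the example sentence, and the thresholding rule for the colors are assumptions for illustration, not code from this patch.

# Minimal sketch (not from this patch): build per-token entropies for a masked-token
# importance plot like the one in the final cell. predict_probs stands in for the
# fine-tuned FOMC classifier loaded earlier in the notebook.
using Plots, Statistics, StatsBase

predict_probs(sentence) = (p = exp.(randn(3)); p ./ sum(p))   # placeholder 3-class softmax

toks = split("the committee decided to raise the target range")  # example tokens
mask_token = "<mask>"

# Mask one token at a time and record the entropy of the predicted class distribution.
entropies = map(eachindex(toks)) do i
    masked = join([j == i ? mask_token : String(toks[j]) for j in eachindex(toks)], " ")
    entropy(predict_probs(masked))
end

# Flag the most influential positions (highest entropy) in red.
colors = [e >= quantile(entropies, 0.75) ? :red : :gray for e in entropies]

masked_toks = toks
scatter(entropies, markercolor=colors, xticks=(1:size(masked_toks)[1], masked_toks), rot=45)

Masking tokens one at a time and scoring the shift in predictive uncertainty is only one way to approximate token importance; the attributions computed earlier in the notebook need not follow this recipe.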