From fa7cff744b5a302a0f2beb5aabcff23711a34698 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 12 Aug 2020 11:29:44 -0700 Subject: [PATCH 01/21] Fix typo (#1118) In PyTorch tutorial, `torch` should be installed rather than `torchaudio` --- recipes_source/recipes/what_is_state_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes_source/recipes/what_is_state_dict.py b/recipes_source/recipes/what_is_state_dict.py index 8e718e9071e..5e7f259fd7b 100644 --- a/recipes_source/recipes/what_is_state_dict.py +++ b/recipes_source/recipes/what_is_state_dict.py @@ -28,7 +28,7 @@ :: - pip install torchaudio + pip install torch """ From c61e49c7694a65e185d2c7a3bae9f000cba9cabe Mon Sep 17 00:00:00 2001 From: guyang3532 <62738430+guyang3532@users.noreply.github.com> Date: Thu, 20 Aug 2020 04:19:49 +0800 Subject: [PATCH 02/21] Recover the attributes of torch in memory_format_tutorial (#1112) Co-authored-by: Brian Johnson --- intermediate_source/memory_format_tutorial.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/intermediate_source/memory_format_tutorial.py b/intermediate_source/memory_format_tutorial.py index 244e23ac204..014f1668504 100644 --- a/intermediate_source/memory_format_tutorial.py +++ b/intermediate_source/memory_format_tutorial.py @@ -261,14 +261,17 @@ def check_cl(*args, **kwargs): return result return check_cl +old_attrs = dict() def attribute(m): + old_attrs[m] = dict() for i in dir(m): e = getattr(m, i) exclude_functions = ['is_cuda', 'has_names', 'numel', 'stride', 'Tensor', 'is_contiguous', '__class__'] if i not in exclude_functions and not i.startswith('_') and '__call__' in dir(e): try: + old_attrs[m][i] = e setattr(m, i, check_wrapper(e)) except Exception as e: print(i) @@ -286,6 +289,13 @@ def attribute(m): # guide https://github.com/pytorch/pytorch/wiki/Writing-memory-format-aware-operators. 
# +###################################################################### +# Code below is to recover the attributes of torch. + +for (m, attrs) in old_attrs.items(): + for (k,v) in attrs.items(): + setattr(m, k, v) + ###################################################################### # Work to do # ---------- From fdbe99c1769d29a8c697e8850440a57b3e1b52f4 Mon Sep 17 00:00:00 2001 From: guyang3532 <62738430+guyang3532@users.noreply.github.com> Date: Thu, 20 Aug 2020 04:21:36 +0800 Subject: [PATCH 03/21] fix bugs for data_loading_tutorial and dcgan_faces_tutorial (#1092) --- beginner_source/data_loading_tutorial.py | 2 +- beginner_source/dcgan_faces_tutorial.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/beginner_source/data_loading_tutorial.py b/beginner_source/data_loading_tutorial.py index f0cc99ce081..bf812b8eb52 100644 --- a/beginner_source/data_loading_tutorial.py +++ b/beginner_source/data_loading_tutorial.py @@ -374,7 +374,7 @@ def __call__(self, sample): # dataloader = DataLoader(transformed_dataset, batch_size=4, - shuffle=True, num_workers=4) + shuffle=True, num_workers=0) # Helper function to show a batch diff --git a/beginner_source/dcgan_faces_tutorial.py b/beginner_source/dcgan_faces_tutorial.py index 1da4614ebb7..509bf545234 100644 --- a/beginner_source/dcgan_faces_tutorial.py +++ b/beginner_source/dcgan_faces_tutorial.py @@ -591,7 +591,7 @@ def forward(self, input): # Format batch real_cpu = data[0].to(device) b_size = real_cpu.size(0) - label = torch.full((b_size,), real_label, device=device) + label = torch.full((b_size,), real_label, dtype=torch.float, device=device) # Forward pass real batch through D output = netD(real_cpu).view(-1) # Calculate loss on all-real batch From de2571faae93a77651830d1968894d563a4874fa Mon Sep 17 00:00:00 2001 From: mcarilli Date: Thu, 20 Aug 2020 08:08:47 -0600 Subject: [PATCH 04/21] Update autocast in dispatcher tutorial (#1128) * draft * fixes * dont overrun the line --- 
advanced_source/dispatcher.rst | 85 ++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 24 deletions(-) diff --git a/advanced_source/dispatcher.rst b/advanced_source/dispatcher.rst index 7a7d806c328..23ba0f96be1 100644 --- a/advanced_source/dispatcher.rst +++ b/advanced_source/dispatcher.rst @@ -229,38 +229,75 @@ Autocast ^^^^^^^^ The Autocast dispatch key implements support for -`automatic mixed precision `_ -(AMP). An autocast kernel typically modifies the operation of an operator by casting the -input arguments to some precision before carrying out the operation. For some -operations, it is numerically safe to cast to lower precision, which is how AMP -can achieve speed ups and reduced memory usage without sacrificing much -accuracy. A nontrivial autocast kernel looks something like this: +`automatic mixed precision (AMP) `_. +An autocast wrapper kernel typically casts incoming ``float16`` or ``float32`` CUDA tensors +to some preferred precision before running the op. +For example, matmuls and convolutions on floating-point CUDA tensors usually run faster +and use less memory in ``float16`` without impairing convergence. +Autocast wrappers only have an effect in +`autocast-enabled contexts `_. + +Here's an autocast wrapper for a hypothetical custom matmul, along with its registration: .. code-block:: cpp + // Autocast-specific helper functions + #include + Tensor mymatmul_autocast(const Tensor& self, const Tensor& other) { c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); - return mymatmul(autocast::_cast(at::kHalf, self), autocast::_cast(at::kHalf, other)); + return mymatmul(at::autocast::cached_cast(at::kHalf, self), + at::autocast::cached_cast(at::kHalf, other)); + } + + TORCH_LIBRARY_IMPL(myops, Autocast, m) { + m.impl("mymatmul", mymatmul_autocast); } +``cached_cast(kHalf, tensor)`` casts ``tensor`` to ``float16`` if ``tensor`` is CUDA and ``float32``, +otherwise, it leaves ``tensor`` unchanged (c.f. 
the +`eligibility policy `_ for natively autocasted ops). +This ensures if the network calls ``mymatmul`` on any mixture of ``float16`` and ``float32`` CUDA tensors, +``mymatmul`` runs in ``float16``. Meanwhile, calls to ``mymatmul`` with non-CUDA, integer-type, or ``float64`` +inputs are unaffected. Using ``cached_cast`` to follow the native eligibility policy in your own autocast wrapper +is recommended, but not required. For example, if you wanted to force ``float16`` execution for all input types, +you could ``return mymatmul(self.half(), other.half());`` instead of using ``cached_cast``. + Notice that, like our autograd kernels, we exclude the ``Autocast`` key from -dispatch before redispatching. By default, if no autocast kernel is provided, -we simply fallthrough directly to the regular operator implementation (no -autocasting occurs.) (We didn't use ``myadd`` for this example, since pointwise -addition doesn't do autocasting and should just fall through). - -When should an autocast kernel be registered? Unfortunately, there aren't -cut-and-dry rules for when you should cast to a lower precision. You can -get a sense for what operators have autocasting behavior by looking at -the `AMP documentation -`_. Some other -general rules: - -* Operations that do reductions should be carried out in float32, -* Any operation with multiple float tensor inputs has to standardize them - to a common precision, and -* Any operation that does a convolution or gemm under the hood should - probably be float16 +dispatch before redispatching. + +By default, if no autocast wrapper is provided, +we fallthrough directly to the regular operator implementation (no +autocasting occurs). (We didn't use ``myadd`` for this example, since pointwise +addition doesn't need autocasting and should just fall through.) + +When should an autocast wrapper be registered? Unfortunately, there aren't +cut-and-dried rules for an op's preferred precision. 
You can +get a sense for some native ops' preferred precisions by looking at the +`cast lists `_. +General guidance: + +* Ops that do reductions should probably execute in ``float32``, +* Any op that does a convolution or gemm under the hood should + probably execute in ``float16``, and +* Other ops with multiple floating-point tensor inputs should standardize + them to a common precision (unless the implementation supports inputs with different precisions). + +If your custom op falls into the third category, the ``promote_type`` template +helps figure out the widest floating-point type present among input tensors, which is +the safest choice for the execution type: + +.. code-block:: cpp + + #include + + Tensor my_multiple_input_op_autocast(const Tensor& t0, const Tensor& t1) { + c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); + // The required at::kHalf argument is an optimistic initial guess. + auto exec_type = at::autocast::promote_type(at::kHalf, t0, t1); + return my_multiple_input_op(at::autocast::cached_cast(exec_type, t0), + at::autocast::cached_cast(exec_type, t1)); + } Batched ^^^^^^^ From 6ab3a3771b203395bda42d244fdff5f9bd38bb09 Mon Sep 17 00:00:00 2001 From: Sayantan Das <36279638+ucalyptus@users.noreply.github.com> Date: Wed, 26 Aug 2020 22:55:23 +0530 Subject: [PATCH 05/21] Corrected model.resnet50() spelling (#1139) Spelling mistake led to errors for beginners. --- intermediate_source/model_parallel_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intermediate_source/model_parallel_tutorial.py b/intermediate_source/model_parallel_tutorial.py index 515b689301a..df43f6fa5f9 100644 --- a/intermediate_source/model_parallel_tutorial.py +++ b/intermediate_source/model_parallel_tutorial.py @@ -86,7 +86,7 @@ def forward(self, x): # # It is also possible to run an existing single-GPU module on multiple GPUs # with just a few lines of changes. 
The code below shows how to decompose -# ``torchvision.models.reset50()`` to two GPUs. The idea is to inherit from +# ``torchvision.models.resnet50()`` to two GPUs. The idea is to inherit from # the existing ``ResNet`` module, and split the layers to two GPUs during # construction. Then, override the ``forward`` method to stitch two # sub-networks by moving the intermediate outputs accordingly. @@ -136,7 +136,7 @@ def forward(self, x): # # Let us run an experiment to get a more quantitative view of the execution # time. In this experiment, we train ``ModelParallelResNet50`` and the existing -# ``torchvision.models.reset50()`` by running random inputs and labels through +# ``torchvision.models.resnet50()`` by running random inputs and labels through # them. After the training, the models will not produce any useful predictions, # but we can get a reasonable understanding of the execution times. From b458ced83eb980f685ef445a09c7bfe950efe929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=A5=EB=B3=B4=EC=9A=B0=20Bowoo=20Jang?= Date: Fri, 28 Aug 2020 01:34:22 +0900 Subject: [PATCH 06/21] Fix typo & Minor changes (#1138) Thanks for the fixes @codingbowoo! --- recipes_source/recipes_index.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst index e842c19bae5..86438135e1d 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_source/recipes_index.rst @@ -1,6 +1,6 @@ PyTorch Recipes --------------------------------------------- -Recipes are bite-sized bite-sized, actionable examples of how to use specific PyTorch features, different from our full-length tutorials. +Recipes are bite-sized, actionable examples of how to use specific PyTorch features, different from our full-length tutorials. .. raw:: html @@ -40,14 +40,14 @@ Recipes are bite-sized bite-sized, actionable examples of how to use specific Py .. 
customcarditem:: :header: Defining a Neural Network - :card_description: Learn how to use PyTorch's torch.nn package to create and define a neural network the MNIST dataset. + :card_description: Learn how to use PyTorch's torch.nn package to create and define a neural network for the MNIST dataset. :image: ../_static/img/thumbnails/cropped/defining-a-network.PNG :link: ../recipes/recipes/defining_a_neural_network.html :tags: Basics .. customcarditem:: :header: What is a state_dict in PyTorch - :card_description: Learn how state_dict objects, Python dictionaries, are used in saving or loading models from PyTorch. + :card_description: Learn how state_dict objects and Python dictionaries are used in saving or loading models from PyTorch. :image: ../_static/img/thumbnails/cropped/what-is-a-state-dict.PNG :link: ../recipes/recipes/what_is_state_dict.html :tags: Basics @@ -90,7 +90,7 @@ Recipes are bite-sized bite-sized, actionable examples of how to use specific Py .. customcarditem:: :header: Zeroing out gradients in PyTorch - :card_description: Learn when you should zero out graidents and how doing so can help increase the accuracy of your model. + :card_description: Learn when you should zero out gradients and how doing so can help increase the accuracy of your model. :image: ../_static/img/thumbnails/cropped/zeroing-out-gradients.PNG :link: ../recipes/recipes/zeroing_out_gradients.html :tags: Basics From 770abb2050d25c8964e0abe6d56f822629008c0e Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 27 Aug 2020 12:24:00 -0700 Subject: [PATCH 07/21] Run win_test_worker manually (#1142) Merging to clean up a build issue. 
--- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5697f123d49..df10716c645 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -564,6 +564,7 @@ workflows: - master - pytorch_windows_build_worker: name: win_test_worker + type: approval filters: branches: only: From 4bfe3380a89c8d9eaf566847564a78fdc3a9e37a Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 28 Aug 2020 00:06:05 -0700 Subject: [PATCH 08/21] Disable `pytorch_windows_builder_worker` config (#1143) See https://github.com/pytorch/tutorials/issues/1141 --- .circleci/config.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index df10716c645..73d6353f673 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -562,11 +562,11 @@ workflows: branches: only: - master - - pytorch_windows_build_worker: - name: win_test_worker - type: approval - filters: - branches: - only: - - master +# - pytorch_windows_build_worker: +# name: win_test_worker +# type: approval +# filters: +# branches: +# only: +# - master From 1707a90225551a5b2fda471d6857df00a04e24ee Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Fri, 28 Aug 2020 00:06:48 -0700 Subject: [PATCH 09/21] Update index.rst (#1140) Fixed incorrect link. --- index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.rst b/index.rst index a5ad877b0f4..1ae48e5e5e9 100644 --- a/index.rst +++ b/index.rst @@ -206,7 +206,7 @@ Welcome to PyTorch Tutorials :header: (prototype) Introduction to Named Tensors in PyTorch :card_description: Learn how to use PyTorch to train a Deep Q Learning (DQN) agent on the CartPole-v0 task from the OpenAI Gym. 
:image: _static/img/thumbnails/cropped/experimental-Introduction-to-Named-Tensors-in-PyTorch.png - :link: intermediate/memory_format_tutorial.html + :link: /intermediate/named_tensor_tutorial.html :tags: Frontend-APIs,Named-Tensor,Best-Practice .. customcarditem:: From e60c6af161363a54eb031eec1c006b0bc5e1a4e0 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Fri, 28 Aug 2020 09:22:49 -0700 Subject: [PATCH 10/21] Update index.rst Fix to broken link. --- index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.rst b/index.rst index 1ae48e5e5e9..ad0655f2562 100644 --- a/index.rst +++ b/index.rst @@ -206,7 +206,7 @@ Welcome to PyTorch Tutorials :header: (prototype) Introduction to Named Tensors in PyTorch :card_description: Learn how to use PyTorch to train a Deep Q Learning (DQN) agent on the CartPole-v0 task from the OpenAI Gym. :image: _static/img/thumbnails/cropped/experimental-Introduction-to-Named-Tensors-in-PyTorch.png - :link: /intermediate/named_tensor_tutorial.html + :link: intermediate/named_tensor_tutorial.html :tags: Frontend-APIs,Named-Tensor,Best-Practice .. customcarditem:: From 3191c0cbb5f2d508e63b6ed07d5325dd6e8f1ef5 Mon Sep 17 00:00:00 2001 From: Alan deLevie Date: Fri, 28 Aug 2020 19:15:03 -0400 Subject: [PATCH 11/21] LSTM's -> LSTMs in equence_models_tutorial.py docs (#1136) Co-authored-by: Brian Johnson --- beginner_source/nlp/sequence_models_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/nlp/sequence_models_tutorial.py b/beginner_source/nlp/sequence_models_tutorial.py index 4db0361954d..40e4b79afd7 100644 --- a/beginner_source/nlp/sequence_models_tutorial.py +++ b/beginner_source/nlp/sequence_models_tutorial.py @@ -21,7 +21,7 @@ part-of-speech tags, and a myriad of other things. -LSTM's in Pytorch +LSTMs in Pytorch ~~~~~~~~~~~~~~~~~ Before getting to the example, note a few things. 
Pytorch's LSTM expects From ae17b15e381c76b5c290be38852c87d775b7dee9 Mon Sep 17 00:00:00 2001 From: krfricke Date: Mon, 31 Aug 2020 20:33:57 +0100 Subject: [PATCH 12/21] Added Ray Tune Hyperparameter Tuning Tutorial (#1066) * Added Ray Tune Hyperparameter Tuning Tutorial * Use nightly ray release * Fix checkpoint API Co-authored-by: Brian Johnson --- _static/img/ray-tune.png | Bin 0 -> 59762 bytes .../hyperparameter_tuning_tutorial.py | 462 ++++++++++++++++++ index.rst | 8 + requirements.txt | 1 + 4 files changed, 471 insertions(+) create mode 100644 _static/img/ray-tune.png create mode 100644 beginner_source/hyperparameter_tuning_tutorial.py diff --git a/_static/img/ray-tune.png b/_static/img/ray-tune.png new file mode 100644 index 0000000000000000000000000000000000000000..febd6de282e1d62a08b5ebbdf9806d6ce6fe566e GIT binary patch literal 59762 zcmdqIgNc@A^1tIHBA-V912YR$B`102OeZTOR ze|+t2!ttT(Is9L=!T)wI^zt}4?aVk3C5ubuKUT_kqOJ(}F$?%}x3}|>d%x#HRiuZBeOY8`f?f;#!FF0X z%34fspyHa8+I+`SCR3V5Xl|REp-53gNQmwt#^ZK}y3((+;B~{%m$&ke-;rzh9~>Fm zD|0k`gR=_*UpBd7Iob(d+`3^%>`zA|b+ zPe>{>yil1tYyN+IGr5Be%=zTY1@Q zI}?{s-~}93=0^Z|v2Y&Mr$y-VtrLeSgR`wuK^C#Wz_H3l)wF+;1@AI9>7X@&ew}p* z`gMgNmP3b0IKMb7ZV2-bilP@vU=0{`prtP31SXV1%PGGkxrPFK;B6&2+=$`pPfr#z zpwL|#ywt56UXuFw#FwS~1EASoZkY9v-^1odAs4RY_!JM60ykdX!~hxfllQo91MLfxtM4$Xwlq zc?W^6tGM5umGyW_);MLO2aqMjvu82$91FS9mW6nyEmA_M7K7e~+6eCB7m9-o|8CM? 
z%nZ@BsH|iQB?oS-NIc$Kh6>mMKU^#{#N0L8EX4Gs1#FH+1a-9N4zy@urmnQ!x>~}> z=xnH}i@M zVwCGfg`oFk-2}d9`RT$4!wE(lZ&qu(Gb+>Cdi~lp@VR$}H1J0Ub=qXDPhwh6BhA*v zuGGhyMy>x07K7F|&yUz}5o?xq%|?WPuiLyIfEF)?wyHoxR>0q9`)P+{if8fSu_KRu zFQ6W%s1yNTHqcu#U-_X-==1RSeAuBcg^>pi^5W!^eCT8qFMCO8F3-_YwzepDa@9u^U zy9up!=jCYG0M&S3q-N8~Ik5C_I>ds3L0$GZgD`N??n=A+x9gOxOIKi#vPND-34A(m`oIWFRhZb1I$u+lpwD@lb?@CI7{1R|AfD!9;yO+sf%Bk63 zz?%v5AA>V3+&;q?6=j+7(|sT>f1nkj(t+taY|!UgpFj}VpbFw+kkkr=y4ZU30Nwbn zM5`0PL%Z%_=?@&VuQQDfyvk$C5jw^S%gbXqdG%HB>cYBNVY@MHX}!Ys_^+~di$83k zgW+Ynb{(wm%>jX|+gj6ptXlKNq+YUk-LLSWSh315WyF{NAPe;QfQ6gqGBxdLs@pi5 z3gt1yHo-l(-lp1!Up7Y%2!jVI()^`uMSB5i{E^wP`iwdlhyI#6qgmM)Jl^ zrBQlhL6L4R^(u|Vf2L)|ilZ~X9BtqV^f4e>GOwp1jLo_ht-NfZkzVBy$rH*VbS8Ar zGsSz*8*rSqG#TMLv=mw*!Y?fa;ALl|j$E8azNK~%$z>aJXM3~=@Y%M z_FX{T2~2A+w-pwRxAdRdaw_xLTi%|4sQ$>sk$@-Li;XeiaHLJDCI@m9&K4S$inco# z%{n0_Ct#n+H-pv+XXWI(YKy{{_X==fr)P{-G-4<3zq+MHy^!m+SezUOumqe~YtAEq zvK4O3s1qp0Q*i#jf=zHh22nG`mHpNG`rT-o=$IHFkRe#**!nTn&ZT zts8RKd+0b|O8X4SF<2Us7%F$N6QI$XJkGt_=TUdUbK7*q z-Ea=(sWHHiNlhM}^1SF@#PDNRnlUaXYxIu=(tf6D;PR_eALYZ$tOtm0;w`y3mr^=r zI5;7DBj18E(fN#=IOD9CV#u>ei&ICS@%?i#eSLT9juLUBatY+{&e7! 
z46C8?y|F^hY^M`rAA%>KJhMN!0^!!az!GA6S89Vg5jhqGwPHws7Pf@TNN}8h#FCKG2rCg-ZMS__~h-L_%Oy zq?#qbxCCz~qhGBW*9rsfcEemMZ1qQmRbMav|2)+m9c4M$qQ~0)Tmx)sBHR{iin%d^ z?idS*+snxXAcdlq7t;#rev8#PqdQvjd9ucvMxFkmd20s1A#+1p%o*F?vH(LXfTvWM zc z3#5O0gu}rJdzo=fanG9~>a>ZV1M(CB=(Rz8k9S{+{XIfcCok6hRe}w_*O&pM?cL+( z+DZfV6mD!`X>nz?F|wJOch+?xwBW8AzrrDR-T=#ZCj?YQ+8GU7G7G3TqqgGHJ``Cd zi#GzL+=?j<6?i`|vHlMFlxI~auxXADXmU~Bw#$*#7fu=RdZq!JFp39 z_btT7J`*(^pC(8>fobQ+7y>?#*X$|1cqCI*6o(sCwLe=y>8zdl&SvSJp z13%@plV^bHQ>CQOm;&zL7wF<#7bf(9GROEhl*g&(AcLw@0L7|457e;VZC)6BsQ~Y~ zN`~hO1@sd~FY3+*&Wh~_^AcBH@koB{`)&q^N=95f5-9exRv@7p$yIpFEL>qg;Jt*9 z;(|MiAqjk2qA9-dHEPhI=DGafc+7(kS}{yKeI+I&Zs^qyh3vn?SAMTA;k%3RKt2KnrX&fVh3AHZA&P+dSJ#Ff!cu?@0D@`;h}x_b`Q(x= z88W_BvamPY_g>i1T*3IrdLv+cIAEf_8S|UlTl%%qKu|SyJMrMQ7M}qMFgMB1D27%m zX9cSPH60%0@tg$)f|NhomgZVR+t}Q81|l-Ql>0cvRe)WJ{J(dp*;D2^E2AJ>SzzT# z11+^A2exT2Ysn0e+tf$@4}y&zzu<8X3|$<#SXK0}Q1hV5oqUV`Qwj^s_8+m1sCdgy zJG4@vE1(#CF#3n+jZtkBw8=h)I@V25s|CYK+Qwz-C0EI7>{&x3?l7R1ntSH?m_ zdjwx*@RX%Gd7!Avq4i>^&247f5G0B)hc(HuI)g3II>*BHHE?YAO{6IRU8|?vtuW># zJl(Yhg}QG10&DA3>~Yz^{b0E94;(e7?YYR9U$VaJG82imvEIN}q(|Tx zzOu!?)<3pRD=Zj6h826VGhwsu8KUJ`9*>AOY5wo-Cxrb5^_J@Chqc< zodIRhgiSW4C?3R98~#XKAAkxgi>9Wa+{UpYX6az~^|2rJU}rJ8K#bB!GXGDb*Qp580rqR|IQjcdUG9k zb7U<@kB68U2qV$Wb`m}ZU+NWPdhlIKvCZ4bLqJ@ghLEqH3Ox&43nc<@Jk^Ryg7RMR zKYJWwsoL_@W0PKqWA$i06M&M-;Wse4c<1 z`aDg@PC2?)o`7>VuvnR0x;n^2L%@hJs?_xF&t6xtnnbO_(; zVc$Pm@}O2T-qlkWWTX7!txJNfTR{O@X2l+an!jHwZIvw1TC2$;;j;0}P@*C{)5hQk z^*uDNczOy!Wd@U{uA{fa&I_Kcas=7HF||LP zydIXI-2|(2=7#A0@597$!1I8R8uotFtWwcrM(%J*g}&#AL+v)K>gddwB2u%fz`6uI zpee*;A!H?7z#B~@E*J(xoFrf4CMW}3C%3~nGv_aQK-8K=e%i=I-Tiy~?qc+BKn%Rs z@=>^ZWpRf~ttt~yQ(AesUFuK4bKY~_RTS7x%PzaE+BVdviG5U@Z3Lzf4WdhihkFVT zUf%NvsP9tc_+XgWs}XXg#0v)cQUv?DTL~jjB>)?H6PvA5KvHyfmmM7x@Az5FGP#8(~xSi$8#vK6aFER29Vm9AH#*uj&( zAcnU4$ENJ&fBGJClce%6@nIY&ZqYYyw$_l*QU>T?AVx9LYb94Ps_*+1H$Oj(hj^1x z^R&^{61`MmXcWjQ<%jnd%M0UlGxzdQkrD9i{Op8=)$H%H$B#a9lsJV{UkiPvx+tKL zE;o@{i9J!y5dQ(K8u30OPVJDk!ZQR)r>qGjpWPQfH-YlRJby9^3I`Ix3m*F5yQT~ 
zLD2AhM};XjXj2fO2yNLm%?aD5-!NaIA%eT(21|0BeH({|^*P8q*6uG>I#ev? zF}9RS$>Ygfp`6?cuzBIto;f&TT zY-%mS=aRLKW8thF&p#bD%rBBc^z4d{N(OTL<nM9C94miZkM<5t>uXY!iowh8>tIZWjFRKko5Z495>()6^Xpy zR?)m*&;(jzUfc+PB+9mrgjm!ng)`7Ij!Q@H^R9|im|h5!W+R-~T^nuAB`qm`wFEfW z@5V7-Pb_PvhvT(et;!;iC}+MP3CCL*b4R@nZ(|1N)kCH^F%@3hmDT~!s}Bh5kBdxp z`om>wAbcix>_Z?KxFqIrE49^T?iiq_Pa9#grXwYmD^8EGfv1G+o^yH~=KJBlx~H`N z7g^ctY@ZQ{6niPIB3kijq6>6ye(Xa^n~e{^Qfn<+)^Glaz_ejV6p8XbSsycdx1S?uI(F3^QX-ZV( z=6l%W!ETB={cCRA5RmpRZg-$frnXoocg(m$u|UAgeB{$KcP);R&{ym8YqFs3+YWEV zegYi}`T%a6z`?O`Lk?xKteoczI7a>C*-N@s$ zQ>&;XByWU2PDQH|+2tY06Ccmn{(ad(0F#t|r1SM(*Kx3(o7q!Ej0>O{ex_vmkG5qO zKFF!Je{o`kUcVllAesQ)4eG0Y$eCZP&w>bGmP`J5$~@Wb7ps~^MmSU)E@h5@0v04iFdFiK*pBW!yvQZp~t>=mRR@Am<1@CNS}c_$0yJgY}M=s98UXV*$cPW74v z2P(O82L~<{aeF@CLcL)~x~TF)E>z>%>|)!I<11$ON`ut;i#k)!{Z-gRRg>(Df)NuW zueB=eg?Q-o8XG$Fu+VAV_lH7Eej$KItn6EWXJ`ibmQYL~E#@UB8P@BFfCJsO2T;2{ zw_7X9qj~n){PndL$47r>!JsXwUOjEZ?ZL%vhN1rTglc9P>I*{D*?Ksr4|PP(v%r%8 z$GZdWOz_xu0FAX9#nAUYfnGD3CLn}V3pt`)c+^%-*dKo=zVGh?3WCUki`PKmPzG@` ze$B|J2`Gp^PznC!EXc|`x}RK=YMxL}Eu)*8KLVM?il31oovikET)snF`KORO1P@+mCT+v7g=+i(dl-i4JNRsFY_3!VqFATF^T z6p#)Rp*v3AyHOx4U92qhbOtnb<+40smhR;t+kKh`ec3~yDiU_QjLmq7?Ar}}tMKjj zm7+0`L75ZdGhD6E0Y9ZD-?H=9Ym-e+ykj%0cMW)l#cSs6wkGIZ{RY@mwT1JGUrVHO z+rQn4DF{7WW6ef*jP#g zt3+OI{9gO**wg6hFUy+HX>CC1M$3YwOVgi>S2o}P>y`B@FYtgts&ZZ|qU~OdRm;0()VS8vgOw{Y)#7?yp_x6F`1f?nOzuj+@w9 zkT#q~gI3Po3{agsm0sdU0e&1ePbHv?6h4a`Es~Ft5n0Qlcx(vI?_i9Iv6zG%RC56_ zI@4)Y!`DAj7#0I4)Kx6IyeQtD!s*SC(%O|BQmGoyu#~f$5U+W<86D`bH2@0I#z`lM zgu}NqkYBMtqU5Py7S`WGEs=vEiXg|9o z87IUg+Y|_Q8Q)k3b*+ZDr{Yz-)+YzOaEjFHzj{PfE3pPr2mMa z!-R(#27Qy=p^?j#IE@4H5ugsLh_;hexS#^+_vOJ*KwJLmG24^me;GEO6s%Bnmj!Y` zSCY`iBqxraJ@lX(^Lb=90<3XHpm`6EAe{jEGP#fTV=B6EsC7&w4*W5wPZQA7(SsfF z*RbwY;0Le#g3O;iiPMWQq&Rewa*(NQN)&F32t9t_o~oW5UfFtFvK-(fT)2$(TS{Wf za!lfnq#PK!P0-m&i=}$< zgg|9=#M-Hz4txq6Wf4alqkjg-u1Jlv2xkkuP!f@br-xiuGBi1&5^N?$h)Box+HF1e zcBBo(i{d>SYT9l$^t-`uI|t8LSgD0wo-%mZhSz_JU>pXyoZV;iTuMtp%P@O@TLV>ms~s4h|Z8DXg1Fk=rjh@8Lz1jW%{BQ?#> 
zKe_lKL)SJR;yluUWZ$#qI2`pV=Q`YkT{irC7hqiLqv+UAY@2O%b`LU z(g@HKEoTNxd1uHDHzq%sMLKOqbbcv=)j8Aafu`l3AI}f4JlM~>OEGeZhExF=9e@b; z>A5Fx9xE=~oX&QOdmlHU3FeD%kXZ`yh%i#fC5hcZFqhjLd+pvUR6ap7gI5UbViI0z+!F!wm>EFyfM_4S&Iy zniWp>{o6c;E@G9t2}X_1@gfNmT_2f`lCYLG2x;ag>2ifSqh}x;9l(48K(no3i>ZUUpo3TQ7npRLRe?n9$UfDJ5kk)!H5 z&{}k#%4^Wmk?hM#p$-1hqzKL+alyzigwU$Hm&X2Gn(hzK zWDwzbINSo$JgBb*q%QZqDDJlVDynj&;ujD!g0o#vNhKK5rIB$R2@%gQKj2RI;0I~7 z1h%E?cXs5ETzA}(_x8Xs6PZUJ>9p3py=u^_l6;xD@epSlk21f5b9=^@9XD8CYu@qUhj3^ObBQ!;42R%;{YxvfYi4xa&DA3ZLA84V`qFa z5N;r`*2>HY$L1<3YmY)yzMS7a0s_WT&XIE>ZbXl1Kaj~sEpnv(>&l=@PPoy2atyxO zou%VHvbt%g7m-U&vyC>o7m@8brGGTWHt#8e()bqD1rRs#chdqF&s8RC%sXz9{-(dp z-Gs)EkSR_G1aU$GUSlff0s3qIA37`i36(O#2#5_eLT8`*XUm3=6e)b%7=gJhA~K7+F?3@zyk z0j}61^O{mM6fKQd5_J@Zm-)0OfO`@}*k%|8{TVzYT@md^e=pcPKYBK&fFIhEzLW^c zHFAJ2RO9y!;I9&T6#t zn+D8U!00W$hN^M=Q4Jj8rmYLp%M%8+b-h?^zdZKRhNp=COl)1KP_p0RO``@ zk@j3gL(GV*e8G8=aKeCEay}1MSSG)`h37^m7Q(u}eh*j#3gTIDgm+=xT*-ngymHIm z$8|c9h`-I2r1>fuI2(*Tw|~&nwL)amwVriMHWH^^@hsWFXXQNj?^wm&n`Ww5PogBz z(n*KJ`%%r=?B${$zF%Lhs7#_VJplAqd+s`Fn*%wXMsM&>s}6hc*F-vN(s|hXBAMMY z3guJSHl#(aRWQ}uB`yM;I#4D-?&w+vvr*frC<@KIfjeHs227x}Gf9KfQ!La4MBWT~ z5VD1(usVo!yq!Ttk9#sJXCs)ru-wcrgCrS}^+Okcnp8;;Kkc)sgPS%pTj-({6Q@-e z3}Bk084}%YQiHV5qq2B++k~bch*!Cg;Bp(mFx}^o#7!s9vm^ybd=E5!moguh%yCb!^UIl^97p=}6b5`HXuoT6Se%Hjn8W1=)J}*`R4iruAG8D; zbeH3ByH$Cz5;dl%9M|_7r~sc^lm?Indm$VJh%7@RY<3z;g3FslvidB{ebWbR@YVdI zKGOFrXM3lY@YeF2(4m&6Y%ORo;V+5QT16i0w?RDBm{7t8#$C$po4F)!LVMLJ!>di#^RTMgM09h9(VgfXTf?{hBn&8(EZ@2*+UMuA4Q zWe-mhiO^6Oe9(te+JW~x=>tf0+9Y$@1PI-)V0P&7|DTiw23^}^MxJ8O7&7NW`A{F- zIHV7L{2JP8t$ZotIin6s(hXcW(wvU1?Lycw-w!N>xqu9EW7agop1l|f&k{3iwUO!H z{-r|O#47H@0On~L95wbB+iXa>;g>KjqBz%C6c|tR_072FgWXj*W^VMF1c^-|hUwP3 z!@B61%2LBV<%kuu%d*hR)-G{xJ`YHkoDP>y50?j%N6w}NM~j4toUrChL*Jw}7}$Ma zrlFHFQIy1=-%V?V|JkHKx$Z>;fBP$EEt?3CxzQB56g`M3M8=awAjE=DC~lL)!tclp zd^jlJ3fC7^-(aLd!HodS!H|6)b_QX!+l`w2x-Cd^qXYs+5117#WvQv813{$Ql6(Fa z!%QZ#)MhD{amEvc!f2>%eXuQ2dKc&+(6^+>&xg~Ra6>U!0S&r9$I{%^oT*tFOm2?4 
zH^V!iU>bsAyZk2+CDZ=;oXE1$hBPghBHMu*BHI$4yNiM}U635?rZrx4yrq(KWx<+UmEHr#6942U^$f{1p%1AsLm86-GqMK$4E+<5#g@9rr z3|xtD+kf`Fr5HpP0G;*HS_ioEW|n9F$)D2fus`1L0=;}>o#wo!=E6G^By+BfizP?KtHCPqmfo^~z*GlCPp3fX0enhjk}D+iMdrE+*vQ z>Ai=b9SZJPjPqbTxPhasZE(>lVuA_giIzv`<1GMH0NPR@Eiim@mnbd?U9^cSe7wG= zXPzpfmi=V)gmz(v2h*x=G05gMk69TC0WjyEUo%YI36UHNAOoh6FrVRarAM0zrm3)` zeOlcmg3BRZX4gcbqhg!^Ou38)XU3LfAyke6v#awWlH4_egRzsUosdN$n z?IwgQwTC(rxOX`zaX8cP{ktQ+a(}fflu1BcNdOuNcz?YO7JNc2=kW{~kEA+N^)1pN zg45QWqRWyQG`+=ls-r#xb_?)f>%9*0!-QSkte)4xnHsLj69J2i(+ya7mlN76H#Bmk;bw#TwDpm(S2 z%M8Zd5CzvvMF@!X$-0-v6D=h)cRrF#g?NDX8srIiu{5Qma>j)BN$B`BSQlOlXeh;^ zIZ%!?>}B9Ea5=mmD9piV?CS*&A}Qz!Rpr)33AmS$sG7)J*=;d5jkeMb@rdCW=N^--Ai)sL-pWU8nIsF42Ly2h}*S z*lLFD(S$hKQYqWU40qz$B4@X4BhRrG5<;m-a1~G3xE!JcRb`M|^^zkrWHv=mjg~oK z4w0b`5Ij|r9^ew#IHoKm4-`>Qj^7!Z$}8`uO8?_*k`XC!IN1k{+Ptd5nBJipBW#eN|xOe9mJq6O1QIy#Tn~KQ8dJDw}8%HTGl0vc8ivSO5VPoN9fJHdvS0LnFi zuwldBbYUiG`qS$9z$nw%>Lgd*wCV0W3!q}}1dM~5A$h#8=pJQTbF)IYhJXhsn!!b9 zs9u=a3}|W*6cF#0bstO!fp)zMD#XpyY1X)ohFP-EYE9u*RU76GGS*C*P4g?O>Q+HL z0`1?;P~RiunO(;Qow{MAl3g>PE7(=2{4t?lu|l%}+^guAyTmV|rjR*#02#R(1}&*3 z2gW+#OU(Q0cJW_DLKQtZ!2~M((%pTkyVULi9x~Bg4(?Jnc)+#ie3-^pPzIaiM^<`T z++|fkLY@;4(pAXRJ$89-fv3u|>VXYg8&Ivn^ zTLBR#fqSYw5?=Q}g0Eb4msy>)nwweoV}+FTZ%Y_%5Bm(NE!*+7;+xZ#N>;m_YNg*o zJW_Q48(lHIacTc1%z76%bVsjxm(`gyL zho!&SN|s=B6OfTyUOv3u2za%~d1q$xJy0Bq-+S!G{}d|#;|TQCp>4|xShO&Ane+E@ zkDLmqSXx9b1v>xN*|~P{c~Fl%2oRZP)N{@-@@5U(9^@^8 zEFpi2C8NTSPL?p;vU0>Ul6i=DLSIBHB||OK)Ga;Ats=B?_dP5L+#~wJhX2{20GbPz zo=k&4nCP;_?8Mop^<-=p8kODwICNmCrJ3A;M2A)!DRdflXEGNYx13tS*MGB`IJ1n= z%UYUPy1l&{Q>ekwP+g)4UEKkLU7(`-u#1O>^Fy1F`kNk;+ndeT%S5K683<0FJ%5$t zn{0z-4rfECbz`C+KDN-=X)1O<+sVlZRlcG9?X-+B)5p^?-(*9soew5>7fU@fP0QXB zxLaf6{fSHM(W2gZc}w-5jh5Vy zd!6DT_2}ql9MkqDdyu;09D;43vU&8A^;Pdml9w{>t_hz4NV7~vMQ((Rxk?k-?{#cu zzwk(t8k9;yp0?IFW9Vt}cKg?jNCXXPq=TGR{7;a@H$$T<$3G=J*S3CV=3X3LJ0mT0 zc*x54hUPN5Fmf|wPWMA#(yOV%mB69-&+N+yhXYc|@k31K%QCup$ewWO%%g37yahFW<8KkZ`<$UeCnr>jQ)MN73*WY>M1oU_8HVIz3=o(|tA$72muTHYz 
zE_#@#Ra=a}1k@7N*o|TKd)l!>b9jsh`m~oIXYsMNQ^+zr^t4-UzOV zd6l$Ccy3;N5WGC)B~rT>byc`ENyB07&0oXIq{B(=-jlpbx9~&mQk&k5w)gvr@9aM1 zfUhd5F66!RnVM&L&BRB;>J$ZnX3KNX(dts-oA5%?rPyr#%HAKmBQfiO6mR~JuezvidP>9?H3u=#Ll&-_RnyT*>dQ=S8HP-F(IX&uFD&q)en;v zNNVh-dq@6Xmn3k%{fH&qfDq+>#h(;rc=WU*;@{ss{;@-7%rKx^UR1>>l8L^5O~TDU z=ymS(rFUm?I2UGwcm;HRI&vONu3FSB5c9OWT(OK}n8(x%MrXbYL!^r$4Qlw6`s3aW z2{6}6ujyYq@f#e1L0q^Bt4TBMqd@DdTi=t#FmnIx4@wiVlM-z{dG}3l++*aIFn6=i zvd?0=i>$XM>*)HJ6(=XU1S433^K@nPyv!#QTjO|rFW>}P3y+h121}WIpI%Q#J}i!E zzDs%I+UA210({0@30K$4Q^G30AK@Uw^{;GAD7 zDH%Us1j6>~uh~+SYpe!nEQ6x9;lhgzwLnX*B#oT3HR0ed|FE>J3T-;zs{mQy*k!ap zDdCxb-CLF9iYj`g20UsJa%s=UdRfR5g#nll`I7?ZKex}9B0FKqUo^??c_p{J<#4R} zl0C1=-Pc?6@Gm~MpNd?UwGA3c3O}!T;Rob(+ho7iU#~U<&g}dq6>j}0c|{RHI(3}fu4HPVdCMBsYGbuGyUz~^$R%38wH{0T$c18 zxm5h!W(&lqaj!=O=5cIJ@!({5B8PsQ6|dOiq+s;BZ*Ne_xW2#Kw{&DIZ~G#4XW&oi z*++f)ejP*}DzD%qzG)uK=X(n4r#RWo{Ng&KoRldDd;-%|@~^kOohoUY`Ke#hJv`p0 z#Mx--_RZTQhDpw?IPS#S^K8rE@GFI1#E1i(8CC>}z0Zs2t!k_=oM03%$;Y36 zN=SkCVfiIzYkb=igK-y9Qn{KLdJ1#6e4bU0K*|!(aAnL-`!pX8?2wCCLzX`eOc!1WnQq*drmg<`>88Gt7*_WFYYu=jhICF9LTgzjt{>CV%ww=^($B$X|J*y%Ol-P2Ty*hM0s(>{ZY$i{sI%yFt6E#F zJy-g=ch@@U0{114G0JWs6js>C3~ao~^hyukh1YM?9#?dpoMDAD2u$%B@8V0%jj9q+ zzLim7*6Um1r?J;QXKfVHa9c8W$@_S3FYM{9T#vdw@n>w+v$Aq#xHiN|6DUgzce83 z@J?Gf-6}5p#w13+44VW3vMhh(6`S&NLCHx9-KG|p|L7a}zG{1}*Swm|KF4^MjZ_=v zaIW|MkF=2d59Z=eDt*aN<&6k3ANc8%M4N?&Sur=qBmQBjjYLP=wqxd}LCNI3S8j~s zqt-rQhz!omB0dtzdFKZA8@|R*pMNtrKG>uTD3pM`g$omh zQE*(hkv|s`i^xSfM?I@HcN|L9C3{yUu*n>7RVe*bzMgt>@nh`P3VN@4De9Vlq4LY0tXKaPG$;;&Ci9W)^nHcwXV`qErMn!SX z`rkSRZRpErU@?O7ayR5N$GpBV;$A5>I*lMCS*}f(e#;L>MW^O#edsd98x$U4UWPg=c zk!{gukDFJ_#)_X5anH^{As|pVw02MfFgwj2={4f{(Sig4DO?*rQBe()fB>dQ7&9MCH$50Lr9LpXqhqZEe+3DIaG#OxtM;KG)IBWS! 
z?(PDeH%X!AAo8*&3w|rpwHVVo>5R;Ld0-c3*Jf_bM5ISlM&KJVL zf8&ndQ2H|DudO@t!Vx2PndMV}kZ|j55^_z&vxx zVWO^z>a4n0ikQ1`fg-yZMQb5IDs!f1(UxBR#OT)W-Ln2Ewb*jiWOKT;#Yln;m7A@-TN7ISDY;> zkf#gae4uVA_e6!iO&0Pn1GukujbOidf|H^H@ zStr}O2(#&ol6A@Y_@4X}B-vxQozUU@?!=E|(SYyWNytZq3Z}lg3e|{VgsyE$8pE_* z7c%x9WLZ6C@9!iQ9jAy`4dC++*BcDEXTbWi4v0{xZG0+Z%HZ zQ{xh)RC4Df1-j+^&maYAW!>%TC0W)FQ)}=&d#>`YweNtnWufQ4u?x0%SB7>;c}MxR zKX~ES?r#(#0rK3P>(+Mhzc52)0q0^0FD2-T&G&;dfV8NM{p=pl%bGg6HYR$$um5Hz zqDO!{JaWm_JN}eN3%R4nbM)D65~zq_?1(8e8k0VwDgSeskH7Qsx!1WGUV!00kiLoD zWw~EQE>PJ!`ZG!iO3^kob@obx?A%3WPbp{U|JHbD!BRHoNaI%qavWC_$-eRWyq9I@ zebe$xSHU>4ARGU+B#$OF{OASZXmbdaUCGlGA))-s*qAo}H3O?O5;%-?QKzvp&jzU{`&;?R?CV|ARF z1s#9TG3rL`p16`ZE~t^9pfv|j`GO8qgBr^m$3CrH`{exTUTfgIsthvGqogIXc9}E``Z+1 z+~Dx{f58Dy0XgIf`6z=}L8hxvrb|7$^Vy5;)!Z4~4;Bxkdih@(ya(y+d6mhRQ8mt9 zhrQ2kj6car4^`3ke1ROo7ZTQ~WZEy<1%6DpIjzav2cu^_361SBU}l9erNuB!mgE@8 zn=b1tM*nJuFU4LN-PK*#-xVd4dMqi%y!_KEK(`0YXYz_pU-9K@Ht!^_UJCTbmAt+2 zzR^ZeAZ}ji>azaoj4x2 z;IkmAKcys@?}Oj;=|2p;Z)=&+{q@tvPQajKk??kl{O|5V@E4&S z_7)R1-ewLve{E<(_WDiCwZFZPBnV;=_Xm9^rKXz@ z=5rM!y8-9k=Z4TSS)6%qlQH|?IM&=E0mE;GGF@_yL9_?>DF#&2y&650(`aGv zw*HJAMCT_gdhrrm{N+nLak3lD+}7drWe0zwEi9o8L(bFa=yK z728z0`rPq0kae;5@*5iRpZrZi?W^Db_CwD85haV8yMKdNhFtdcGRv3c%i3=Rn{K!U zQ@m(YJp4=uPYs|IS2bSB;`d1mt}JKUI;jU;VLX6UHe-!bULoCY=u+HT z5hx8l8!N9nH@pS{tmqOb*@1NJF>U>LHHdy5fm`JFF!R$SBdP>#y1b&1u@aUu*C_`0 z-+epV_%Hs46afC+VW&YwB&aOe*|f}&Lhx&UnAB&+0%@k_id-&3YDLwNZLBUY@=vHO zovU0>{y8ciV+E4>z%66Sj>Vs!QyyUR|N2_C2h>BGGFKlA%3IOBzMiKuclpc(m#|>m zCCw`&lI|P>ALn#k45{}Phc0^V?&^1Eoa%DBf0>C%>_v{=HRkWOTeR?!YNRLn$KEsI z9zMCNFX#Z^y2DOd)TJMRt5rU%Ef;mWJ~w*H)HKN-$`rTAb(5g%yDm{*x9!M{+hs?{4%XpCh#3la44Jz*j-NiX!4@9X-vtS?wL=-G1Q7gy5J z58Wb6eE-}%z&>Coj!^$6X%O`E$-!t)#i=9T{CwO_9wbRy)9>4wA*1CA`hyXg1M_`4 zEOJ-D`sIkU$rFU~x5|bxwbd|eO68m?G@CW}P2L@?;g6QTwfGS) zO9JU1ffA+?<>>FVw|MNTp%@A>FJ*4;7sywtZ8&fUP}%qoyzWXotVCu+S&uG zt(CS5zF!x#BKkKfLLs6#;3Hx1&AAAAMcRIf2d7<6r<{4~#{fHL&K+lXqhU~?y(&); z!i8mdAWn*m&8kwW=*K<2t&YJr}V?6Ig+x+hf8kINz}en 
zB&qomCUeXXMO+}Fy_hdBi3_%-c9F_SyQ9V~&x_YfoOKeuCXd_kD@=DG zPL4EAx`oV%_Qky9h?$dpR^|O;acx+LBfBEJDgUTkwdbsru@m*oN$}bAl-RoT<8!eL zi)Rg+aX$5nMYT9wMMdpEDlE##+L&6Ey)XFw<=WZpKdvg_xA6p2J7f1)nl)up$&+#JYu;?iAiHF>+!c1R zj5A?{ffnu=+Ze1o`4B7n@S6w%w0BjrgJ)nFRPyE8L?@1JZih> z512mVZ0!iTgs;bnkKQx3Vf@k;@Xl@gDa_j?W6{_+?N?fUOu4IFT*p*KC-tpgua!zn z&sa$PQ_ikX4|g9ysi%&K`BP5tSO7XA2H!o>W%D9Zqy#st&qzHDl?T1j+?Oc`^i9pRMK=2 z8DFAKy`{~XUr0(gtej+k0R-geDo=sqo-JYb=RLVl-pbx@eF1JM zIOn;G0Hq~P6%9z7{sbuXwQ+0y-`?;l9y1a738WU6rkufn-DkSMw6IaLdKMC~lu@TTYz|37TK1yq$=7dA?Rq$r(|N~5GwBBHcP_Xg?QbaxBV-GX#? zvniDh3F$8B?)ulp^ZobUG43Ac49DTfUNz@4pZU!BuD8oI?tdk@6&Pm}z2q@xmwWu% zAvLd6>h`6h1rAYVPnsD@--FVQuJ~5Uv6#~1te>mA9uGei23{YIG29+Bs_=W5rW&$j z5<_3anSeUqA%MffGstYew~&G6c$U1w28)18`+woS`AVkX0mS(UHrGyTXwt@_MH}Wr zBU`raKZ_PWZ@_UULhDWQ*t=Vi zb*I(8oBf?RvtNvB^~Qw$8$c)ne7#ksGTABnHCE9rcBc9}h&XQywQv623mI4 z`(FQSvp*4!{5Q-DB~XgcJ2{6X=XG$BjA#ms>}Vd4$hlZ)SYr0gG@_4uIiBgA`oCzh zn@FqI@iG8i$b9-C`c?>oMB+QXZ1Owq5INQ#&LQ{|j(k zlt4De3)9Wff_=hDT7RLv_0VS){UWfH-8Vl>ZsQ>SOnKi-4N%cCpvyhQ!-N0f9V{J1 z52`1+3CE*9phzgX?LF#&wQ|t?qj^^A>ru24y?+|0$HU0M{Qo(mWjGSVx%F$+qc({5 zAMqf8NzHeUa?O~w;0)+Ra*0dKlgv5`Gt2@#PTHilPvM1C?^9t+V}U-=V7LK-7Z;QEyZHrE_6<8ConLv#R1*yyjC`5be)M!`^DQfDHNU+>G=Sm(f%QUA-nP+pd7@3i zm~{Cc5Q|(p3rI%2#q$;XD2en5hgaLb{ti3T_H|5uwJHP2HNjU;WX!A99y3pIOVMWh zA>()FqCW)I3*7c{ZJUla9|4sDaPTFnCOuNGeye{>Y&zB{z0Vn-BxHzH*R&C!`O8uUgZEx!QrLObRIg;Z)`^oIpIe-?vCA zkhs+Z()$b`vEDr-N(YtY3bKs}ch2LNBzLb?3|pCvJ;irhmu$#IfwkqZ zJ04V6g*(Z+%cm1wYt9&E`Za%O9*k74IOepKZkY6_tXR(0u|`WxX3IM~$MFSn3!Hj8 zwSNDDVZ!|;Gmq~nJiY@(raYSb`Y@JZK)XPf1Z~VkqL6ZtpRvZ^+4VoW+V~8rd^-$9 zZ+0va?$Gm^6Bn5v$W4+YSD4uDL`~M5T05m(T5DJM2xBf?pvYyoo(v+#K)9!Ms=|7~ z=q94j%&8Y5kPNlB#D#B@x_n#@6XRiIsobYY|h`=r|(jiWs#haljHG^uvf^$;#M@15Yaz>D{Blv z^dtkMqr(uq;U3Zsv30!IPvIrE*a zg|ERw#C+A)V8i7fXhn$wHz#kfcA7@2GNJ)3#*_~xva5Q>Hx+=J-_(_>F~%8uzNd7c z^svyCf#@(uz(izz{RHNmpjIjer**IJ<4btYS6QT+9Nc0-D+7!g>c8}{PXqJ6ewd2$ zdR1V-0(`j85P_jYe;vx8%KA8dlsEf~Pxx>QMFB3(JiV{Cuh?#k?kxis?u!&X-P_R9 
zAL@YXLF&l(XI8(VBn+%j*_+$T69bTkCsMF1ykZPBlxU;`IfcuceSjSJEM&v)5**7pKg=Z-R6sjIM-7SH`x>n34D&;|fb<#2^KypW5Evb5Wc}m97Qr@nyXKuCjITR?Q zPyX(R%{>wNTdZm0v>oXIyBHTZAFccj+BB;6`t(12p#KLb77}3dAi6l}D#NXgI}Aaa zlUgA{5pow{9;i0aI)qg%w9{{!@gH`Y%ijEP-kBgPrP5m58A;Ymg@Uq^U!-Mhm<1fcqr_z8#V7j#1fpYBAINv+t$c#8T+G*U7-5 zQxNSzkmTA+y)5QIl1rOhiTz#(gy&yiB^6~enIpePOaJp1&~L+}B>ay?nAZ*h#$%v? z#hY=<%j!nX3-PT>{OzBx5Z2xW$%%%|5vV2wJ)SI?+QAT!Fw+Oh-C;5Hz@cw?zxAS2 zs=5%Ms&+@$3q66LJXBJ6N%n6uu4KVxKtGZD7^q%B%FcJpKU^eH#ZU)+xkFfe+PXUu zQ!0VR%3h0@_Xk;9sG%J+t{kj&_Suw;`QMvcD}hMDEwvP|a;J5GFX@*lF!7=aDA3PIkF*Ewh`JJy2hj6_`MvEi-f z^KT9i_z5|WSq5pyhl>ie{y zjg9NxQjeI-)AwykJ%65DIB!p7@n7ug zngxA16*Q)zM}n+?I<}zb@-&(~7kWnNQx1K-N@{i4T2|djOL_(Z#cDOLh_i|J-R%-Z zE{CGpJT``pszM7NyDz5aKPcKIMTJG-NMhV#7TvuPYASXX?ofa+I_a5&?{&pLsYNaV zeRWx&U}%TogCY{d?_AS!cU3HXy%a01Jnr5uQ9C=|NXz^mR@Z}GEGY*+d*d`;e`wof z>5~EjM>a#;o)4|(T3u&!L@L8Qf$Bml@ulnsgB+Tx=lS-Vp&#w>?kJb*6Z)>u8SJJQaaoxnt@}G)^-umE#`+tC)wz`7+*a|-`!%~2Y_-3;=(^j zyk^1(Mb)AtTI(zjV*|<>`L=h@-h7 zt0(X*WB;G{ywU-P?_?B6MEt7L+m;+h_T!%waIR540ycX};LuCH@i>xGBuX~Y=f;&tz{S?hplZa=tpEe~{F$e>3&bVCOenw^PeXgJMx$|dkz zpsO7{NrNrd3(WI(8x){8S0xY$z6ha{7iexr*@H6s)GfCUqxI^O?_~d59lY@(O6cCZ zJW1oUCGc#deTrZxZ5x5!A6h5IGs@8K54V$zz4pNOxR!YI>?)%qi$AP;!YYiz&BdqX zs)wEN-8}g%k>zdCCHlR~Zd?vZz?G9Il+1Tz$Lzv*TP;3im};S~v5BSb9bwf3l)FDZ zp`m3uxtV)4{UR@hGR?`(tL1zYrO_xDDCUV|gIv7VmJf^m+rNfLBuIA-zn7H|j%(>1 zpRtx7@P=N_)I-Wk!LK(0R-JA6#^K^CXL);PH6%D%mQ=HF6#uogIv=P7dW z`;#Ff7FZ4m`9S{BKTD{%Nd!G!`s$-TYtE|IyhV4V0ME#HR~$R`iHF5znykyDr}XeI zh6rxweq|Gv)x7eg7MU1Kzw71~$i)H~2np$NLYfob^$yDxAIH66&?BeYf~n;&(44cB zUDC2PBNR)M?jEA{3C7^I{f*DKQ?o*|_eu_$i39M(VkUfd=v(M^must;W-+w;_V3!A zaJ(+E=D5B-XD4iy;dGbzXggXAR0@ao{9Cd!LrutBTJOivpN9JZv3ZsPOnP2A*R8`e zfLPB*07)#}|96Ye47MujPI=Hw8w@{(%KAy4jAAd`)5S#<4xqZa$nb+jo*K1Zj=(d`tk4s6_vg@zvI#T<`Cb# z1J!e+D}W;%$1od8^BqfX?fH;ViSTw(D_ki6_H-C|rYGD&N#Ggl!${XXJpUGv>q&c} zI1Nzh+CTGUpUn{+;36gMDF>x2kf_6x!9+SSnXu-opIYTQCYS%ra2F7OVWB9_ox@^E z->o|w(@q*oLnl#2eqYXWNs=40XWJu)mb5nWvlyQ4B5Qp=$vB$$J4+!DKx@UD@tF~B 
z^;B$-LAo4KOG35pbgd)ec{9nB=5CV!lP5d>59%WuCA~}S;`35Z&)DxC#%X&E*uM~3 zHLLsh3S$}K+rtHS7b=qZaMgM2;dI|1#{s2_xex2-CuY5x8piTaKahbJITG%9N~e)P==1-iagC&c&91^2$z9ig96g}avq>1DcadFdts zAO0j&{N&jKPl$(mBB*151m@unOiz)&I3yFYa3|vIB(B=dOq`%k%Y?aETlQH zJClm_-#Jtdk^^Jf`XP+6^-%~jWIo_h66BB`u~Q4OZC%80uQC_d!|_A}WedT(Wi^k( zVfzwIGx=h4FvRw1TGg#CVD`kqLT&S)KjQKCJ*rDy-lz_H83Z5-a^naY-{*UWP!SD~ zsFp<2o3u_hU`x*jR;T;6ApMvOMJS5GwR#*WqpIm~hQ;gO;`r+YP7gyevg_sfsO@)( ziIL6;iLir63eb{zDRtWKxz%vlk9R+FNf0b{WBeW#iSAecs8b9Fw0PgA2G@%G9OdgR zA!w|jDa+Vk$Df4V+Mi!dl`Fw{arPuyydgPjX^lxW9cnA}8oSQxfy#tL*8TF_0C0(w zSGjKZmvX_!-<^L2%%`HT6I#gL-TT2YCorbwOrrCUvO@*c3;Zg>?o&idT!PbV1sJj? z(ryrZ5q)`o=8D}_HBD@fcX+hjZDT=OoMw~y`lURm3=5shLS=6Gd%3%u8NYYDbdV*U zv{s3arp|VHQ^cZ@jbVZ-~dQ8%kt$(k?1lut1upUslHoJd@=G&nt!<` zx<9f`Ji%qK#M&r1;(>CnTrs}YVIN~Dh1TkiM4SQ5JZp;#$!7dV0pN&6lV7&l!#fEC zaxu44*lnyJ{_ANJOGCGqt4oc9uiXvC5bv$b?#2iOEOG2Jbvw?48x9c+2WalQ`d50* zTnNaXm~kZo3fx`raQ;~jl$+S9}H~r_DS<)iJaBCpu0S&t>BHjSO)?X!-I&|&%Ee>`J z8G&v9mQr`3$Ud`Lx~bIT%Wd@M;Lw^J%wypSc&wT10Hmb6Vy^AXet$a9fNbMyIyl-0f1`aNGp3c zx0x*3%K0R{5)`La^giGmtt=vfI>G-)L`vhup2fYhS>3O5`a}qZ1la)fhfm04xw?yx zhGhi4j({0Ck_%ewTT&-IkY0_`4xcQ6G=5}m-t>REDpCA2XkS~6FJ-m9e8ZD^t90qQ zRZHAzEfavQ1@EA+{ea8G8wJu=JvVL~Z~KQSaBAAc>+zwqRmJ5g&EFd283Zo$?Dx9c zEFYc8ilRU#^+vJum1{dhBnZ?jX0bgv`gxrWRIr+yKc|8wD-v+wl|{1}3XoV8M@9e5 zJ<$md{%t+3cvc!0)EkJFk_kIJiKu^FDc;?oU8bQm;>pNre35`2-ZLld7%Q-hfnyu$|F;#IN?|kc?A-Ujcyj$FMGW|=v z7Svr%>}#g4*9-)wKC#j)KmHe)8x?^>E3<5PM+flck(f#4`P40N~;;&;bv6 zW6R2Br0~2#xDnmZ9hq2rzsWnm<6gRpWla7-`xO<63iA}E*L}WZd3I9^Y)sNRA^;Z3 zRd!4{;e^_5dMY9}ee>}GOy0bP3!!`S7YVDJYWi?tcEVf()4YxAWb1E1C_Y;v)S1(GN5 zC{UaI1kJlU3!9HX3ey7}fAH477c0-XJ}m$eA!w^Fq1AftMzx2x>cDEM@*bp$t6se< zy2emfd7-`YdTM%0HV3^9#~p3vQyC^)4%!|1mE(D|u1*J<-kIS-#b`aV$~u{5qFDuO zBlo7orEYg3a0jX6xc)eH?xt~T)8i?8gl3TSAJ@?Pl;QSQmRQc%R?4TeQPd`J0Yemt{;6 zv=v(-5GvuQONWk5LV0TGfb`)*lPgJzNv4So+!r>Q&-y{YU;jyo!I$BI8Rv;IHcm6~ zWZI}Q*~AmNYC>|06a$(?5l6;6LV{+HVbrtN^`OzOC}*TqG{I`^O)T}bd74UCNmgzwl@p}61Nbve&A_y@{t#iVcQhRdO1$e4(V}U1B_&`R@WV9n1W}lBu&zgn3 
zCH3?dh?tL*yTH9wQ{T~vhx-Gb1YC!Qxk3R*9sI`=V(S@gv1Ex3m?{62pwbV&1P1y2>K#$UCf zz+U_BNu6dY^3(J;J;|Pr*?w7kO5?^yGS}5g5k4 zS=?;<%I34A(ZH&`3vKSCo#g5Z!q&pu!)M(UZ=@LLrmQvMsoZs#19-SlEH&y88W^cDiiOiqT8%e6~@hB^6Zq_N0U3+H1o@V_;0QW={JwNeXpx@}L zZ!%nyN)l8V+IEw?!hMdho7AGIe2EC%3^8-d5)(^))La5jnTG4qNeMk$v3!GO^KDf`T)aEfC9{ROZ)ljvsz)dBVa5VMX}bvx3cc%e_|kDd z&EU7%Tef|BYdx;N$NF{&5O z;E~(Q0h^GFFZpgeM=}Gy8>#!Y{8e)7_Wx$C5yzpoUBEDWN`K;3^+_AOr!DILo_e|M zj@;n-zVy=NGud{ynf|kk@{+FZwFsA5*gB|S<)SFbWhwZb;dkk} zOhzj6H%-9xBvH$~!P!>+cbCnAfZ&8GzKkpUcB#=*wg+zB()(H?gM6NEl5Xy@{Q0sE zt2T}r=kVByar2Le^7mALd&(kE$R*U^A$Z}eU-un0s`k_rhCj7&x$CGprAirJsrMO> zBewIX$&>jLxdkpz?Nl*WQrC4VTFf2WeQi=KUs0etm!+B!?J#_hTxqtVvT zZ}H}8N4fWMtQ#{jpVo~+j*(%X1P)#hwgC_b#kJ1^OP9T|wrH%D#+#cirL46tWU0PZ zPuu?bY^>f*_))nrKCrHbqweBJ5S{GzFoVaJhNn_o98!sR^sb^&jRVvP4F1vbHjO4b zLsx1{!pv&VUBJ`ProrMWKZg&^v~5KeY)4SxQ*gx_Fmr&hPM>0Um7aw)XMODQtN$I8 zk1_Y+PyVdHRn_vT<;lqp?g{Ggu84e$T`Bfj>r7g3?bValSNd7AU_a*4>Y~;H@T~^?q}X&EhiZ z!q`3`4&1q-$#{ua>Whr1RY&V|Wudhx^-i`oXb~4|pRtp|XXnzkD0xY=X7EgSQTL4? 
z5@Z3h&N*LD&*fqOM!y;+q5YgB&|^_MSlN7IXJ-}l5kEg$8}|8_eF7I;HBS@hM#{5oiU z>gj^Ru{#T+nyoGT0H(7kC(iRKW@y1{<1q!mgX!U?;HVZZpX{TSoEwkv7E@y8W?w)g zHiYCizOKX~?e{&5%tVe&gME&*@Q42_islVYRAN# zN)GrS{dO)>-??;8lPxECfE>Oz)}h#my!cx7 z9ZMus;9{R4!@J;CVpUYjIg}Rv$HRSCh6x(Za#7@7Mqi}8drgyJ((^c?b?6w#1Dok zxfm1E(CY_lPLFMuWSU{}=l4QFD6*JLVe3?zTTM-4;jIcvkd8k&;v3xZJWUzj_>%JC zrG~+D^zu3O7Y+2H2!aqbvcI8bcKjKNMv6Dh&;P&BH6y@3N&2av(wb3}_17(UodO0f z0IuqD*a~6xu$sw1{P4Az{BF<+tJEQq&D+?BSC+A8E%YZsy3C%%e);F92s)`rkZ0XB z@s3}T1AnBZGnu-S-qvNTn>ofmxUo|yrDsZZjOpdLqvMOVw_90>g5yZ1oB_9x z%ILAVm9f(@PwJ`nnJkeP^P1JYV^XzRY&j)?d7#mDr&1Psz_PxLVchhdy6iYt}w{;N51 zQ<7)n)0AB4&|tIiQ>vZ0$y_$Q)L`-WUN6^%?`#=Xj>LOWz>f=?(%&p#X^P4H@@ool z%}%72Ohjfx!Ik+-&NtTV{vko(b0xt>WD$voD?)3)s&h`Wn9}5Fj5aGrI*02kcgEZ^ zysVSOMO3(3>E;(zu7@OPJj>TST`3Qw`s&M89nh|oAM`HpmEGO!&6K-JX+Bly2nD7_ zT#VEsOt1mH6X-MmP@e;)W0dvNF{}PA5|BwtId9SxqF||mPx*V+`FBhs^W(u%k?bC) z2nbb+&ajI5Uvsxc18Y27kl~g3rnnbjh9eA=f(?0icpSzqHF<7rv0J~N@<*WSn_~N4 z{r`IMNIn6$IhA;}1rF!W=ZlYX+Lv-3=1G_^K7Ajv4aLD&_eNn7zbg zs?ZP?oZD_GHkV0GL$}x9gvjXlLWD3w@jbI z^ajtSP&oOm%-`fD5Y^Q0qgt}*{KF#}M1(prfaIS|oYOPRT>*OWYOZfzWD4`A{Fy^G zNlkukL2J0)n7VAl_J{rU$TzFq z*hp@W4#r7LgB_osf;Zl(_e|V_yuCxix5{-{LMy<`{&s&o^^E!RLs5G&g|732l&TgV z56QrAJKznCS{&fmn;T)K3J}fifgqPgy(vJF;AnrQ<<7>&AfFl;_bnj~+)Z7SG428g z$E!p0?%IB!h9w(DXzZ$HH;a9|CFqc+XRx2sEpT(Cih9JCXh9QV3IVJmdW{!g=)Ed@*^c7M+1Y;dKR;L9AtC6DK{^5BC2#GP&=ysn;^>Fb z8=RsSHv7KG@r0VIZo&q!?W8TEGm~YqIo!eRZ*_};gV20TVO#cjE8%!Y`|Z)FQ!1IQ zm8WVTh;nnO3^ZRTm@> zj$89Iyu6GXNWxrb73_E``*-?Chv9gZM*J$v>Zyn)0>U5f&%c!>wW(etcTXn0f(+Gf zWE!qXGZScm@!+wWlD{JF__JDe18&d?DEpmUKsd`W!)Lw#5)6UwjdJ$-0&UJ~1K&Nb^4(We~Or z2~xzqNreRdAmZvP-m{hO><6#Tn<7T=dd)z{4YVGpwG%l-Y#IT6lM z5MF@K4u=@@XG{02YQ1F7;8eEY81?J;B>g)m+loI+fRTPAmjS6g6_F0y{W7ai(x3;|1<)u2~aVVa2eq**b zheW_}3DTsKZ`&p*ZhWmNPqoG5=|3}(Jy7!bEd9kV z8^8wTm$FCJ(jdB7&ZUeBIi7B=^-E?)g=<)6twfghLAVugej z(I*UB(0ns#OmaYshP3drCl%v8eZ}Nn9FIod4U>6JWvF-9#ADx(% zjXk_YZC|tLx45LMC9gB`S^w4f$c${}DZXIo(;Xy506eukiq$)77A4y` 
z(`$#tKUMmh*BO{*k6k_sq!GoDK|W}XkAKdYu@NDPH)k`jSUn5_jwwGmCjaX14gGKf8^H@2 z``((IE&q;Up+xt2X#SxlQ?F@dj%=lp+dOn~tpTQ4dqCQEq)}U6!ZB_(`=`&^%~=*- z$(E;3zd)0GFIn*!uVp3WhwO@6^8rDX>9Aaz$><%Y*d8@-o7b*c{x(x`trDvX4o$)G z<)NVGOTm-)jl!~dX_MH_L9juT3`Hu2cYvwDwdcuf**Q#{o50ZiYrG@fna9JuBFVrXABa?4X&5& zf{`Qb@@oE>g>G%&0G0E%6I;DkQ4xxiM_(_HJAu=2S%~e=7i2TQlf;5bB>F}PDdeBx zB$W~DMm*AxO%4R&ZdkJHg4xfF9dIw+pFK;h0nphWechHwW1Bo09u_vy4Uf6$@@w@rJ9v^f z#P^AGvqoXlU32sHE5QzUNBtOxSUSiimQ0^&gxFYlsuQ~!UkPXp^=V2TVHvDqh7l3T zN`LtqcoL5J=a77EN8yp_DD(8yriuTfi#IVEnmRO+ZyGMmd# zW5HouEGLt1ii2S<2Bv)o-He{-Zl(0jj@gSTC@@~E&jFvpRF;MkzP*H$PHu!l`p z34duN7@x&d(wIwmTM+2 zDtbt4NJ1PjIfeYIhY`JEBr7_ag__7?|^{V_tS7> zJV#K>L_wNw1&Y|88?I_SAIQDBdhvaD$yNUJ8|mM*HNrkEjb4-%kad{Vg0w6DH1s&} z7KUvPQb~*yoBHPj>~T#2A627loZr{G>Fq6n3WUSRNzS^|<==@+Tq>>6V@*jgqWq)+ z``$B%mA*hu&xF1E%aR9Sh3^L`-Xg1xKS%P0E`V4lx;SY#KGK~;W0{DTo$U^1{e$R@ z{bF@uQs%KPA}l9DGz^w6I%bOa+u$cVrLM}#w<~w$A^kC>H(B{{=rn~va!I^4E1=j6 z?sjgH>xRKSYuoBhDZLzlooVa6*0O^umvq*+J&2kgP+)Dqf>Aau0@HMOTs!#P_DfRfLLKgQU+A zGwmprmk>+ZAe-1CwujL!O*)<%Wbi=S276tz4AMSDW>*^u2+YknZRM($y3p7N@f(#l zbRR6)+wIgrwr(pn8;Ey{I9#K}`(IXw2>uR@2TOy5;>aO?-6Q`>m`PY}ANY=!ecO=? zs#WvnZZpb=cqsU{`z0F$%C2R~4;Ao`?$UhL05l8}-R?hdifI9xzkZ{(ZWJuVk<{ig zwz&e%yvE%bW@0Pag{=uWd2F`XIqnBfIxa{sC6%9L8FOc;j>_{>$a80bBq4OPSG3-6W>BWV=TP2ArXOd(4HEIJgA3 z%+Jh8Wt@yuDS=RRyfrC_*_42XVdkv;dN47X$1&`l?EW1zKootG@sWWOC!Y@g6Eo>2 z?nhAu{si~rCN?OLFhqTgpmKDYQURniLR!V*?{At9e$h=><*m9LBxJu8oGD|&$19}! 
zUB{+=krq1rCb*NVbR6*O_QecY3Ow}Nv*cFd2@F@Ov4z?v2Dnm3EkJhWv!HSlXl>-x zeZ2k7MaDI7bk?v=W8Ff&uJVwT#Ns+0{#tdfYTIWcj9*U$yR9{lH|38$wqzE{*B`j} zXXYzfHWg1F3LX0YaTH&}3SF#OFJvG%<0vbxC!4l*i?yg-wXT;PKXMq}*OQQ#0F8y5gcd!rN=wteq!sFxAv=`U*GXt$K@xZ3I(314?z#AB!DJ@=*2Q z&M6D5+L@Az`&B9oO~rogsbF{a)GO!o?5f)ZQ6cn4zqH&tAVY=hB`M=wL>p@8ObhyD zeSC|VoAo1M)HyJwNkM8(m)V03uuN8eR$C$XaRPh$3N}RIS}Jc#KYN%IAX5HuKULgz zy;O4G6A=bQW{>a5GO zR1UJ*nYy(xoRZNcD`fJ>H)0=YWUcTjTueJ_WN-2*IR|9T0#E@TRS102J!cFp=9gDf zw(_?zF+#?2{v+h{=QxQDxQlX3ywB`+BN}#j5jl6L{u}Qt7|#x-SK!`UVRBSfr=sqn z(vu$1GscUzi>4Y>rvEehJYv*38HkW5EtGtTL=VCLp&{G7AJMBiq?OofaWW@!oJBfY za%4^Sz&+o}W*N0*5f{`pK$&6oOggnzB2t(sMbq-^Nr(d=P~fA}4+MdsJmGV;lsKX} zpq0d6rb}6BlhYl?%6ub_%xsqLWRoBCl^?fcs_8~)StL+u`G5UhnhLK{a~d0=NaV0< zmVa&5(hoE&4;FWiVlXD7{mzRvDERHF!Q(reT$yypmwosvJP;6OfWnHvKENU^r))DU zzgt&zoECHOky%z!`}Z-z9A`V@P^g_WrABs%tL!;BbzJ+bEF1OPb9m?l!B7CabbxsM zDvDWbcBW{BRWiFWr*m&uTj6F)pME~J>QkB=vr*mSOH9MK;>|nxoK3zN zHOI&8ApHOzs~rGbs)?riTds`f_D4}dEC#rV1@klb-D!MnWlf>S-0BZJ_2kVzZZ^pR zvJo@GF7Cq8#u58(>G!n)tBY%?J8gKVk-Ugz-0!F474qQI-)CV9Z26?Hkz6OaS^q(5 zR$=2z`hOE=0~1#ns8ljL(;1ZX{{Zmv@%37mj4Zw{}*b%9=`7P&vzcDO)eIY2!%upls+q)rn zK0;so;s#_-;A4S*UgBf-if!DUhm-T$$7er$5Ki3QAF_-!$tm|xMiI?NR=m#|&H^W< zZH-*(nrD<~s?7rSRVEwSseJTY*i<+?RAMo#!RX-GeY8SA$fMcULLC&jYl0o6?mZ(4 zF>!O#)YH==qXu=huAq1C-<##`?phLIktZkfQ8!mSaOI+wO7xXqRLoxD)5ynmP-8hF z&?3Di;vsB}v{cw3r;d!8&MI*QiZMaK)*1OOcZT(jw4d6$`|0yOOnOt2X;WVmE^@0% z?Rjn8;V^$ji0L0A!)WHcKQWO2A1`Ac3>?)>9}=X4<(L$nVVrH>#x~qKk~0W3Oy4-s zIDc!iCd;8l$bodWja0@J`Z0b|OCV})wV}|`U!9$1V{K#MbaZrNR8-1>R7TJ7v6U6& zt8u&;fPX#ZGjq9Shdo{WAd$^9a+vXFYg}(Sw&gdH-CRr%2#%IJF8-1V$}}#C&jey-o4}I=g*Vyqkj4)!z%N@ZBR`9 z>?TTx!vIez5jkhG)=kcaDDT>$m&-lM{r#a-RHZSqw^_5-`>Y>CkI&y-WlfajV}{`- zm~Jt%{2{^MdIEp73P9mNKP$?_w6s6R$5p3$^Yb=p{1hchs4COqqqSdkzES+~H|<5A z=NT)wN$1NoUELeLlD}2VK8)W#YiC9*+mf&xC>kH0>oJIJv#OA(Bj4b4_j;@$Lqf5^ ztJLN?k&|=9LY=P>~-jb zUt2jM6R+Zf0EO5;_#LsY>pJ{e7jcg&vG@YpU&Qk~c?N&a7lIDY!*fe3?3j)Q+w%Qd zeg;B0>Fi4R#jJ}aLC{>i)`QY2Z00n%iT-nv@nH>M*T2_{VphYeO&5(<-DHC8665fH 
z!+N~x{T>8=N6t~_Pf^7UF2+@moEZ|%>{0Ko)JT@K&^~;la#{VRcv>v-bUI0F>tj*B zSa6NU;97*Be)Ywr#V)$&OZZgGjZt8#k+HZj)6K56xBGZ^5vUy}pQvvGXypr3q;_Ib ziWHNIP+%iDhqdSV5ByS5xhDD?#m^h3u2;ihDh~p&dWomGR1T_DSX!}_c8!efci-o+ zuavYD5e-}xoXFyeZ@*)@-yGY2ZfR91Gt6MP9P)l2NAiJv~Q0_X{E99R-C+U$e5h96+1SVNaBh$^*Bj*~2gM_sd^G2PG{thDYKy=d1f6 ztEJ13=|;jJNcIqFjZzJt->m7Bwr8`ILY~Ueq2X_w93u2ptJBT)>W|5Ej|I0FkU zlX(fXTtZpn%de+5e=NpIr>`f9axBL7#>^ugL zk&SBQT#olA-nLkZ+Psjzt&6#M&72ao?QBaqpI?ZCR#tw_(qi7>w_uTO&QWpmCc|mA zlcfL~FFB&=4;q7dedriLutqQ2$2XA;TR)izSIc>YVAH5Q%2)d{!Bv)K<2m{aV@nxj z%A?dXrYZ-sC8k2d4WYLSU}L;N8l$s};oqkL+rkU$NR2}4cjkloEM(j%y-bg_d7qWt zEWh|TUy!a|I_G_vO4cJ11XJY%hsVJX{VS=0pB%rboBnJGG&D75s5cE$H>vYjh$~^` zm1e2CWp^-7D|z#wkB)T?H}SG9o=R!t6_u$sSvbj6^8=DLkKu-Bu9*=*h9)N7ppJ*+ zO8q=nonzrazMXQ#h`e)+jmNMey6c!nt6wXEmtlOQ19U>ity(=#vp~J+1;$7)#~xNy zF;OYrU@?|rhzhzDBl}0T5?-Y-i>ZR5I(5NeSgPdoDNB3o7^*8 zkZOZPBM?8@)x4c4P(*%OlW=s>u^?q6dlh&nqb0Jy(9My|tn)?(=ptT3%q*4drUwBJ z&*jIK78^FVKVA_rk1ZxCR}%(j$0~)qkJHE-MYGh_-HM7cwJvlwo_*nWWQt~4Qf71> ztX^#FKekl$5fMR?5(IbE-{neK)-A&Nl7E5-sGqMlSG*ikME{W*8*D`H+Ex{m7tp?{ zHX<@nD+|?irhN$PHr^qYO_AMRjniR<-Fk9WF+{qTO>wiZvXF58_bRAbc?3o?l9b}^ zVEZTgSew=N%mgQHsh+SYPGR?(3tYdLfe}0P)|~MARcfKWv`|7=052awY9*w6|CY>A zHQ6!39TI#j1u%TD!!S9y6#GlBgypA0Dfx>gQUaCpABqpgOQN~r#{+)iqXC**ggv&T zQXcIZxifBsVI+;}TxXAAR9)&=Tkq(XxhBe2y|$z=*yC9ugQ*;RjNJ+wCJ{Fwx9=s5{RiR z>yTS}Uc18BDbe2sn$YYWm7>*N^{LX)YQ+aXnfZ!qAd-ojR%0g|;0cp_v^nLsm_N6e z2Mc#pmwLs7$aib9YhwRc1nSr155vIh*pT_gTtqfmIs`1&KJYI9ztHL5zVhoE_>ryM zM>kvS8mO)COcTd(%Nh0YqekbW`aSl*_H5gns0kJoOO1#Wne1Xey@GJ}Mw@z;5ySRE zb10e2dSh(3_6QYg+Y7K`W#tzt4nhSz$#_&MPLrqwz0}U}ke^P6N5mq-gF;<_Fk9p0MC_)9+Je=hiA_kS(ucS~Ns@oe~?%W|!8W&_9 zvq|T%M;^;JCpS5rulL~`rN}d0jeUToH2~y%2;}0^-|F-XrptWXOL!40_KFu=Zc%*> z9T4r4E!gHe%xrZO5>-*1CyG0P1PVb%>J7dExTr4FIL2c@-VB>J$ls4{3b7uZC z$0J5=6$aL`9($!#53L~W@vtn9vJ42d(^c_VuEB?R1kfRe@OmHk1VR}ZJAY`sTCylU zrcT%Hm1%L;X>vi>7{P*aZM{X!q$!T|&Y!)l>il=m{1<1k7_(N5yl3CFoV1IFEH9{b zv-gtslrAla<5`-_lon2Q6qwl*O=Fj0qvQDqS5AtR3J@Fix=~h#$CSLKcf_oGtiiZC z&7fcjIt4fg!B= 
zz>9*}Tj^bey%H)zPF{Yy26#%d))k^17Spoy8~s;&mYE`K3T4dF^fGCerO3gtd)UHVzvDer(&v75_GME-L(*W_cTJ*JfZ=x$<{mSbpPZTuP~ zQ(`VT)EafQJ#A)3lt4gZ#%re6WLR#*Hk=}>covJB@MDjrzDpUeTl2IFRX>cZG7eXC zV$H+~&YW$U>B&oDacee79uzRhe}CAmqv@gfiye&h;?n0^OH8L;SaLU=5n}g#y5Ikm zc*S<-#=ZcnoDQQk0q>lCG3V(E@<5+t3Z|1RNG~hh+%MF@RI(&V-%_04wl6ye?k>~f zY;&(9$;l&Szx3GXlJe7e0g=&_-4c^r*~TIH8C-77>fKryuLpElCu5y(2qw%32ykax zM-22;KhEjbLspk(R(tYJw^ofXht);!iDl~F8M3GzZ#nx_d`-b;p~T6!s$s+WW@n!A zz9!Z#3Hc=*y6uTIT1$K~znr#OGPUWvd(+ZxmM(gVB|mj+%JJReSzLQV<&;Pe_L-uk znM)3tM_+As zQ+f2%#eP=noR{zZOcVY_^Ow0S1Gxj!(PyeBvw0twF4%QN=`ct4tMEI8Di0R5PI*n% zxfMkQ)IY0U{YlgIN(OOA@+XI@)~UP7(j#M4nv2{t#}OZjQHC4w_r zbmKkxyuliO-w-rDkZzut%c1+^p;@y;G<6XC6B0~4KJs7MkpKJT|J{fGpBTaVa`Fq| z@u$t~$~TdO58|=!#bT;O@i+E;Qw{gBC7+*3R(gC_xMJDt^2ed|)Zp)aO-M~H;BQi0 zYI&FYpx?$71`7PHpVmfbM1d7aF%mPL`Skg7+L0}y)k&w=%?79u9-m-x+oknRLW1dz z_@@8$BP%bT^7%8AG&N#3-ihFY@$Zdysrn-OuKRs46}gF_pV7f|%61rvMainDWRsPC_|Po&CcU*^{9P7zvsZUf zmR9n|^nmtht^AUb5_1Rr%=H;2QN|hXrl+S@hQI!^i6%#k?30{a^44QD4u=k@-?(vO zdaPbgZu91QmQt0pnVorEZH1mYyocJBWN!t*CTa1^VW26KkYPRVWAl7|AB~8J07y2y zu$Y|=Yb9T=iC0W{_f9i@yXV7Gcfh`2anaGy+x|0eFfB^1cf{W{il=@i<)<-JfM3dE zw>y4Z=i9e$AewpxUsKE_BqjU){Q9=TXY>K#q;3f7t?0KFZVc2P`chij{UkV;ru}67 z{-U{wR_=Ex(7%-BNPuj4C>d&rCKw zI~bj8;Z|(in(x-U#=GsF;!we^{0`p?-!o?p$|Np2a>1MQsKaE4U*9UdzQ5&&-{Eyh zf07}`#Ue#F{$rY9in)~b#uY6k{?DJeeaUKdeV%Dzps{InPQd&3@B7Mv>2{nRNhHg? 
zzcQ%V*JT?=tWokcGBh-l_-}{41lRb>rR#o#E>})-XgjK+WD<@0?urp{Wzg=3h$(7@V0e`CX%!cV^PH z-4nl|d}ipm&(NE59HXM*1y28n(j4~dLj~X1{1;}v924Aj^$UI%c3oe63OrU$Svl+R zI!Q9N`0NI5T-!FXZlVyHN!@|l+ z)|)5LHPPy3s=$BLE@iTxsvE^_xAYATHvai@T5@I9^3e0=&--L-q}W=CtX#Ik?@!LeU~3A_w!iabDB_XC ze+;rpN|~nCK6|oXj9%I(dRwRe?wO&w;>nJTGsDHE-W@)|GxB9}GKeTP^P@4o-y-D4 zI?fEcqzXnC_%=o>FO1tPY|`ZA>-kW+@wLI0JZ;1*iIwiR{k}BAaL2F}jvwwSaqqlK$$Hh$a5Laf z#k&6UDzob`4x*Vakqx0NSRxb;O=lAUc-@3?(?HP@2nc6>hxzdKbYQh7S{)L=`pgKSlj zZM#{0@bHGno#lFr!J~yA{B;B8ngiFQR-RO%{i1#G^&!^5_33KvS(V z&uXruuL%oN33=GsGUXEHLBmZqG+jDgOob?YlCNs5Q~FSUZ`b*;>4EHl@n)OSu59N| zJXpclpKp-^b$izlBXe8`VwJw}?nk4kO`~+cFXm!+LT>fTJ0BYxlgW`M%(V5A*Z+=- zkc*t$M`t!!)%`?y{?B3m*S?pN-FC+>M^kR4S&v?*3kB zvq3AD{CdkZmj2#n?GFx2JdV-Y1Oq(N^7L4wb>N&ABE>J}-~lHmCoST2cLV2rHmzV% z8mryq`}1+-*ht0lt-BfdoK)sxN@tU;1E%bYCt94tUYI@Ia{Ds%zq6y&T0%Fb8*S!{ z{eOkqG#pd@ecP*8C9D&Idv>!%f$-oH6nFcFDJgRB1vtku%cj|(OTT{oI_GWn==D9} zDL?Bo6F=)RvP;tCzxWxxK6Gu}mZq>nk5iki&+N}Gp18Bov9~7Px99e;u?9muM_b#V zCF-60J5TGY%=PKvm%B?l+t~##d0BM5y4%S=5FR=H-_{R5`ce{_T=LuBe6@WKPs!=p ztj|V~`f|3z%Aj%fL+-S<8A5?ov-0VsZ2Yz1(j}=0^J6Jh-6v*8a(et>&BSBoC)^fH zmaEL>kaJf0^@{U_IMYDOt5;zG)Ab4c$uZU+U%jGd4pCJIoPSZWFgH9f^JSrg1VL&4 zK5gIrv=nq7-hS;ow?3H?q@8L~kY}i(Kja-a={;UXw>-y|k{G_Q_5Blxh|8CAbHh`g zUH&mJ@VtLm1;HT3Cg7mT{I5u99xXcOgCZ-h56}3{4Cap`Xo;_l9Qd7pa|)PtR2nIx zV_9Ba)TwZVoTSpi?C+M+Kov|WqaTX{cLF!O{}%+HiASSFV6o~SuiBq)OUmJZSIz-*wQIJ|H)3#?Q?kXDf zw;3*7D4CyJnEwL*Sk6qFd+q(v@z8a%stYiNhaUpxfBQ~+KIZj7X);Hq;OHVow2hCA zjjdOitx)-qFyEIj2Bb5PA25)g>pydfi=p}Q@XYk*4*?GYr(y!@E^-Q+vD=;vlqU(n z+NXO}yH9WYbiZk7y0l*x^VrnubdX4xZBP($y!4Ba5bM)jO9nfNElTHFOEZPsrc zcs{^P$*pwmS;+`d z8@LcK_@&gw;@R=Kb+a$m{YXo>n{W+xddRyp1D^SWr%Y`7sh_vz$MhDACfmIe zkgT@1`%kt@EX(4}#=QpIK(yuLVgC@oIY(fH#gTmFyOr1 z>(nVR-Uu4`##)@M;4dZTnx@(K!4r>1-saVGGvv#+l? 
zK<((xNIchN6i*KPA*?cW@vi^GLH|K#8^dGAgu_EtElzJ+;_01P07T5Y6D3ARfuNWLGs@h&sN~$R&tgx`KrLwSadz98D zu^QK_01SJFU;Msu*Kbr)X|m0;sPAE{8RSX#c1=&@m%g{C%6{6>VcqUgxu)fbefTri z_Vi!BJdlkJ0v}zf{AN0yx8d4_9XobN&76AXGVt^BQ&KM6-g-=S5!J@;^f|C~>(<%Y zEV@dMF=F*93sWD)Gu`~Pmn;7f2oH&xYkx`)D^PWRb!c-$WTf-|Fm}_(NDknCLwkGX z-MeeOyuEF_f^y7J1vsgdrz8ZljMP1I2}o>yNlkFS-tbMgG?zTg4j9kCK1ftC7$H4J zF8qm{o(x>@ZjrYxBm8`Q&09GI2vW!Vt9IemBF}X>qE4u~!j%~?v<_>=n z`~si7UpNh5qF&@huVv#dSdMlHWdZvgocLl^7A)v>g^PKH^!js z*>hQWc4SvE5HN}COhMVf>*jv!r6Ry%ffL6_s5{7R$xfxQax}b7o^(J-GQH6He6m1f zV0P|Xvkd=kw#~GH0(Ut~cXn=OF{nN%ng8tLqn!^v_q)zNX*w=xVbMHhaj?+dJ}bHD zq07=dm;9e)EiXUHw(mT=R(~`7?%fwc85w`iR9sWrwPbbY-P_H-e|6pqN%m~duQ-{f#wa_6W{mkk5e!R2nY!KT0Z*wcbh}VGUyq52saEMpO%>U*XOH8j9J}5o)hp9DnfewdUaHSMl%zGjagQTdCtwcU0q$eDoc9y?T_7NobTG+ zkZBVd|rnC!QRNVygN_lFtG3lgunjUbnx`))2W6jx@0|XBKT6u%hfY&O06|DHK*ri zh6`86(fR#)%}pF~ZNMKp)-^kBs=BX5Jwc%)a^$+Snke~>>7yoT%yTw9d-g1cmgLm* zw3xE8H4u_7DyOL1x6@epwuy*}YCbucl~~k2qCwFe0#dO?xA=TlakVHFY+>sk+Kr`(64s-bb-cY zo_ib{G(?n?tZe*#oxOhj`pJ<_weqyk2M;#7d?~TgQyRZ{=6VU7^(G8d zELSlyFyQF24E|ObIe>#`MnXJLn6f2K+r%V#Ei^HMpP!zW$hA95(2)xjZksAR^q-S7AN7e1zXFN%xhPLJ5KuH9)yfbr6Geu+Tlmd9%11mnyVCVSct z>(cEi&T(AeyH5oR{Ufw_dI`!}Iyy$`>a;Niyq2n}L2pp8D-ORfT<1S_ts`K@WsOQe zoUKzIBD!98as^z1{*e36<}=+%NlCIt|KpjPZZ7R%y~1#E=%^b>{iLuW1;!KeOnSpz z_V)I76uq-Z4Jm5)Fef`Zc8g!vfW$)Tej(0dH#pZw*TDJQlgZ2-Y7?sldGBm<(Gt;p z)7)&jVegd&x4;<-|M{t2^T&tIB1$tpT`92e;kV8$}_Ws5U?qHq<=j`H#@*Sr` z1)S0#7~|v(2UR$)y0~ar>_$N~f6G_(usyl^@?${o%uh%UwJSxo%&SB@aH?J5S1TS~|lT1v2VOjg__eY6k$hWzNHf7sncUQ-F)+h4fnC_?qmQ+<& zd-cY9uehKlVe#zplYFj_&`>$Aej`?1*$Xl(5<)^6@RK+|F2(l=DmJTEuMRsvNBF0) zzWVXm-yiC23OOdICM8w-+}n*9WdD~^m6>ft95DD-uXb+|5^}14xSxE~m97tFLokKX#bs8QGB}JTb#3vEiL1srL59{V9sZ5a<)Yp zr;-LZJS`jfE;WKG%03tS0Nv5dtHeTUzJ>?}aN@ng+5VOTbw<-?RaK2LBmOiLOsVcK6`hm$k%Idg{d zt^JG9D)E~otUsTAP_8dO%gpq3iv(9VmxNg&U!DZu`4L(?6fV$>v&ZHcG_9KxwJ}r3{J`s7M?_mF{t$iX5yWq5x<0< zp42TLZ;Bdz+B&-8kqMrw6jS=+)kV%Q13gkHB|mt;{%X&y_ebB~Rk8rb8=aCO99+Pp 
zr>8eJKUvyrsHgP0X-&-LV>SGUXdGe8hrEa>Q1XxnWt|#CBrOZBx~NGB3JdRjdi4EA zAY;agmxQ(?lmNX4hqAJ=?@(c{?2)(Z4c;kCMBxJ+P+28E>-re1Y@<)7Tsz`vE z-6;j?9v&X%xegpLmu$+LGA%xfe|*WdX2*kL3I5RnQtJf-REd>OPYi1FxMq0|clb_q z$M_r`T0yAXW}Cox0ZW#%(qwuKYQSK54t48YdrmQtpp*Y4K%1RS-_r6CrkMWVL6}TB z2S5KK1Uh4@=chKY?7I%DB>#<)?~(e=U+nQsBg9>BB`Jd``g?(TF{^7y_wO@(Ti^5F z?fYY6Cqq*2;|%*px+>@mB5)e8@qL4>`NAx5DHzc*!eh0b^3(&}yE`@6r9!A_kisvp zapP`M#*$QjpLfZUC4>iHN@5!w9nJNdIuVk3E;2F_CY1tASFW0eEKOnzf^eja)adLe zd>SX5NDViXrA#a=4WNPd_MPNVnH>q;cKnknyQ#y}Xt$Vz#KsHEw{(Qw-`;v&-opJL zRGziy9&~r9tucv*af@%*5Gr6vIR*_Bg`#lv^`W}iso$$suH4PjXoh9NX_%6tw`|>- zccNP>B=rqoKoTNZh-$>E%1Xk)ZMxtTx7|~tI$ClABe&{eoyx|wYE?4M$)uyBBg`W> zbm{W`8+yuu7tD#rS5;Lpex*J9=5ifkf-ZY%l!a^I9-s{23r>#?KGhHv6{YX`PIV4S z;@XpaANRnP)?aXX0lW$d_)_A}l*CH{8olt| z#hHwM2Y1*!95AUEQlNV3)H8xCLQ(~*Vr)jo#~c598p5PsF_)tc<7H)?D3 z_a`XF3#31Wt*N^o`fD(eJC&5i0e|0CR-Tth7ZDTt2uPyJ9`g%~B}e%Z;NZkqx02fe zo$R3utRIo9n7-1Iq=MwkRIa&*+ffBam=iaBKwLYx+&v4*^Hls29k|lk5 zfzzC7W%nNH#;1b?ydxkrf*isba)avqlpqFoR7m2=%6A3Q_i1a_qb6Yby8a6P_9W2z zoV!u#UZ`gV!)`|rqJWKZ9> ze}DITVc}O#HxY}M&AGN=ub7C)1%cxwf4(wD#l&Dw$m!F^{`ld5guQ~EcQ?94vhJO# zA*mtqiE+ypql&CdU|?Xx>aFJlj-T-KjJk72i+!6(hVi?1@96C+B3f!|T}$Uqsiz)7 zAr;nniuQR%!YU(s@gs=Ny$c- z8$9IS7LAUL9l2h26lAw+0!{MdK{Yisr^fVEYKe6)m2mdspIqPg{Cn;F5hZbHX+1|r zN7wk5$k5w4Hlmz;cVr6(J&T9bSBWgtn184Kw&>KL7UQ-dOOfwAJ%lfeqyM^}-Cc^8 zW9E_K^1rh^Qaat3{k^YGll{c7Beb`gSqOgk8A62#g&kuo1~MwV>dZKpf?- z0d29dvGMv*8!Yg>!-ChnFxM)aOOlc1TwRI(8TG42^2|x|FT|a8aUQl5mzG>X`uO-0 zlA@v>)s;2##m7($iEP{$CSc3crdlhtY13Kue`E7dI3te1mYo;o=Uk^gwU2w)q}o?U zx8<;&Qxx`Sg2^+!WLn&eAe!BK_Hcx}N4RMp+RV+xC5AFR^h4>;=ih8Zm-zjOwzjtC^UAopcWZ)s zztFOXA=#nZO)n^lLKO@Jy`B5Nb%9oBL@3h;G?(z$7CIC;IT=ijbF!5jsds|4&O{{VQh;4Thy{Lb9?q z*=_fsB-Tl2<==kd)!@WI?`?lq%#Lr97$7)#XC!$fkC}-{ke=r{8aF^&i-rwQ%~)Rc zZx+ZOqA!lOrH$;@)9Vdtb)TEL(2yqB?L^rbCFv8 z5aY733DNU#ozY2H;Aj1|7+WcH_ePX5a&sG8T#xkV&aCpBueiOr%Q@!uZB0gozuAuA z=uUtT?%axyi08)(zdc%9 zWy0&TasRXcWvaJE@;uMKYmSTKXUk=csSfpQfamh^{WS(gL_uU>tNG3I07U!_d=G`~ 
zPQoOTlamq11sFsB&sy`UtE)YyD))6cSI!s96a@uQP(y?X94}CRS@3szRbdVY;25oF zSDKZfsg>6%!Ju9TVf8L&%fAOE0Lumcwt+ETa+kQ6nC0~5EnUvs?Ch$HGA|Mn1+Z7= zUA*H~{hc1mncH_!97QE041bKgR^BWnBXgDA_Own?D|Y(dkTRBAj`#MTO;tQ@VPL59 zr+hW62YLr=@ZYSGeN%_ZhiXrzv>s_{NJw5dW zIABvzo|XT0;cWIA&XS?v>hCWL`{P9XBK~G*mUA#1+_Kc@s@}f5x1wo7uehkFAU)mR zGYh>p`^2-G$;LF`c-(JyV{2>cF#RQ*C2w{wrX7Y|Q}*7Y-!)_WkK@3R&js<>C{NT= zL&}?P{hM6Dkxy9#1)uDtS4&Ar^-x;>087xug#dC08oi8W``=&{VZ&1h7nY+Qkj0z} zb*_lzcjr3#^x)dH4UWAqIL>3_J}iZYrMr2`GH;~&fP>TRo<#}g9b*Zf;6 zXGyPTqo&!V6*WEz!c?dEKg)eymhizi+yLKw*z(dd7a&JY=x?}cVV0XXzjL|8jsgTQ z1W5Yiw*IJ)e-Dl@0I`BaLn-!Yq1woFO!SBMuF?4A@?ylAqPM zhZU2MUzZMy11EX-tK?xagv{4)IX%4zja)nJrOo+h1a$ZIa?;1CgV+=NG~!>C==|@q z>~JWU3f zFPBgh@P?orT|;%JbR009^Cf2A9=b%qDxDoE6sMwd0<;vbu5aIVF>XuIOAsQANy9B- z5)=ym6?}TQ(b9XQm{N6-*-5kbN&N_NSE&1-&v*Ck-7s);3fKWJga%VfOC18TY$aJK z4n~+&e^__crLmppZxB_-_f#W%e0(@)DP>)YZ@-GPx@y%b4l+;b>ydZK zEGPYt@vv+>!Q&$(NTY)77nvqIIUY*tFXj`jEheHeGWuwxEmx!H3Q0*_VZZ!m?kppv z;kB`;)gDG;8RE71K3vdT#Db@@DEQR8=Y7+*u*-s9U?T^?v*?3NcKT4ThIk zL)f90=cq%=ky@6~*q!bO#hrzmo(7UZG|~B|iVFFCAe(tbjl4%)U7ay_j~IU7bWzFg z19?p+4>{S(&zW0^=Mb*L=~Zj1#r*s{Fn;5SzduLSdNjNH^ zWrBYG{`j?WYV6l9g@+e*k==8;OigK&RHRS}TYb8*xO&cZpV5tQc1ohgVwC*%L$pJ& z?Qh=-P`91YwCXc_PqiKYxbwJfvx|w2)*zmqdhO~}cJ!83gcZ%+RZEQy#|sL&cnfn(6?PrhRli1N(Q}wwKs5(pE0qr0voJTyFlHby!xa)%<8qD z&)BWx=H{fQG|-UOe0}3Gan?ywlSz#_VK_6-T}n_90#v6y&aJPnFAP5j;=j4y@Ilw+qPvf*GAj516z@f}IY2x1}n8w3RfL4j;yB#X5o zizPZPoZ+H#N1*sszk2e|h@CnmXa{V$Q{TYA01YR4LDy~gfS{JXw!*D~pPK4`y69;r zrdUl-1+}Dm^*+NSRxBl#QoP`nWnH&)6vROeqAicNVERBBil;{e1Sw^! zkpDZ~t%^~(I)|2(%{eCD0ITmqX@xWDH0&8-M>A18Zi5dbWSe`Q1|2Ut( zk}x8#BlO9CZoIkb!v|qvx0WU-D_0^0vR@}XWI|d6S!_oG{pf$U?tM_6(LpJrs_#q! 
z-F{Tz-GhVs*{^G9Yv-LBG!Mc1%j^*~Tth5h6UvRo)gGMUG&nW6GZV*2cbYU+S2ipo ztNb=Nn2B1NGt8xKe(3pdIzkm5qDyfM(!Dutsk~^}F z*N?uxllyS|Z_|#JF!1o$tcIC%UN5hxxILPAsNx)J%QF`~ zwX$SF)TX7SanJcWI;IiU{rON)_?*87bqHfy+eE@Ivv2$S`SWyV=yFD4ZZV|02x6x$ zX-^m>X8iN91jp{GD7gTuck?9$D`7YiMtq;3Oim67p`~u%CRQF9)uT^u3mhj(N%#l{ zy=4tPYYcjbz`Xh4PhYsXcflw-HvMH!j^m)l-|)FntxoFQp~vodZA-W$&G8I+cTr+4 zmsf8+iS>)m2j-MYO92}vc7QKSLpzn;;5E9I_typd2^PRy-8FZeV-XA!BXl3NkJq^Z zr7uylrQ%P`g7n1X9%Jkl?<-dMHdRLMTZ-ON7I>&Pwra(S6%7wGIYUwpHKrR+<2)H@ zbtUEHP0_zH1d+OHq4ei4GYnRhPRa=Ux;DEpzv3w!Xs&3fTb+a!W_qhzX}n`G^Um=G zLny~Ls+EbNZ{UdL5X8Rj#dE8F4i9u7Hpwyq!nX%5%+Y%U>#58cp)0;*aS+m|DJui< zwjN;v#wwv!*2ll&w_Wgk|Ayd%uT03T7V%4@x1p6zaX(2BXQspP9YsI zjE~TiUO?BFiB|V81Or58;sxa#!<^^mFU@l>JO_QWJcPoSzxVbIbe&&kqN~ehhanQp z^V?6wl6d-+7H!KXO<7jj>@qJPDsVuH3~uI@TS_-GQ$)BRRGf5sM*xiP6y$E3lB%*dDu6*4DqRTJBrMFyz-x(S8^~~i2Uee8KHm@YLFju7 zO8lq}vXE#ui5NnIJGbOIk`@Rap-nLw{q?I3J*?!QAWC_vhPCxwBESZ%c2TBQ0LPGf zcX-_?%?Im^m6|SLU^u~Ug;r9Kh8!AK5j>ki1f;&6Ul&k|Hrh$lqGS(L zli$AGH`gs4D0m^9=&Fgp1dJ#LLx1DK{GX^r%lWcp_z%EE$YUN`%rd}CM~|< zSyUPbla{k9r)PUWx_-4kAQq8DpH6B+VVLKcYeW=HqzEB99-C)EOT!HJ>ZFBJD_eQK zm!tP!B91W*+2(@b(W63ki8afXEz3Rq>kw9}jt8JTHdHmfKCOnUG~dgLNR@Z)`mJ00`YhYGpn&P~3#b5g znHRXJ7k%2m|kPGbm^l;Xla`)(4Pm!Xepf! 
zNw={|JX>VTmg_Q3wor@^!D*gTqZnzBT;OZPl^KE`@Oq;6GDJSs-YEr<15R6P^X6-z zKQ(v~=xoiHn3$-_b6h7#0~}D5Iy^T$PA*KjJ|Emhw7bieFBe`ZuMb5Cl*86UY*RAT z!-p`&4uf%(0sv&@mTG{fq5jhlh%rEq0~<=@Ne!Ulp_UOKegDgR3b>erz?>oH+~fex zTWa#pUk9kNmPdXptIaB&FsF*3A^youa zDJ*7sAMWi411`JY1$skwTc2pW4j39TJz!v4yE9cd!S4XNjZHW-S?Ehh%j$ACVsahi z6Jzg$6C8juq!d4|L2udTcaJDiL+H>sBr6_kKff2z5(ksW5s*xEm6x~>09NkN_q+rx zpr|l`V2tt2JK$$%GWJiyw+i1OqZK?tn~@kTRpO}Fpbj|}Y28a&6&Y>NxLk`CbR+oN z5RXH!D3$p?_mvlBJz?*n($XK2*STS}vVrq{&rW$9hE>F=lg>E89-6~@zxe%T4Q?oh z#4XpgeaHBZx?Ske8epbi$6qR3L`8GCWwaD{+&^*RMB7)7%Ga-{!Y+M%cHv+m#Ojot zotvZHJs@fcE@^8WfxKL=0TYNKjqB>_cok1Qf=XtGkMF2U*#)xu;{(kD02sz6PHYKA zF|lKF?E5N?g&>gK zWs=dDxuu}%9G|Rbj}AI3?&<~Gt^4TQ#e{BXqI@&0ys`$yCBcm0xi z6GfnPv9Cdv+Nt89AcP!bdr5>TJ1dfkkVx4Qic(l^r1sxigy^l*S{j{UeF znU0j`3t1b)O*$f`KhR>H#&wi&$Xbvv(vXFtLRUh}v|eE8qx*&j4xGgM*C@EF!el*L zMLYW2PPMw+*1@THu6)k-4p0*;ieFJZeH9sLk;4+|pr2Z7R%ZKDFa z(L>MD1m?)}V+$ygdr-5X+qjPy1f;4ruU%6^-BRZ6TGS^*`mupCtzQZ@KcO?rG$Ha# z(wird)+Ezhbn|8d1hm6fziz?OVQdd?-du@3p97Q#ngQhW^KuYwMw@-^-KA zg*}YHg~q)^#x`6$NxcCe73qET#fzOpE=b%Q6HmQC zq)uD9_yGt2^|Yl}WfSB?d2rCG52S3u7fzp_B*QQ6Vd6-62at0i1(E&lOEH#+43D-_ zEi|M^u8*oBQK*7>&;c&^c?^$$XuFhiU;0}iZb3VVPInNM(q^)^5x4( zhP)$V7lO#hL@!6Uu1zS$QF-Vf98mRgKp7)aC+MImIRJ?)bmZbH$MVeWx7=RI&YlRu z3cie}r33J2h)qSf&MN8`Ll0~>IxC|S6A*Z%Ky?Z`+5~p?V5SKqa7ffT_y!ck^~mO= z&aC=iI_G0&cMoOhhnYW+2a7XzBqgn9nKKPf#=o|LsqG(i$A$i=Ya_9PXVKrTw~wicaY{_9<8YQey- zRPu@jT@Zz*pC6Ii!LW=bQXCV(o{|Zlfvam48h=dX`-zAKIJ|bheEatU7Cra6PNSpY zUHimdo9N)k9j@ugVLbtY^I6Gji5wcTTcUI(=wq3_9uA0z6N^z(@@_lMV=tY>15eLi zv1Uys+_De*V~9(jOoT;B4F=mm{WuIkZrT{k(z1Prl9F6$%`nYw0-nl1g`%z&PsmVh zaAI{tW=PO0Wb*X17^6XT-@XfLeSZj2KM+#Uol2iYR}>t8z>efelWglu%^O0i`!$-~ zU3YG!b9eTi9y5UCdmOS}D&WCQ!CfL4sLY;}PN{A}|A)$U#POX9CAgG_dEEuge0N zp++_alLq6ntUS0iJR+j;?X?YZ(Bw>%YJ`HG3<{Sp8wQ+bML^?|cWpqzA4lTnTpSE3 zyx4dAK(=);B4ctOu0v1b=0j9$thR!?I2wM$G6--g^gKqZM!Kaz+9M+)`s(T-2%5x> zU|W4;L8LJcpQjXr0T7Z>{o9)e>SYJ0u`I}?0G@(bpM}^jvl^vygKn#Md9?%_R+7c; zLYY~I7o8!T84|P6fdkhGUPYxqe(xbgs=TstFSweztAe|bF*9*z3o|F 
zdJUl;*eIjYfH~NywulUAqomTI6-uBgxlI6vBgL3M?)HIKi$%|k=B4|K7cXvJy_%>N z%Fss?%y3@m=`-lZdwqSyWPjv{QFK|aRu_P-n6z}HDjrI|im(;9mUX}K+6`|!jXOhg zx0=zJp~Af+;o(YW8m>pU-eK7le(_>mgnUOT1{Q_-Mb&m6Hn>xOhK7b*7xF^!h+YXl z!O0ngg##U`2X_&AT?y(fa(3s~)}%jqvW477BsPldaPmj3Agy}w?_a-&dp4E^29hm> zlAUc~D`Ltj#E&08e%Sa8LaA6)Q=^>CqHM~TlAJ86qLL7bXp+8)lk@(qTRiOu9$=!H z!7ApGi$nnGpe*Ube0x#XhK11+%mcC9{^Cc;!PBIdKK>u{Ti_u~amz~1XY@#NN{TbV z=Q(zv0e`xd61=#AKM8zyQO_Och7`SZUJzm;yhP-ejf@$=&B3HLLc?a`SDcvG$!+hh zp>YmZ6K>+R4Yq>~u?06n5*b)|A7Hz|c8P7;l>B729ruNBWntGo_X`lUjt{n)0zDbx zPSJO;&LZySILO*El%6D15Ko3C3m(u!KrFe&)>dc;M+^(&eQYLiDKgR&>7pUSR63<) zmxO>o;tAhgbwG_Bps>16aiB#4J5Oy>EXc{x2gR+eh+=xH(2caw{v_P%&cJs8BE-qEsq?2Mf$zFN@vo+BldR`KPLcai4GoL^P2IA^w`lb#Bh88kd#7{dzmPABdqb(8#3~6cmJK$2A(0#4lSC>&1qibe+k!qHH-V6x2$?^9GJE~z%@dJ4h=kEhO~ehTKjGOp zVx;(tmcCBtmr3(i08yw4Q3zcLV?rh&yJ>y|jDGO_`w@(+4PNSimew-0LK>v>Teogi zT^xbsGoGe(}o@!)BF1gc@gF>fEM&UuduZ%`S>gj zj^K`Qb#rr!t8DQ&phHBW6g;te_Xv)Q&#M%Q2K_)>NFXeIKf=+?jd0w4 zRPN-iIqE^P!iPQk?Rs%V%Ne3NHI$4_UrLI}1r`*{A;A{NUKC+Y9-dU(DM`fvMOppXCoWOnU3dh5a0~~>HGLVKAAwQ7`q6juZ$HJ`Al_b5)K24 z-oAbCAZ>zp2rl1#7OPT4StAzS?%C}M!vY~|SWY8l`(*Uz5{eWYfZ#kWJ-ucu20L#u zbr{4Aq4Lt!BK(rN33sNlBu=&FI^G99;6Q&mh*Lq{$ip=})w6XEd78rvLj!3Mlf|Ki zr7jM$1!5(d2{wH?5(UXfNFQ_ITVMsdz+UwBaLHMQY3$xDvUBH)(Mr&^;t~=%P?#PI zM_8k9o3#7W)%b(Czsx6Xb99YA7+-N&StGn4d)5meLw(mSNJAwcbq%5I4}vF*FvA5Q z;H*cEzX=i&9bh1|0o-7Q;}wu<3PeGS__^ZZ;#Fwm5Pxv$T5POE;Tz@rho>gQhtb3a!uanfDL%xi+dNh7il4v}i3z2_upx+n z7=OVbBJ={_1tP^|y<{-humoIpOdb8*8z3Pff==Z%Bt({MIsESN4otT(V z9{z0{Xw^QA{~ch9&LCRqVq|jhjGwm^6``*;;5gjRVg%f~6GXBur?e4{aPdz8 z`_ is an industry standard tool for +distributed hyperparameter tuning. Ray Tune includes the latest hyperparameter search +algorithms, integrates with TensorBoard and other analysis libraries, and natively +supports distributed training through `Ray's distributed machine learning engine +`_. + +In this tutorial, we will show you how to integrate Ray Tune into your PyTorch +training workflow. 
We will extend `this tutorial from the PyTorch documentation +`_ for training +a CIFAR10 image classifier. + +As you will see, we only need to add some slight modifications. In particular, we +need to + +1. wrap data loading and training in functions, +2. make some network parameters configurable, +3. add checkpointing (optional), +4. and define the search space for the model tuning + +| + +To run this tutorial, please make sure the following packages are +installed: + +- ``ray[tune]``: Distributed hyperparameter tuning library +- ``torchvision``: For the data transformers + +Setup / Imports +--------------- +Let's start with the imports: +""" +from functools import partial +import numpy as np +import os +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import random_split +import torchvision +import torchvision.transforms as transforms +from ray import tune +from ray.tune import CLIReporter +from ray.tune.schedulers import ASHAScheduler + +###################################################################### +# Most of the imports are needed for building the PyTorch model. Only the last three +# imports are for Ray Tune. +# +# Data loaders +# ------------ +# We wrap the data loaders in their own function and pass a global data directory. +# This way we can share a data directory between different trials. 
+ + +def load_data(data_dir="./data"): + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ]) + + trainset = torchvision.datasets.CIFAR10( + root=data_dir, train=True, download=True, transform=transform) + + testset = torchvision.datasets.CIFAR10( + root=data_dir, train=False, download=True, transform=transform) + + return trainset, testset + +###################################################################### +# Configurable neural network +# --------------------------- +# We can only tune those parameters that are configurable. In this example, we can specify +# the layer sizes of the fully connected layers: + + +class Net(nn.Module): + def __init__(self, l1=120, l2=84): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, l1) + self.fc2 = nn.Linear(l1, l2) + self.fc3 = nn.Linear(l2, 10) + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x + +###################################################################### +# The train function +# ------------------ +# Now it gets interesting, because we introduce some changes to the example `from the PyTorch +# documentation `_. +# +# We wrap the training script in a function ``train_cifar(config, checkpoint_dir=None, data_dir=None)``. +# As you can guess, the ``config`` parameter will receive the hyperparameters we would like to +# train with. The ``checkpoint_dir`` parameter is used to restore checkpoints. The ``data_dir`` specifies +# the directory where we load and store the data, so multiple runs can share the same data source. +# +# .. 
code-block:: python +# +# net = Net(config["l1"], config["l2"]) +# +# if checkpoint_dir: +# model_state, optimizer_state = torch.load( +# os.path.join(checkpoint_dir, "checkpoint")) +# net.load_state_dict(model_state) +# optimizer.load_state_dict(optimizer_state) +# +# The learning rate of the optimizer is made configurable, too: +# +# .. code-block:: python +# +# optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) +# +# We also split the training data into a training and validation subset. We thus train on +# 80% of the data and calculate the validation loss on the remaining 20%. The batch sizes +# with which we iterate through the training and test sets are configurable as well. +# +# Adding (multi) GPU support with DataParallel +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Image classification benefits largely from GPUs. Luckily, we can continue to use +# PyTorch's abstractions in Ray Tune. Thus, we can wrap our model in ``nn.DataParallel`` +# to support data parallel training on multiple GPUs: +# +# .. code-block:: python +# +# device = "cpu" +# if torch.cuda.is_available(): +# device = "cuda:0" +# if torch.cuda.device_count() > 1: +# net = nn.DataParallel(net) +# net.to(device) +# +# By using a ``device`` variable we make sure that training also works when we have +# no GPUs available. PyTorch requires us to send our data to the GPU memory explicitly, +# like this: +# +# .. code-block:: python +# +# for i, data in enumerate(trainloader, 0): +# inputs, labels = data +# inputs, labels = inputs.to(device), labels.to(device) +# +# The code now supports training on CPUs, on a single GPU, and on multiple GPUs. Notably, Ray +# also supports `fractional GPUs `_ +# so we can share GPUs among trials, as long as the model still fits on the GPU memory. We'll come back +# to that later. +# +# Communicating with Ray Tune +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# The most interesting part is the communication with Ray Tune: +# +# .. 
code-block:: python +# +# with tune.checkpoint_dir(epoch) as checkpoint_dir: +# path = os.path.join(checkpoint_dir, "checkpoint") +# torch.save((net.state_dict(), optimizer.state_dict()), path) +# +# tune.report(loss=(val_loss / val_steps), accuracy=correct / total) +# +# Here we first save a checkpoint and then report some metrics back to Ray Tune. Specifically, +# we send the validation loss and accuracy back to Ray Tune. Ray Tune can then use these metrics +# to decide which hyperparameter configuration lead to the best results. These metrics +# can also be used to stop bad performing trials early in order to avoid wasting +# resources on those trials. +# +# The checkpoint saving is optional, however, it is necessary if we wanted to use advanced +# schedulers like +# `Population Based Training `_. +# Also, by saving the checkpoint we can later load the trained models and validate them +# on a test set. +# +# Full training function +# ~~~~~~~~~~~~~~~~~~~~~~ +# +# The full code example looks like this: + + +def train_cifar(config, checkpoint_dir=None, data_dir=None): + net = Net(config["l1"], config["l2"]) + + device = "cpu" + if torch.cuda.is_available(): + device = "cuda:0" + if torch.cuda.device_count() > 1: + net = nn.DataParallel(net) + net.to(device) + + criterion = nn.CrossEntropyLoss() + optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) + + if checkpoint_dir: + model_state, optimizer_state = torch.load( + os.path.join(checkpoint_dir, "checkpoint")) + net.load_state_dict(model_state) + optimizer.load_state_dict(optimizer_state) + + trainset, testset = load_data(data_dir) + + test_abs = int(len(trainset) * 0.8) + train_subset, val_subset = random_split( + trainset, [test_abs, len(trainset) - test_abs]) + + trainloader = torch.utils.data.DataLoader( + train_subset, + batch_size=int(config["batch_size"]), + shuffle=True, + num_workers=8) + valloader = torch.utils.data.DataLoader( + val_subset, + batch_size=int(config["batch_size"]), + 
shuffle=True, + num_workers=8) + + for epoch in range(10): # loop over the dataset multiple times + running_loss = 0.0 + epoch_steps = 0 + for i, data in enumerate(trainloader, 0): + # get the inputs; data is a list of [inputs, labels] + inputs, labels = data + inputs, labels = inputs.to(device), labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + backward + optimize + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + # print statistics + running_loss += loss.item() + epoch_steps += 1 + if i % 2000 == 1999: # print every 2000 mini-batches + print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, + running_loss / epoch_steps)) + running_loss = 0.0 + + # Validation loss + val_loss = 0.0 + val_steps = 0 + total = 0 + correct = 0 + for i, data in enumerate(valloader, 0): + with torch.no_grad(): + inputs, labels = data + inputs, labels = inputs.to(device), labels.to(device) + + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + loss = criterion(outputs, labels) + val_loss += loss.cpu().numpy() + val_steps += 1 + + with tune.checkpoint_dir(epoch) as checkpoint_dir: + path = os.path.join(checkpoint_dir, "checkpoint") + torch.save((net.state_dict(), optimizer.state_dict()), path) + + tune.report(loss=(val_loss / val_steps), accuracy=correct / total) + print("Finished Training") + +###################################################################### +# As you can see, most of the code is adapted directly from the original example. +# +# Test set accuracy +# ----------------- +# Commonly the performance of a machine learning model is tested on a hold-out test +# set with data that has not been used for training the model. 
We also wrap this in a +# function: + + +def test_accuracy(net, device="cpu"): + trainset, testset = load_data() + + testloader = torch.utils.data.DataLoader( + testset, batch_size=4, shuffle=False, num_workers=2) + + correct = 0 + total = 0 + with torch.no_grad(): + for data in testloader: + images, labels = data + images, labels = images.to(device), labels.to(device) + outputs = net(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + return correct / total + +###################################################################### +# The function also expects a ``device`` parameter, so we can do the +# test set validation on a GPU. +# +# Configuring the search space +# ---------------------------- +# Lastly, we need to define Ray Tune's search space. Here is an example: +# +# .. code-block:: python +# +# config = { +# "l1": tune.sample_from(lambda _: 2**np.random.randint(2, 9)), +# "l2": tune.sample_from(lambda _: 2**np.random.randint(2, 9)), +# "lr": tune.loguniform(1e-4, 1e-1), +# "batch_size": tune.choice([2, 4, 8, 16]) +# } +# +# The ``tune.sample_from()`` function makes it possible to define your own sample +# methods to obtain hyperparameters. In this example, the ``l1`` and ``l2`` parameters +# should be powers of 2 between 4 and 256, so either 4, 8, 16, 32, 64, 128, or 256. +# The ``lr`` (learning rate) should be uniformly sampled between 0.0001 and 0.1. Lastly, +# the batch size is a choice between 2, 4, 8, and 16. +# +# At each trial, Ray Tune will now randomly sample a combination of parameters from these +# search spaces. It will then train a number of models in parallel and find the best +# performing one among these. We also use the ``ASHAScheduler`` which will terminate bad +# performing trials early. +# +# We wrap the ``train_cifar`` function with ``functools.partial`` to set the constant +# ``data_dir`` parameter. 
We can also tell Ray Tune what resources should be +# available for each trial: +# +# .. code-block:: python +# +# gpus_per_trial = 2 +# # ... +# result = tune.run( +# partial(train_cifar, data_dir=data_dir), +# resources_per_trial={"cpu": 8, "gpu": gpus_per_trial}, +# config=config, +# num_samples=num_samples, +# scheduler=scheduler, +# progress_reporter=reporter, +# checkpoint_at_end=True) +# +# You can specify the number of CPUs, which are then available e.g. +# to increase the ``num_workers`` of the PyTorch ``DataLoader`` instances. The selected +# number of GPUs are made visible to PyTorch in each trial. Trials do not have access to +# GPUs that haven't been requested for them - so you don't have to care about two trials +# using the same set of resources. +# +# Here we can also specify fractional GPUs, so something like ``gpus_per_trial=0.5`` is +# completely valid. The trials will then share GPUs among each other. +# You just have to make sure that the models still fit in the GPU memory. +# +# After training the models, we will find the best performing one and load the trained +# network from the checkpoint file. We then obtain the test set accuracy and report +# everything by printing. 
+# +# The full main function looks like this: + + +def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): + data_dir = os.path.abspath("./data") + load_data(data_dir) + config = { + "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)), + "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)), + "lr": tune.loguniform(1e-4, 1e-1), + "batch_size": tune.choice([2, 4, 8, 16]) + } + scheduler = ASHAScheduler( + metric="loss", + mode="min", + max_t=max_num_epochs, + grace_period=1, + reduction_factor=2) + reporter = CLIReporter( + # parameter_columns=["l1", "l2", "lr", "batch_size"], + metric_columns=["loss", "accuracy", "training_iteration"]) + result = tune.run( + partial(train_cifar, data_dir=data_dir), + resources_per_trial={"cpu": 2, "gpu": gpus_per_trial}, + config=config, + num_samples=num_samples, + scheduler=scheduler, + progress_reporter=reporter) + + best_trial = result.get_best_trial("loss", "min", "last") + print("Best trial config: {}".format(best_trial.config)) + print("Best trial final validation loss: {}".format( + best_trial.last_result["loss"])) + print("Best trial final validation accuracy: {}".format( + best_trial.last_result["accuracy"])) + + best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"]) + device = "cpu" + if torch.cuda.is_available(): + device = "cuda:0" + if gpus_per_trial > 1: + best_trained_model = nn.DataParallel(best_trained_model) + best_trained_model.to(device) + + best_checkpoint_dir = best_trial.checkpoint.value + model_state, optimizer_state = torch.load(os.path.join( + best_checkpoint_dir, "checkpoint")) + best_trained_model.load_state_dict(model_state) + + test_acc = test_accuracy(best_trained_model, device) + print("Best trial test set accuracy: {}".format(test_acc)) + + +if __name__ == "__main__": + # You can change the number of GPUs per trial here: + main(num_samples=10, max_num_epochs=10, gpus_per_trial=0) + + 
+###################################################################### +# If you run the code, an example output could look like this: +# +# .. code-block:: +# +# Number of trials: 10 (10 TERMINATED) +# +-----+------+------+-------------+--------------+---------+------------+--------------------+ +# | ... | l1 | l2 | lr | batch_size | loss | accuracy | training_iteration | +# |-----+------+------+-------------+--------------+---------+------------+--------------------| +# | ... | 64 | 4 | 0.00011629 | 2 | 1.87273 | 0.244 | 2 | +# | ... | 32 | 64 | 0.000339763 | 8 | 1.23603 | 0.567 | 8 | +# | ... | 8 | 16 | 0.00276249 | 16 | 1.1815 | 0.5836 | 10 | +# | ... | 4 | 64 | 0.000648721 | 4 | 1.31131 | 0.5224 | 8 | +# | ... | 32 | 16 | 0.000340753 | 8 | 1.26454 | 0.5444 | 8 | +# | ... | 8 | 4 | 0.000699775 | 8 | 1.99594 | 0.1983 | 2 | +# | ... | 256 | 8 | 0.0839654 | 16 | 2.3119 | 0.0993 | 1 | +# | ... | 16 | 128 | 0.0758154 | 16 | 2.33575 | 0.1327 | 1 | +# | ... | 16 | 8 | 0.0763312 | 16 | 2.31129 | 0.1042 | 4 | +# | ... | 128 | 16 | 0.000124903 | 4 | 2.26917 | 0.1945 | 1 | +# +-----+------+------+-------------+--------------+---------+------------+--------------------+ +# +# +# Best trial config: {'l1': 8, 'l2': 16, 'lr': 0.00276249, 'batch_size': 16, 'data_dir': '...'} +# Best trial final validation loss: 1.181501 +# Best trial final validation accuracy: 0.5836 +# Best trial test set accuracy: 0.5806 +# +# Most trials have been stopped early in order to avoid wasting resources. +# The best performing trial achieved a validation accuracy of about 58%, which could +# be confirmed on the test set. +# +# So that's it! You can now tune the parameters of your PyTorch models. diff --git a/index.rst b/index.rst index ad0655f2562..06a24e8c76d 100644 --- a/index.rst +++ b/index.rst @@ -260,6 +260,13 @@ Welcome to PyTorch Tutorials .. Model Optimization +.. 
customcarditem:: + :header: Hyperparameter Tuning Tutorial + :card_description: Learn how to use Ray Tune to find the best performing set of hyperparameters for your model. + :image: _static/img/ray-tune.png + :link: beginner/hyperparameter_tuning_tutorial.html + :tags: Model-Optimization,Best-Practice + .. customcarditem:: :header: Pruning Tutorial :card_description: Learn how to use torch.nn.utils.prune to sparsify your neural networks, and how to extend it to implement your own custom pruning technique. @@ -516,6 +523,7 @@ Additional Resources :hidden: :caption: Model Optimization + beginner/hyperparameter_tuning_tutorial intermediate/pruning_tutorial advanced/dynamic_quantization_tutorial intermediate/dynamic_quantization_bert_tutorial diff --git a/requirements.txt b/requirements.txt index a0aca3ca028..5e87cf36170 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ bs4 awscli==1.16.35 flask spacy +ray[tune] # PyTorch Theme -e git+git://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme From 4a70101752f713295535f3d885ee0a118d5b6af3 Mon Sep 17 00:00:00 2001 From: Vijay Viswanathan Date: Tue, 1 Sep 2020 16:18:11 -0400 Subject: [PATCH 13/21] Fix typo in "Introduction to Pytorch" tutorial (in NLP tutorial) (#1145) * Fix typo in "Introduction to Pytorch" tutorial (in Pytorch for NLP tutorials) * Dummy commit, to restart CI * Revert dummy commit, to restart CI * Revert whitespace changes --- beginner_source/nlp/pytorch_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/nlp/pytorch_tutorial.py b/beginner_source/nlp/pytorch_tutorial.py index d61496d382c..2e60c20ab81 100644 --- a/beginner_source/nlp/pytorch_tutorial.py +++ b/beginner_source/nlp/pytorch_tutorial.py @@ -274,7 +274,7 @@ ############################################################### # You can also stop autograd from tracking history on Tensors -# with ``.requires_grad``=True by wrapping the code block in +# with 
``.requires_grad=True`` by wrapping the code block in # ``with torch.no_grad():`` print(x.requires_grad) print((x ** 2).requires_grad) From ee5e4483a2b5fb9fd67412a380c1146b95249459 Mon Sep 17 00:00:00 2001 From: "J. Randall Hunt" Date: Wed, 9 Sep 2020 10:46:11 -0700 Subject: [PATCH 14/21] Install torch not torch vision (#1153) Small update to recipe that instructs users to install `torch` not `torchaudio` --- recipes_source/recipes/defining_a_neural_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes_source/recipes/defining_a_neural_network.py b/recipes_source/recipes/defining_a_neural_network.py index bdb7ccfb375..f0a4ef69be3 100644 --- a/recipes_source/recipes/defining_a_neural_network.py +++ b/recipes_source/recipes/defining_a_neural_network.py @@ -26,7 +26,7 @@ :: - pip install torchaudio + pip install torch """ From fe33b549f7b2cac58d7da07b76091acd709171d3 Mon Sep 17 00:00:00 2001 From: mcarilli Date: Tue, 15 Sep 2020 10:00:06 -0600 Subject: [PATCH 15/21] Python recipe for automatic mixed precision (#1137) * fdsa * Tutorial runs * clarify one scaler per convergence run * adjust sizes, dont run illustrative sections * satisfying ocd * MORE * fdsa * details * rephrase * fix formatting * move script to recipes * hopefully moved to recipes * fdsa * add amp_tutorial to toctree * amp_tutorial -> amp_recipe * looks like backtick highlights dont render in card_description * correct path for amp_recipe.html * arch notes and saving/restoring * formatting * fdsa * Clarify autograd-autocast interaction for custom ops * touchups Co-authored-by: Brian Johnson --- _static/img/thumbnails/cropped/amp.png | Bin 0 -> 14849 bytes advanced_source/dispatcher.rst | 24 ++ recipes_source/recipes/README.txt | 4 + recipes_source/recipes/amp_recipe.py | 325 +++++++++++++++++++++++++ recipes_source/recipes_index.rst | 10 + 5 files changed, 363 insertions(+) create mode 100644 _static/img/thumbnails/cropped/amp.png create mode 100644 
recipes_source/recipes/amp_recipe.py diff --git a/_static/img/thumbnails/cropped/amp.png b/_static/img/thumbnails/cropped/amp.png new file mode 100644 index 0000000000000000000000000000000000000000..a6916ce5605e99d5168c7d52aa157f913b8e7526 GIT binary patch literal 14849 zcmc(`bx<9_w=PP8TX1(taCZw%a0{}tad&qoxVyXS#wEDJ#x-o*osA^81s=ci&V5zy z&wJ}uovG^nzSX@}P0vhq_gcMbB2|^Yp`#F^z`($u%gIWq!@zt5{HGwnzPCiSSzf@v z5Z!re09@6LJSZKV9n7t4%_v>H9L*@rJgv-OU_4h3vUQwk+nuG}8cc z96&v_c*4(?6cw(ixRmr7ms3+S#X`cvg%Xx>-=1cmufcEDmn#SJpgW+*+d-ZW>5I|p ztwDf4Vt)SCOu(?OZ@|-m%e@ojmD;XI_owE+-Wb6%69#m?o!>GHL_O}lVxRjJ-v4wU zQ0!>3ZxO{vn%RS1Cu|X%9^HOBhkV6w>lry&dXAn^-4um(nPYA8E%a_2VK~#scmAot z-IMcud1+ljnul8aS;*fSxtAD!A^YyymHK;=hKkal?EZY?dFI0t_P#?v`(^Rl-zAU! z_`QE#PpoOTWHhf2CqGsF$zSM(^6hiJ%HZ~rBXPx0lwH5Jd#rUI@pL9KVbwTDdEK)J z3Wl>OOXTG(FxHKz>|EY;39?-evZ?p{yGmX{5s_+i)*Cen$Fk?2c}9t*I`lN&6B)r< zxXvua_t8B`rPJRTk-2l{bZ*^sV)*OyLa5+*v6lZ_W4P{x*h36UfGU4or+?vr__lv= zp@*XO=MIZ$1TR!KtZ?##Z;P60r{`KL4<^8;d*M=a2eBfl|0-l9WBbpwTjY-cgJpS@ zJe)12u3mZiw$?2IN~!fu_Kbwlz&ff_)nEsXR?LXv#De}&lX0h{pQ`ffXq<;B)~+n- zoa@;xK+hf$1&|uWpb~9SNfvQ?VUMiVH2h1mt)XR#hBHHHn$~2hzKjyjlDdtZCM_S( zx;<4(#|Gj}lE$plU_CY25lik91glWj?FQ|R$J*GI3ARD+5q&Tn$u!S1gRGQIeFcB~ z%2d~QYCWN4U;@tNH1#qKc#tex#86 z3g(nSxkheS|C89+%XuGvB}4G|%_}+}U7-B;KaxzBk_&LxKUy0d*I#)t-Bqd_w~?oN zLNr->U?2Z~4qI;qdZWrvx@qH`-~P@Gu;Mh@)Rj4g9v%Ugbhertdd8-J3QckRH9~$Teh?_>^tOa!fLyvz zn%7Kjal)H@rpg7-Dn7N15D%aQ!P9c(u0&Or==R7n5v5Rg=$rs;zvfQtL*FZ1^06VgP}toPAL zv4)PnM@Xf)ExUgsErNoqW~rsPe5~SD7QtL6Y*uF+`jHWHsL+lzOTk>x10ly zHSdM;X3_nu84||Ha#^qZDXbQ%6XWS=t`O~%-6P~4&hi~ctg@4 z{OQx+Z-PG^*^#;^YxmyD5Zp0Tzn-~kCo9a+#x#_5u4zYa*d-gK-k+|F{!3D}=hJB( zdQssu5b-bYmeA%E?0SqEO2IaR^@!LFQoTd<&t?uX{KuRVsGdb+6(A~sK7JlaV)c>n zm~#v2H@n?@1|Hm+zBw|iOUzki7sY41Z<9Q6y|aCT(Xj(Rwe2Lc1Ml2>1oY@GsN<2A z2`A-LZsTo&y_&^$N|PBoEIkl{GIabxf|C?*&(Tv>Bqvr@P&8InZe^y_* zqILo2roNU7O!3H@2#+LqpFNNo%YE*Ck2{$;gX1W2%NkM_hL4T#`X1DIuw@!$;t&5)r%f$EFfK0LGcFV<4v0G3b;rkY?shvM(M$3@>MyH0072Ik3 
zT6e=7jKncTiy*rQM#r4J`MGKXyIP%s#U3gHJHPT-xcc;0O9Apq8HQRqk9w)bYEKlZ zpq~{GXS1`fM_FB(FO>Fy_p7jBld(5UuwXfY6m8`Ih1-lUWKswwI1dAF#q?fXSK({I zX)(S_O$vQsujXNfav}}8Xmwe*ljFsL!Kz6h<_-Zyvz8$_LRnZ;jpCA=Dv=K(c)wI9!?lBZQTI2IjzwAz zNbRW~f`^CtM=>YkN6v_m^Ktbu*5K{g=WWAxCoAvMPH0g7&7r~|ERF<@Y02F5Ng4ih z|4D@s4NaCPP^Ks&n`&f3uZA$w8%cITfWPx8O`_6PkMrxQ`j8S+WYAFnkOeAAmhr387U4(83?U&QTb}gB)n<^TFUs(M-H-hn9w9rUa zx8{k-Q?Yr_pV(J1I!gO z+t51cx@|zuG7iRSDwYY+^nv3XVFUqHrRg29;7!H)4y|x+;Wmv?%9PqUE`WW9(p=`> z6@!F#c)tyayV=OIG;Iq6Hf5ND2%wp)&HFoI?@M9^|6k|yAO$dE7itH)-lJl z_*D)?)j;dvA%*_?OXjJ&nsx(d|rf=4d04S>3B$aZPX&=n84qYK+ zJX2Mw~CkSnHOv!zx6d@riz};pNn`uX)R2-KE_c>u?a5fxa>+qdo3d0arU-74pbZ!HI|qfI0e!;PDn2AliUT;9e?}=w5oh8%Ogl)+#wuY zf`3b~I8?EZyePdd$e{Z4XIbR1EjtjYgMg>Lg>^|^_hXHw+_}W zaFcZ%Q<2vi1UiZG!Hly!ADLGq8+3QZV-Qb?{456^{^XgdtT>EzJ-PX)(qO((D;CF- z8HhvZ?7`}<;*0Wdp>*R$dSDEvKG~KB zcV$@ppz4r-<40)GQ~e#xZpoRy+qMcDd;`@zw|)FWh~Yv4)HC*p`K{`O2YJunT7-+` zjF@NCKozkRw$V&jy$r*2Tb~1dRMUXoGhi{Y{-TZQ&lTv)3Ey)0o^{QDf@wJt=)xIk zpa}kV_A7UnL3~I2V4at66AVh)xde-v-Br2de4OT!>yTf-F^|4o@Q09_8CJjmavaTS zO}#}3y-vrU)+gRuGlQ;%2y-2VapQ&{nmV)BRkloq# zp}=Ci&97tj^cIUL2Y0=E&ma!_(~M}v>(YR_NG~wRySvkX>|x>lNFqa?__EByDs}sl z7q{dO$zuB5AV#DcpCN;ahY(GYhQ9XdISKF*qm@UJVSK5$UxDk%Hg`>x^a{>w58xuA zeAZiIH_X!GNeg#z4!dIS0!#Z55_~0!Nh-)$(Bobr&<$y}Bo9aty5}>7zH@T7=yLgFCzYg zbNA_BS7PE3gZP(W;8vng5M1Hx6N6I$*TC?;5vL=Y)i{-%%bOwghw98fhx^~+oSBsl zo-^B2zI#cZyUgY#R6b28>B);t{|V<8wP?Fkd6dh^|6on@Lg3`jdG4QTO-b%Rj6A3ipdi*NEy35X+Is z2fM5W`sv<4ZhiX6cSnwTGPcHB>zGppIg|A9C<-9*TXQfOWD%LWyU_h-DfkrXXO(PW zSfX2c5cp@I7$5TF-_rZz^9W-Q@^>*gyWDDx>4d%|tNrAraP(azzdaztEV9i>P!$W@*06nx4+1 zA+tG~&kWhxC{U`GCjF?pl~KJGYhZ3pw)Ik^gxlZUIBrUp5ldG^`RB&psT1f2GwpL( zz!L=gH_!Q5D~Q*j!n66S5{;Hj`63+shdm4WGuST+t81N4^I)pi61~(~t?12Fc-Dp8 z&#Q2o!g$#n2^Us@n<#qw!C!->^A|6RhS{=s?3+!})N?qf)2n0VQlbR&kV3zkgFY280Uz^52L-nuo0UuP6XhRBN0AKo{`_2lXHcT9ty5C|k`n!| zc)}PqUEqXr)M}1R{A@I0un-YWkh)hY9%DFAEhQS?i)LuA(qA}^r8}c%NA|fw8MQDr zJBu`e+^Ns0$g;UbQb%d^dm#v?!XCF!@ru)c>tM{V^c42?`-Q8GHGvcfMMg}*hp!2K 
zT1?eyMf8WeL}7aTgZ46_Vsd|iA!++-(dRA*THJK!F?}W)Ldm6J2}rU)SFE5SY`xP2 zhl}aZv23_6<(GQ3VP~Y>a?TF8X`juE&eO}Jm5x0xh zj)$g_CnpW?RR|xtTAFi$a9C(Od{*@0^DKX4DC4W!^nWQ#2|hzY^7a}BdQBO&E}umC z&o2M~MO z(=RREwDA1iNR(KRDA|w8*x%0D81=NFFUZo6vrrM5pQnbKl8tb3eCkCqw(pSZ9oaP2i8X2+23uax_=>vJ5=C&mMi5%mVtJmWx(rr99rIb8}okxxibJ?aOLuOZseT88DQpF*NZC z12NVjAAhIJuUs-Yh*Z8^BaetvJIAl$(Nm{rokki?4hzOm^RvK=Z8dd|H8HD0#_V@K znF00$8J7Mm0DQ)}iSDD9I#JVnV@9;5_zCut12Py9lAGh~U@yCt9+PP@X*!Wzf0 z2*snWnAUlBEZNXyv3*@gRYqPWF;XwEjCE5y#*$UUR(zq4G``S+krz8F4aaC9Q1_dn zMU8$;XSVkT!a_qs5nI~XLpE3U^7D`1ED6g3KLrQm1qZ)rQ_d%?sw$b#YcOcUy~;d8 z89oV;t;+>4uouJbb5jl>>_iJ}isdhU!8>mrX(UcHQ%s^h=aXYC%2h!q>r+w~9{;MD znhVKlVePDZc)VHJW#|dk=sTpEWNHrPS@oNzAB$NUsfP&H+eegChUTb8M-vzc6FTqm z_D2WMQD1fkeZtUKv|loZg3Z-9QPl`0CvPg94m2prEuP9O zCZ9T4zWw~I=qOZXt`X5LdwL$p^2Z+_T!K09MZ#e0EtsSu22xX-#r2o2JGd~D9B}hp zrM(}py0)l1!T9yGUA9U(5Y8Fc2uq)oyc*Wp`VkvtmuN^qg}dVOmvcE6-XhPwHE2<@ zLa;8?UhGx)ZZk=D3x;%MA|~ycn$@+_yth2#4cWq>=$^Objcp`69AlUTrLTfJZdIQ3 zQj-|^Mp*8j`AU5sRvayLtYAr$2smpm!OBddV3@r~1h9uEx|s4J)c$ssfw^d&hK)Ljbg@0Fr zbA~8;>xac1db6j`&8B)jgZ5g~>J99!d{|VbTF+i1MFJ1L>xl8Y zo--IGF}yu4+O}=kGW+s)bcnz7Z=B4biELtSNK~UwIrPD-QouVkXAAg2R$;#Qw|uZO zh$2k5zt+!?`uxS-{pJStu%jI()O3=Nm?b@Qf$=A_$nZtXb7F@Bp=Q&}hls!4Bm-&p z6K%x|D~dyH)t;292GjHh+Twt!)`ZG~yq2$EwWNHG8@YH0WmWUvkO5{O{Re!TT_=P1 z1JXtqjujtkw?UU%kxc} z+r7<8z(QWCjnosrbI$duQa}0y3K5@Rc0qn?K{jO2CEZ6W*Mh3tTM+lt{fDpak1er7 zSR#D=o`E;MBK09XEWoWGDYe8f^~KgnUlwn_H!2C#L$+1DIN)N;C6#Z-lEE(&QHnLL zkM|rQq9lDD_xGE_0i>Z@i2eWK5&!C8t}ppWGneOGJNN}mmEyNKy-F{!A``?TExxG) zi>eM*1T0PTEt}v(DMpy0j}!d7J2Nf9sSGysj+wwewu&(E){eU2^Y{Da zKR#@l5rNQR0(cmh4{=12~QB`rZFhBTK+okEHp8vr*YCXOH$SlO97KgOa||BiV=-W zwQ^J_?t6Uu5?pIQo32=kjqA)@@DV);iufG7wE=t_LuO;ZMiew4Ju-nqI{z4(wdYGA zq`U`*^c7ayd%~a+mMgzx2?)GFI$lFyt<}luLe#y0fmI1Jm9NDu89TV{dnWSxj(^+> z-?7xSnf1)?2Q&X$^9q5VA5I1}rTgOg!qLa!Ezj!i9+o@GIWBPj6VLniKvGrIkbZl6 zyLvbR@1}RG2Q)4_W=@C&bYr@=!+3Wh)j})>x8qN4mM-qL5=Pi#hUkB-dLCTO{648G zoKZ~~e&oW8Rf59Fznq7Z(a4=`2!AUFNbke1#hqPt}i$?9TvY`xpxB6*Vne?8e 
z{3YDt@D3l06~0NqM8K2Dz6;2XvOip4U_K50r+ffARG7XuBD%^cNh9tfBjFOjq$Bet zz`&56%SnlAc&?u31~{zBuZCfvmK-7JD>GPM*(b@nVjN&lj$nVLnj6s~`B~&1`O&oZ z?c0w(Gns#v3F~o^bhYE+P{N3Oe4q0y@`%nPKD^)Jf8yE}S$NX!ey!9M_{ef_MR6OT znK+iF@#YreuI{M&M$%Anbg3fyrMoo?=}W0js@jB>uLRJlqB!LomesTg>&&|RHG$n< zl23NBhXgMrwD#(!7}E#WmT-%fS&YA$tWNmjQ9Bqz4Fu(~vJ{3?u_6NBq+CPVUBe<| z2Mv%+99Bmou#8w~Bu%A4Qc?h>AHRv>xXF1x);bkMGhuNW6FbH@tWKpDBX0SwN>= z-D;v9c{Grxo0ISD!S#j#M!C<@AcxJv^^WTwd37t1HKjn|%{MEra7X2@=@SM$Lt849 za{~So1B~#}+2jVmf zqwkxXX~MI#Tlu9u$vpbxw95JFOVIti44AP?6U>MJWi#V9g#l@f6~KJo?~j@i*t${&A-fb4P=LLTT_+BXH(C1Ht^1Gkf(l=T7HIC((Xu*|fnaq+ z?H%&~LsCnAF z{rX6Qa_XFXR8p`~wTge@uauxLzUX|KTMvlR=4!WEBW~Im;0W(A3FV7Um6@Hjj0@a> zs&IKtkUn4-4HncLqWR#ffrBtk!F0kqG7NZA#D-YejwwkO&*N%eyG*9C(y`0GkEg@# z5g966TA#HlSIa}kA=e8BX5g_h{!Cq{&*$g)C)-je&XL(SVT1UEOZ|%gE_5sRGMm{GaPr9Xg}t(_fx6Mw&mr z2Y_9J^;)mDHKG zVoJ?d0Sw1_F_-Jqa;LJcFDaB)Af50}rNn6gEBvGD?>}Ycpv<^!k&?! zGc+sAQC7V6Ldbeivb?pX-yx}Nga*n*w$r8b)!pALQ=3fMpC1&++%#N6dG=2-&Pf&c z3@f`*>O+>4kzCTu_g>>l)7EiKf5%*u)>>4{`5F1$H*EV4H1drA$`dg%)VV8bH*h#V ztszhJx-#81^-f+VVej?i)HCk>;d9#K<$z=bNRcv??VG!9XwkXX%3d|L-kh$GF;y~w zkTQ`tva}_j$>vR`_*z>HOgF5(jSL6VlLQ2i#I|WJr@hv=+jv^AAa;?*)J{r^SAn6? 
zXv4Bu9w}LYNoJNNGx~v$wCL{Fw9uAZlf{(jOWQ54K~RVpiUY0vI`4?QD|6v~m=N^~ zwmK`H#_;c+R_*lG6_60T9}XGx<+zlTiE89Y;d}GCP%Bf`s1c&k`=EU8W5*{@tFvf9 z*3L>k{8|qqOG9{Y7q?5lC*Pj6y-tQntFhQXWrt^wP?a2wW`X|P(9c~?triBJ>;v+m zjK9-Kg~o2@JgP^ZS{#i3YV)dSun4f0e;#Z@!BIjAlCkWlzUfr4n7H zy4NG+)0B;sx_Iya=2bR#>9uiODm$a_94;Bcr9Gjkd}zx(mKNQ-0K^GoYz+6>pW*_Y ziZIC~PcTV6P&4A`Kq)aUYkV4NNaDQAvb7w( ze3+28tmKEdb3nXI=Oz)&G9L8Fs*^qW%aVdcg($(+w&PvDiYIicdYrRDrNGu@cxijF z6hnRrak^V7F}DI%J#|HgkZeQ|M&UG3)krwW)NMzUhB!@SN7AWNgz|%`R!)VLfd<|0 zPZew>plPi<=_>#tC!;IINZc(t7aF~de9=j@E`Y(PRfUF`XLQv={3IF)y0)bJ8A><- zg}b>gt*k5(isaYY8zzNnP-LSf5AKZ4yoj1nef!R;kRZcB8p*y_g+DPkSsfQ)$FSHMO+^P{#jqTcrOkWwmFAIHGd$ zp-fb_v6vdgGu~BWYpLnIrpUz{Q7>p$1A z1B@D?L(@V%C`Otk`oj7Ex;4SH3w!*s8}UwWXLs{eXh4LEAmRbQ|A;_d{O^^_WaCUD z->^#++F2a;_evm`@#U6ENUyuVBsD42jPI8_vr@emb?7A&a=ku#VtKYV7~KHzc5+D; z4_v@rzX6=)DF=BDJRW3c9gtE$_X&>x)&EY0G;bI~4)oOz5d~tRbW-bMO=?zE}*1EbD4-Ot_fu4`f0QS60S;a0}W0>g65ZjkLp4SrYxq{ENmTp1~b*8F2fNvkAbe zAmgGBv}0MEnb{u>>dmV5R^cve|6vQV>w>J@FBx>AKz%o;$4M;cta!c2*KpM{bCct< zdX3xuf^t+FUq;l9)?J2@TN_t5Z2l#8EG&@{C~YB=RLgv2>J6#a#b`lg6Q6D&??yV1 z?->SfEEQ`_+wq~iZ#v$=l%H2H)ZhjN8ZhzWLhBuwM?aFV>;U-R1(W~4_WuTrga7}` z{{K{~Od|m`|kMr%1C-o z&P13{RLRnZG43u0$FtqA=Pb<%Yu`x1#MkEKayUU)eII1`+8BZ?Y}*Ds9b+8T zOSVM|Qdn>j#eRqpY~25j`KgS-IgT-<`_9dKgUn{}7n}IGdErFd^{&Au#Vw=sSTi`tTp} zuJj-BU)n|={^!j+>l8_!^6J;MakAj=hL5Emm7Dr{GKh_0t?)$ga4`in-OA5u%_{}TA_}9T5AH)xxSm= zW8v>(nTc>HApI}SK^vmW{JyBMIEv&LY_8vk%}h;|a-7p7vk<8S6y)MEqc^v}`^7Md zh?>908zBFpv>L!4E^zZB^>WBQw{zUIrTa253D`c_5Me@ErJja&EyqiXs0qD z)RE62cjL~1#(wk0zhbTI65MdH)X6NWb*dWD;-m7=^v5?2kBB!WC_UJwC7iq~pK!wK zr4b7cvfm=8MVmIZ)rRQ@6F-b?tR|$;BQ8OR@z>tmDRjMQpTEo9xg=>quATCksf{s@CC?A}+_%ik z6VB=)Bc~LQf#)6`#{t#}GhE-_Y=;+GIhn+hmluZ+$>xql`^iH+8`|?V;uW&Nta}gU z&*(=d66us=(Z$>C1%@v4`lb_dAcOTG_s}hzM#_;U0SI@SepQ%q#B~2OLCtL&Z!DczW-a3y!;6A`_1TCW|a|MllCN3+wu~4F6 zW~zNn^xr!4Q3|jZVAR+UU$%~T#5$5HlMNMZF1ggTt@?)F=NW8*XJ4A(x%fk#?HqBK zVL-{R9UVwx^>icHNoccrF6g&jw3e%a&PQUUtx4u7;on8F_OCL#pCr>b;@=?9@87d* z&(l+K&w|;^mw&egqNnsYMWKQ$&9vi 
zKA5iG4tE$s6FVjD;dH8HbSR{4oOhb@KhNZ^CO_uV=llf;kuZ%kqoq-g6_<*+a-GmS zaFXE(cCQD}5X4Y|poz2rOY*NIKuDhrjn6F30QDLLUKU4nZAY}e1TN2%A-{Xx2tEz9R6W5l!%#Az4_hj_l zXNhWg?-7-SMB79n{n%2^&nLPo^Q$+Q;@t=u^xQNo>5n|=iIO?gL@P^3M*d=H4A^d4 zUYfo??de3}SS%45V5tg#*PPICJ*Btv9i7%4AiA3~ayEvK2L-&U0sdulb8@nSn&S>i zx=;tQ$mncazxA(t5)uVdK9Pj-5Oij0+&*Bh;dZfTnh1LEzYlHEH;_NA;fcGVy(e;xWQj$x}k=M8ujlIxAdWiWz8Hd zubJq$4SeOh(=Owfj&-2-;FnmLlRee@inW)npXJ@^2x@3VJcWiRShv8si<6;dUtA4L zY&Ho&=&OpPx&QsncRj*!p^s)ak_UIS5#Q{pYc}Z*IC9v48;gv8@b858+7)?EN_MCf zCZ)?(Sws^~XvnJ>7(nkGPcBp2tPX!uC{=$|h;iG9uG%YqA-i@inKHG@V(J2%rXIwl z+`S2W>mZznkRhzyWA3US)yVctVe~doa4v)pe^*H|QR43R2{IATE9E}$^ztwgg#A0J zxf6qM9&}S3K1piR*5B}yRm3)&SYmZNtrsu0=mCF|@#H-mya&e-KLJ9H5P47C1c ziJLdpaKY_@T2|*EgzNohN&~ed2><=mwvw)sP{AGav$8wwAaQpDPM(zmLfhsl(Xm4p z_Nwu0s-Cfq-s;M{#78^PxZxFc@Y*4N0^kzLQtrJIVS!>_aykRL>f^;kD?kn=3dQ-YPr!sF1FPQD)WCV z*Zg;lBt?XTw0vWDaLv72;@oKRE05fm^~i1Z`G&;C_)bWfoLoJsQJ|b>dG9Y7Va~S0 z=g+-*-c;NV@n!~X>n9~*otZFUtpE!!XDT)uF4*}fFZa96S7D!+gQz3UsxX^?j8!&T zL9Jb&h#QuqsQbB=ecUbO)N*L%&mMDAvrn!1Y!{OsYy6DAi#WxZT@af{_tXeNa&031 z4U{3zGLzYqfJSg8dXU|HDvy7Rbz$&!%!uW+wx+L?)T{GDG2J9TSx^|AtKHA_YMG|_B_Z9B%0h^Vylz-g8-G?<iwdBa;A+dcuD-ULIv4d zn#7J$X6s}8D>u+j;Ygzj?K=l5D*s;cKX*s4Hb*{MvHwtI4O9|xe)Ff<;7yR3fI*r%;Rb!ENk41Aq$raSQXTq$5@BE9>%xCG= z%OI1!A&gK9<4u z9-BOn0BC;fRFeltUz|M519kUNCu4x$ifH+?`?k{tz;GFUN2u&t(}WgkkbN5I#7q~H zaQV_KO+K+=Dr*)>6-X84`)|Lp3Pd_F!~NElim&ox#L*aFd^&aSv}eqjDLnp&-LL;m zMQ^WH!Q=AZUbFpvhWaO;yB=L^vWc~R$c@?=4a%>PBEH71-_siGeAm0X0}eVq-uV)M ze=TGN=rVkw&+fCMa<@bLY0#RxR30Ee+hZ*bg<^wE$o|n%9>;uh(U&1kZ|}O~hk%x& z+7t>$d-hKjwq?{>4Y1`PNrc>wQ#n%&T2F(QK3YUD@aZwQK<_;0#Uw}WWq5U!bbxIb zF0|g0vXH#n#H+A|MVEffbn~AAMAO5FTKB9>x07Ua#>E}9{SdT$WQ^P7FaA2bQ@ZsS ziq5CrW9F|-EBJ}{sq9`i4v$m}^FkMDo|PgVZ2;=#ILsPRdKp~yP+M&wi)Pc}aUFo`oCD2lUh?OThV@xh7!Ohlcru2c_WeKkQ}sm{82Ao^Wi zLTLeL?yk;*0_n`~#O|Iv6VgXrH$0KWubLi|2fWYK%ZuJ;)6^}xXaUieGi6~^1UGAqteu1P zpL}@90O=$95{%<#_jKc1-b-J+_#r@GQ*?u;L|Q!8k%ez^p|kV1V6R`#&m$z~r5P|z zCK;92)qVcG)ekzhBJmq|#9Y3>H(n>1+Cgr(F2MApD-R=GuO$q)c;TyJx=8omyE 
zKL}uO&b`tAPoAS#Am6$>F_YT3$d9L8pt~QGVuW4F1NgP>;tnP&VmF?5HWb0Hxb*L* z2j{~MN)3Bzm$^Sp%q{@E0n+jtMh&p<9n$!Hx?_Atov#}{0o#D~{RV_z{dzYe@!j`G zz;UMdmS#|;S*-UZE-BO7q);5Z@G$v*$4>76^S?#MBM@nSnjG|hlrtSbVi-f4mHNV* zkTgk?H42ByFy=ZZ^@UJY?(n0M=>R4JLky-gelsB+w&?+uaUoMGa@A-ct3=fP<_RiS zpOh?CZ34W>fG)F5Bs)8E5lC`BYF5wY$0Wf87`9{Hq;bfI?NtWAk;WrK>f>lc1L8Ns zGCxlokkCRN?~tZEX3XDA;J2asa%>&k2#i7ZAjF89Ta+=wtG!Sgcc1*RkRT6iz?erM zi#Px9z9+L>pNb-V!HaH>tZH?xfo$|quge=-iRjg6gx0odq@BND7&`%xOaCWwgfK8& b?yrbNC+vqew=cv08G)R%vQ({vVbK2r7|~Lg literal 0 HcmV?d00001 diff --git a/advanced_source/dispatcher.rst b/advanced_source/dispatcher.rst index 23ba0f96be1..4f3b52fea32 100644 --- a/advanced_source/dispatcher.rst +++ b/advanced_source/dispatcher.rst @@ -105,6 +105,8 @@ speaking, the structure of your registrations will look like this: that provides implementations for all basic operators on the XLA dispatch key. +.. _autograd-support: + Adding autograd support ----------------------- @@ -299,6 +301,28 @@ the safest choice for the execution type: at::autocast::cached_cast(exec_type, t1)); } +If your custom op is :ref:`autograd-enabled`, you only need to write and register +an autocast wrapper for the same name onto which the autograd wrapper is registered. +For example, if you wanted an autocast wrapper for the ``myadd`` function shown +in the autograd section, all you'd need is + +.. code-block:: cpp + + Tensor myadd_autocast(const Tensor& self, const Tensor& other) { + c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); + return myadd(at::autocast::cached_cast(, self), + at::autocast::cached_cast(, other)); + } + + TORCH_LIBRARY_IMPL(myops, Autocast, m) { + m.impl("myadd", myadd_autocast); + } + +There are no separate gymnastics to make the backward method autocast compatible. 
+However, the backward method defined in your custom autograd function will run in the same +dtype as autocast sets for the forward method, so you should choose a ```` +suitable for both your forward and backward methods. + Batched ^^^^^^^ diff --git a/recipes_source/recipes/README.txt b/recipes_source/recipes/README.txt index f93ee92c2c6..a182b0a11c5 100644 --- a/recipes_source/recipes/README.txt +++ b/recipes_source/recipes/README.txt @@ -56,3 +56,7 @@ PyTorch Recipes 14. mobile_perf.py PyTorch Mobile Performance Recipes https://pytorch.org/tutorials/recipes/mobile_perf.html + +15. amp_recipe.py + Automatic Mixed Precision + https://pytorch.org/tutorials/recipes/amp_recipe.html diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py new file mode 100644 index 00000000000..c1ec52a3883 --- /dev/null +++ b/recipes_source/recipes/amp_recipe.py @@ -0,0 +1,325 @@ +# -*- coding: utf-8 -*- +""" +Automatic Mixed Precision +************************* +**Author**: `Michael Carilli `_ + +`torch.cuda.amp `_ provides convenience methods for mixed precision, +where some operations use the ``torch.float32`` (``float``) datatype and other operations +use ``torch.float16`` (``half``). Some ops, like linear layers and convolutions, +are much faster in ``float16``. Other ops, like reductions, often require the dynamic +range of ``float32``. Mixed precision tries to match each op to its appropriate datatype, +which can reduce your network's runtime and memory footprint. + +Ordinarily, "automatic mixed precision training" uses `torch.cuda.amp.autocast `_ and +`torch.cuda.amp.GradScaler `_ together. + +This recipe measures the performance of a simple network in default precision, +then walks through adding ``autocast`` and ``GradScaler`` to run the same network in +mixed precision with improved performance. + +You may download and run this recipe as a standalone Python script. +The only requirements are Pytorch 1.6+ and a CUDA-capable GPU. 
+ +Mixed precision primarily benefits Tensor Core-enabled architectures (Volta, Turing, Ampere). +This recipe should show significant (2-3X) speedup on those architectures. +On earlier architectures (Kepler, Maxwell, Pascal), you may observe a modest speedup. +Run ``nvidia-smi`` to display your GPU's architecture. +""" + +import torch, time, gc + +# Timing utilities +start_time = None + +def start_timer(): + global start_time + gc.collect() + torch.cuda.empty_cache() + torch.cuda.reset_max_memory_allocated() + torch.cuda.synchronize() + start_time = time.time() + +def end_timer_and_print(local_msg): + torch.cuda.synchronize() + end_time = time.time() + print("\n" + local_msg) + print("Total execution time = {:.3f} sec".format(end_time - start_time)) + print("Max memory used by tensors = {} bytes".format(torch.cuda.max_memory_allocated())) + +########################################################## +# A simple network +# ---------------- +# The following sequence of linear layers and ReLUs should show a speedup with mixed precision. + +def make_model(in_size, out_size, num_layers): + layers = [] + for _ in range(num_layers - 1): + layers.append(torch.nn.Linear(in_size, in_size)) + layers.append(torch.nn.ReLU()) + layers.append(torch.nn.Linear(in_size, out_size)) + return torch.nn.Sequential(*tuple(layers)).cuda() + +########################################################## +# ``batch_size``, ``in_size``, ``out_size``, and ``num_layers`` are chosen to be large enough to saturate the GPU with work. +# Typically, mixed precision provides the greatest speedup when the GPU is saturated. +# Small networks may be CPU bound, in which case mixed precision won't improve performance. +# Sizes are also chosen such that linear layers' participating dimensions are multiples of 8, +# to permit Tensor Core usage on Tensor Core-capable GPUs (see :ref:`Troubleshooting` below). +# +# Exercise: Vary participating sizes and see how the mixed precision speedup changes. 
+ +batch_size = 512 # Try, for example, 128, 256, 513. +in_size = 4096 +out_size = 4096 +num_layers = 3 +num_batches = 50 +epochs = 3 + +# Creates data in default precision. +# The same data is used for both default and mixed precision trials below. +# You don't need to manually change inputs' dtype when enabling mixed precision. +data = [torch.randn(batch_size, in_size, device="cuda") for _ in range(num_batches)] +targets = [torch.randn(batch_size, out_size, device="cuda") for _ in range(num_batches)] + +loss_fn = torch.nn.MSELoss().cuda() + +########################################################## +# Default Precision +# ----------------- +# Without ``torch.cuda.amp``, the following simple network executes all ops in default precision (``torch.float32``): + +net = make_model(in_size, out_size, num_layers) +opt = torch.optim.SGD(net.parameters(), lr=0.001) + +start_timer() +for epoch in range(epochs): + for input, target in zip(data, targets): + output = net(input) + loss = loss_fn(output, target) + loss.backward() + opt.step() + opt.zero_grad() # set_to_none=True here can modestly improve performance +end_timer_and_print("Default precision:") + +########################################################## +# Adding autocast +# --------------- +# Instances of `torch.cuda.amp.autocast `_ +# serve as context managers that allow regions of your script to run in mixed precision. +# +# In these regions, CUDA ops run in a dtype chosen by autocast +# to improve performance while maintaining accuracy. +# See the `Autocast Op Reference `_ +# for details on what precision autocast chooses for each op, and under what circumstances. + +for epoch in range(0): # 0 epochs, this section is for illustration only + for input, target in zip(data, targets): + # Runs the forward pass under autocast. + with torch.cuda.amp.autocast(): + output = net(input) + # output is float16 because linear layers autocast to float16. 
+ assert output.dtype is torch.float16 + + loss = loss_fn(output, target) + # loss is float32 because mse_loss layers autocast to float32. + assert loss.dtype is torch.float32 + + # Exits autocast before backward(). + # Backward passes under autocast are not recommended. + # Backward ops run in the same dtype autocast chose for corresponding forward ops. + loss.backward() + opt.step() + opt.zero_grad() # set_to_none=True here can modestly improve performance + +########################################################## +# Adding GradScaler +# ----------------- +# `Gradient scaling `_ +# helps prevent gradients with small magnitudes from flushing to zero +# ("underflowing") when training with mixed precision. +# +# `torch.cuda.amp.GradScaler `_ +# performs the steps of gradient scaling conveniently. + +# Constructs scaler once, at the beginning of the convergence run, using default args. +# If your network fails to converge with default GradScaler args, please file an issue. +# The same GradScaler instance should be used for the entire convergence run. +# If you perform multiple convergence runs in the same script, each run should use +# a dedicated fresh GradScaler instance. GradScaler instances are lightweight. +scaler = torch.cuda.amp.GradScaler() + +for epoch in range(0): # 0 epochs, this section is for illustration only + for input, target in zip(data, targets): + with torch.cuda.amp.autocast(): + output = net(input) + loss = loss_fn(output, target) + + # Scales loss. Calls backward() on scaled loss to create scaled gradients. + scaler.scale(loss).backward() + + # scaler.step() first unscales the gradients of the optimizer's assigned params. + # If these gradients do not contain infs or NaNs, optimizer.step() is then called, + # otherwise, optimizer.step() is skipped. + scaler.step(opt) + + # Updates the scale for next iteration. 
+ scaler.update() + + opt.zero_grad() # set_to_none=True here can modestly improve performance + +########################################################## +# All together: "Automatic Mixed Precision" +# ------------------------------------------ +# (The following also demonstrates ``enabled``, an optional convenience argument to ``autocast`` and ``GradScaler``. +# If False, ``autocast`` and ``GradScaler``\ 's calls become no-ops. +# This allows switching between default precision and mixed precision without if/else statements.) + +use_amp = True + +net = make_model(in_size, out_size, num_layers) +opt = torch.optim.SGD(net.parameters(), lr=0.001) +scaler = torch.cuda.amp.GradScaler(enabled=use_amp) + +start_timer() +for epoch in range(epochs): + for input, target in zip(data, targets): + with torch.cuda.amp.autocast(enabled=use_amp): + output = net(input) + loss = loss_fn(output, target) + scaler.scale(loss).backward() + scaler.step(opt) + scaler.update() + opt.zero_grad() # set_to_none=True here can modestly improve performance +end_timer_and_print("Mixed precision:") + +########################################################## +# Inspecting/modifying gradients (e.g., clipping) +# -------------------------------------------------------- +# All gradients produced by ``scaler.scale(loss).backward()`` are scaled. If you wish to modify or inspect +# the parameters' ``.grad`` attributes between ``backward()`` and ``scaler.step(optimizer)``, you should +# unscale them first using `scaler.unscale_(optimizer) `_. + +for epoch in range(0): # 0 epochs, this section is for illustration only + for input, target in zip(data, targets): + with torch.cuda.amp.autocast(): + output = net(input) + loss = loss_fn(output, target) + scaler.scale(loss).backward() + + # Unscales the gradients of optimizer's assigned params in-place + scaler.unscale_(opt) + + # Since the gradients of optimizer's assigned params are now unscaled, clips as usual. 
+ # You may use the same value for max_norm here as you would without gradient scaling. + torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=0.1) + + scaler.step(opt) + scaler.update() + opt.zero_grad() # set_to_none=True here can modestly improve performance + +########################################################## +# Saving/Resuming +# ---------------- +# To save/resume Amp-enabled runs with bitwise accuracy, use +# `scaler.state_dict `_ and +# `scaler.load_state_dict `_. +# +# When saving, save the scaler state dict alongside the usual model and optimizer state dicts. +# Do this either at the beginning of an iteration before any forward passes, or at the end of +# an iteration after ``scaler.update()``. + +checkpoint = {"model": net.state_dict(), + "optimizer": opt.state_dict(), + "scaler": scaler.state_dict()} +# Write checkpoint as desired, e.g., +# torch.save(checkpoint, "filename") + +########################################################## +# When resuming, load the scaler state dict alongside the model and optimizer state dicts. + +# Read checkpoint as desired, e.g., +# dev = torch.cuda.current_device() +# checkpoint = torch.load("filename", +# map_location = lambda storage, loc: storage.cuda(dev)) +net.load_state_dict(checkpoint["model"]) +opt.load_state_dict(checkpoint["optimizer"]) +scaler.load_state_dict(checkpoint["scaler"]) + +########################################################## +# If a checkpoint was created from a run *without* Amp, and you want to resume training *with* Amp, +# load model and optimizer states from the checkpoint as usual. The checkpoint won't contain a saved scaler state, so +# use a fresh instance of ``GradScaler``. +# +# If a checkpoint was created from a run *with* Amp and you want to resume training *without* Amp, +# load model and optimizer states from the checkpoint as usual, and ignore the saved scaler state. 
+ +########################################################## +# Inference/Evaluation +# -------------------- +# ``autocast`` may be used by itself to wrap inference or evaluation forward passes. ``GradScaler`` is not necessary. + +########################################################## +# .. _advanced-topics: +# +# Advanced topics +# --------------- +# See the `Automatic Mixed Precision Examples `_ for advanced use cases including: +# +# * Gradient accumulation +# * Gradient penalty/double backward +# * Networks with multiple models, optimizers, or losses +# * Multiple GPUs (``torch.nn.DataParallel`` or ``torch.nn.parallel.DistributedDataParallel``) +# * Custom autograd functions (subclasses of ``torch.autograd.Function``) +# +# If you perform multiple convergence runs in the same script, each run should use +# a dedicated fresh GradScaler instance. GradScaler instances are lightweight. +# +# If you're registering a custom C++ op with the dispatcher, see the +# `autocast section `_ +# of the dispatcher tutorial. + +########################################################## +# .. _troubleshooting: +# +# Troubleshooting +# --------------- +# Speedup with Amp is minor +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# 1. Your network may fail to saturate the GPU(s) with work, and is therefore CPU bound. Amp's effect on GPU performance +# won't matter. +# +# * A rough rule of thumb to saturate the GPU is to increase batch and/or network size(s) +# as much as you can without running OOM. +# * Try to avoid excessive CPU-GPU synchronization (``.item()`` calls, or printing values from CUDA tensors). +# * Try to avoid sequences of many small CUDA ops (coalesce these into a few large CUDA ops if you can). +# 2. Your network may be GPU compute bound (lots of matmuls/convolutions) but your GPU does not have Tensor Cores. +# In this case a reduced speedup is expected. +# 3. Matmul dimensions are not Tensor Core-friendly. Make sure matmuls' participating sizes are multiples of 8. 
+# (For NLP models with encoders/decoders, this can be subtle. Also, convolutions used to have similar size constraints +# for Tensor Core use, but for CuDNN versions 7.3 and later, no such constraints exist. See +# `here `_ for guidance.) +# +# Loss is inf/NaN +# ~~~~~~~~~~~~~~~ +# First, check if your network fits an :ref:`advanced use case`. +# See also `Prefer binary_cross_entropy_with_logits over binary_cross_entropy `_. +# +# If you're confident your Amp usage is correct, you may need to file an issue, but before doing so, it's helpful to gather the following information: +# +# 1. Disable ``autocast`` or ``GradScaler`` individually (by passing ``enabled=False`` to their constructor) and see if infs/NaNs persist. +# 2. If you suspect part of your network (e.g., a complicated loss function) overflows , run that forward region in ``float32`` +# and see if infs/NaNs persist. +# `The autocast docstring `_'s last code snippet +# shows forcing a subregion to run in ``float32`` (by locally disabling autocast and casting the subregion's inputs). +# +# Type mismatch error (may manifest as CUDNN_STATUS_BAD_PARAM) +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Autocast tries to cover all ops that benefit from or require casting. +# `Ops that receive explicit coverage `_ +# are chosen based on numerical properties, but also on experience. +# If you see a type mismatch error in an autocast-enabled forward region or a backward pass following that region, +# it's possible autocast missed an op. +# +# Please file an issue with the error backtrace. ``export TORCH_SHOW_CPP_STACKTRACES=1`` before running your script to provide +# fine-grained information on which backend op is failing. 
diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst index 86438135e1d..f8986363092 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_source/recipes_index.rst @@ -167,6 +167,15 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu :link: ../recipes/android_native_app_with_custom_op.html :tags: Mobile +.. Automatic Mixed Precision + +.. customcarditem:: + :header: Automatic Mixed Precision + :card_description: Use torch.cuda.amp to reduce runtime and save memory on NVIDIA GPUs. + :image: ../_static/img/thumbnails/cropped/amp.png + :link: ../recipes/recipes/amp_recipe.html + :tags: Model-Optimization + .. End of tutorial card section .. raw:: html @@ -199,6 +208,7 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu /recipes/recipes/Captum_Recipe /recipes/recipes/tensorboard_with_pytorch /recipes/recipes/dynamic_quantization + /recipes/recipes/amp_recipe /recipes/torchscript_inference /recipes/deployment_with_flask /recipes/distributed_rpc_profiling From ba6070e0b201775f334db7232c2f13387b599999 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Tue, 15 Sep 2020 13:43:56 -0700 Subject: [PATCH 16/21] Fix model to be properly exported to ONNX (#1144) Co-authored-by: Brian Johnson --- beginner_source/transformer_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 0ba9711ed67..90c8b902d37 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -77,9 +77,9 @@ def init_weights(self): self.decoder.weight.data.uniform_(-initrange, initrange) def forward(self, src): - if self.src_mask is None or self.src_mask.size(0) != len(src): + if self.src_mask is None or self.src_mask.size(0) != src.size(0): device = src.device - mask = self._generate_square_subsequent_mask(len(src)).to(device) + mask = 
self._generate_square_subsequent_mask(src.size(0)).to(device) self.src_mask = mask src = self.encoder(src) * math.sqrt(self.ninp) From cba6b85051c3aa404c9b77d28f4ee9a5b2d38266 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 17 Sep 2020 12:58:39 -0700 Subject: [PATCH 17/21] Dist rpc merge (#1158) * Create distributed_rpc_profiling.rst * Update recipes_index.rst * Add files via upload * Update recipes_index.rst --- _static/img/thumbnails/cropped/profile.png | Bin 0 -> 42945 bytes recipes_source/distributed_rpc_profiling.rst | 314 +++++++++++++++++++ recipes_source/recipes_index.rst | 7 + 3 files changed, 321 insertions(+) create mode 100644 _static/img/thumbnails/cropped/profile.png create mode 100644 recipes_source/distributed_rpc_profiling.rst diff --git a/_static/img/thumbnails/cropped/profile.png b/_static/img/thumbnails/cropped/profile.png new file mode 100644 index 0000000000000000000000000000000000000000..372db8bbe8771eb7c788f3c70d895e5629896816 GIT binary patch literal 42945 zcmeFZ^OGA#ogT@xVuYnC=S6X6qn#`!3#x;7iqB|#VKyZp+JzL#l5)S^qlja z_ul{D{_rH(nc0)k@1C`0*(dSZn#$N1jj|FWF_zWz5wEQ0^PJr*JSpV|mUMTq}5|F4~xVj%>Upu4LWd%?kR zGyJ>YL1oguu&#`q^o_iY-fD>1xP9OT+PYcWaf3d%|7!&&0TP2{KiGK#X+a-cT)o6V zlJx&oLkyPxSIk3C`(IVOoh9jw-fGjzyLsBt3UUi@1L&nNXlZFBJZJ0>Oxq31DcPIa+9|b!v z8&4;9Zznfb+JF55t=)XQCF$w^4fMaS|DLC}ll}i4$<^zBmIYfN&%a-Ic)0;Q|JygL zsl>lhF)dFgJJ`to`j_IB_^+D(pR)fMM}p_y;Qz15{P#-#s}#0WDGUjo|AkEoL!*zy z7!FPbPE|ox9|V8gh2F%d?7v5mW9*Z)cI&FzrS^N-uPF={Uk($2F_p9jMYF|x6N%a_yxYaUR$#pA-@kD&^ag?MhN@}$W4o#D*(*Vlx& zb|2HFQjq!2=RXMi2Z8?}@E-*JgTQ|f z_zwdALE!(_2-xS!@XaAlUnt5T?J|ag&Ndq^f}`*j2fK$&iLogLZq0;#xIq=%U_UDq zbs;ea5;(g#Wptp$^|;XW1gc|f^N~++fvLGK&xIFTIFjBa8^y2=EE~Mp;Xt4V45OXz zmNFvH(<10G;Ua4g>W#-*m!Xt`a$KxKn2d-MLvN#Qb&x#Fi44OBZ7x7&BrUxw=q#D} z!!|!O#v|=gwi1pq{t1*;x=35tcDmwDHh8$6lS<*W*@sF+fC!9wZ&I4Y7fgLKq1uvD2+^l13Bg0keY*i0`szxwPv3P-;$X_#rtE=a$7TELUWWhZZYkcDL34p=kz#tQfhm4R#h*lBk+8V-e%m+1q zZ~(LVSj+e8|IWtJe@lGZc4f`%H{W>}bD3M3dJ*q`Q83r~hs8`-=-P^4x1Wp4@`tRo 
z;c7dE-#l7ks9yEOb3%IXPp$|Lh99>SdSTb)U&m(g|7yJ5FgW)>rb)>p%j+Pq61Or1 zrXU9*jO|%fR6s2`16n4HAaj1R>##XBrt~DZxip?p`Y6HfY|s_zFuPxmW4+tapou<< z{jO2wh#g4H#JdJ>khO4pxqvxV>uMGr75l^u10TpI>df_hM|HhxOu|&Gd9KDK_EY)5 z@apF+IM~+0p!SdW3v=5<1kuE=iOO+jT_odf24m*3Gtp(}OoSDrF@}HI)h37#_ZtSg zwXiz2(CDLCDw()=-;tAnY+f!sJj=dBY0b80m)`x2-*RF(u7J?={J?y~OSGsPQkWz( zTTKfd%I^LM1DTSp$29(5=9oohrhlkl-Ty}My#~kmFgC;#DBN2cR)3bUp1bzNfmhJyqUMrDoBlRwY%# za*6$SaYDJin_OL{>ruf}N;89#*Sr_wQ2#=4RTo8e6L1eF^YrnF3vubp*Laylf{oj} z60n>j+CnATepx$}FKXh}Aw8*?stTcgFobgyFQ5wLj8KvAaueTtX|K)pnm&s&Jlh1i z_c26)jIBI?^-C*HqH+a@Je}72d_9pTXo@ha)a~g(wL4~euzwk!$86{}$v+pZzxN<4 z@|>OB;2oB%6}FsrP77v4uV%+4D~k9J{D%6^t7JFwvo04#WjnFm-j84<3UFgX5;6WYC*;Z56WvRY~VN`EIl_ z&2mt{S5Hu7vMDO#6EFfyHE8a6!IxESwvQiBu`5q;MK`_}*{`iKn0%B;WT$m~q5>m=J-p~|aIJeb zs{(m+Mtr=EBJf*{=_z+bmSn;DL0`h&m^OnViF^XfQegscaJ%;IfV9`Z>yp+ea2AWY z=NEP|p6M}Q3a};J_d+&wKyPi0mPW5`QKO=uj<;`(@Y@sf*m{7y9j)mT95P-sQUBZe zOKqApJ?$=JU-3jQZGSBTY4Q;8H+XNQZ-lE^nT*aPt+v~qS=Bus>7)ttp+W%_ou>E6k=a1!Fqv%m35m#H_iM#^LGXK=H7Tq| zXQ+eK#e;`F%SKflF)1r-AL3`%v<%gD(XzLqX{5qYSP2arn-a~&(4oUtTD{gKM)uiR1zb$96N;N2 zu;`X$?)_C?9}j?VY^mrmctDiRn)yO>oDW$69!s1=*$*|_mxr5R%fE> z*y(X0=@20k-BwUe{|;xI%pZwoYuK8VO9*GHi0fe;eCk1ecPt`c=)#*g;yn81?^V?O z!1*bORp^^X)b5`aHWeWB8Wh$54upIS+BIUawnX+kw74*u&EYfWz)p`wqJoh?I{q>*Ufz%x~{;tGWKUnAHMTkT~>%N8v*W-&9NBhq=XKNX0Uy`0$l zg-&=ZP9OSb8-2W|cf?a#pE^3;25m+nJa1!|bs_0Vx$o9wLnB`pAg>h7lGR^W(oF{f z@C=>l5a_4b{tdmOR!K<@$b^#*)wnVQcpz;ATDisCpMAj4GC(02-G5S7?Zc(*GTT@8Fmafw#z-UauilK)ol?Y<)T)F}uhR{npvUZj<+|jdafadB5vP z9U2<_%=o$g;F_^|K@KLBl%wBKTx(>>4U@p*(S{G5*<4w65*~$8_(}^yxITV~u>+6t zKHp?luy^_ApP5Gce{*qVzd7JdF@O&HE~qQxZ4m7eeVHziW%Vkkd0LX^eqE?F`W11R zDr>dbPw$NTGc*DF1@rh3D8H*)M>*VsSV-w$MsQ5dOD!bOEYs#H(uL|K|6@dpucv z^njZEdAU2ldf@7g)~U~6vQ4SOCnkowQY0RGEW8lXP(4RU>;zRgj*rv2NcLfHZm0hw zu{o`<0@iOO`*zT{+ivtnJJMXuOZ{Kds4T7JMfre^fB^KUdG@P1c6lQm{rXTM!Q3CC zF&`E>v#0x(zD%oJy-(8TeZZAFs(3ulrar0|)Z^0DVr{yUPyn+9w7l>@QLP@WG^)ui 
z_)UvRU^II`SG4l?%6&mcffnD&X86pU^7%W`Styl~$9xJ(IO$gkr#ITe&mnjXejOM4a*$Gu7)RK_)LrbQuUpb{t9jylDMGntnJ3g;WRh-aH zjKxd~bI7;m_@r8c^EX~`eF=vKUV=ZHCbY4^=%6V(49PGdoLw9b4GCH~V?*?4^31vR zN_|XEU~t)}cdFn+{c_V-yWZ40Wg|#ql0db1VMKJv>!?qQ%a;EMPLX&APmx$`V5Jn~ zOX+=II^|!-B5iI&?#@zm8JQS>W!g=mR7Xs3#O9SMoFI^)P!tVgG$v?z#_=+=Jt6w? zo6p3YuQ;U0#9bKKs`w0uv7VTy<-#>S2brI^jpb^h2i$49Kj9n3$ny{|;zkocI_P*& zN$tE3THCz6dBA$$5BPSs;kNL$Tsrod+lA#-=6!WzNRkDN@N(&C!{-U5PB;;4(6j}5 zKWXEj|G+$>iKuD5uO_zdKXsgc7-TffB+h0OxS)d8V@4A_qaB;V)Ga-x9&{D9Nwy~K zioe18h1aF#wV<2UDGv{)X)0wZTXU~dQK(z*3pkWk$JU?DKJ-u#rrMDqwC4%SYdwN{ zSi_ZYdhDx1q{p?0#b_+5OlC0a42Fhyx-RLnJ{ZMJp6ny&MMA8lInb+gJ_amO!hbxiq%#1vehp(h74N`Y}mg;tTbagbuLV}bw3`YmqH6y?8Dk{KQF zgCu~sqh&S>Wj{9Q(b z#`{xQSDEJR2#BoQW+=l%4>u=LIVG2Yqg-4#a#wm9=8^3WaG8WgpO?dl(tUE*IRA~s zhM$V+4BlwULkp8(BYiXl?iW7RK`f)_pb>ZxWld3&gE<@(+#P`l>Wgwl9?QtSRt~Jn zeLuAi$|jJk&25zLioaD!hC+7=1D9#MJ=zeo3|$FU8U$HaL;H{1`tBmbH(ip$u)C4Y z|9L>H`d~YRIZ=%Q+(U|hh0d4Q7dHL@g40_YA=*M}QDs9Ea{BRTq4gMiY2HBkfk+ZV zNDS9fJoqwB*{K~3lG*Uh(**U0ctoWjHKVyiNSF^E1NK<|X=`2M-JK~U6!B3?gHB#X z8$bLju6CR|`>&=cGZp|1oVCjNZ^t63#O~sf={h^rHWZKOHsa~bu3h{rVBfSY^`nU( zsD`l?Hi#HHWRllK#f^BKd*A2aJ|W*U`Q>!w4NLZZ^PkM?>Ab;|#bWMmqA_{MSuY_U z5)u<$HYfb=Ll$+7l_@)frymfvpKMTiXkiImK*BvFU=DA*z}&OqJ)06*`|(f`@DN$R zk?$pjsOr9#2FZ{&&vgq*7s=3krU4~Tei{N*2tLwI9ohK-Xt z+5OVF9;)Mn65)hm#o#!H zsEhISOPy`;JIwAI*%e*0K{g)7rlC^F2Y*`=1nIc>phcZ`j}d+I+8{F?g6$7zVM zNPbY=u#s}7l0|3!(+Q2y_HROo0VZ4s;WFD#hB&f-=VZYvHtNa_z}Ra2`=7D=4#IAp zqx3F6hgUbcqvjb8ak{h#4Fu4>Jw+36uLu7k3Opa_46d(Q$+=YL&OXCWoYa`K+?xF~ z-6=WU9u&iXOc9+qJAjrgjQO-V)9~ipaz%)oPXc+zgyW}S1!iFKE~11_DWwvwHSr-03)bAlKU_lJ7 z`06-^Cp$(B-&SgaWWJ|K*Iy@1aQC@BQMYZ=k&k0^^6}Ldqwf<$)o9lsyZZ-6O^O@U zNTkGnC}paEuW%h?nH~4W;mOAfl@4_szo@4Zuca6(-O`J*r`65r%edFQ#^-z`&b3aBv;|1-P89XN`T#X0zk^u|TmJX^_o`$vUv z0zKx~vXF#`*kq0%PgPquIZ$^s1n&lg6g5c27Cz!5+GkA}zk<2={j)aBuyu3{)*GHS z{2Ly~H6RK}Jx5&Ut*Ei%K{$t5gJ-c^1RVc?LVX52|HIe$0_f4-I;o?^k&k^y1T5}$ z3}=c&8Cj3$iRuGbTwKFP_E159CL6A#!EvQQ11^`LU0pX;QAZ*z#0yG~o4l^c#y%DdzPKKvNB`pL4owXKE;(s?I>PS2#-D0EObCR_9# 
znWyzE&j+8uHF&8qI2$LgL<#q+zYUj|brT5v)_iT{+cc<>ATwk_YiEvfoSmexS zgUPJxKeB(Bp7B6*Yz_Nu0occw0KPpRr%s_eJ(z)jIvk{z7wAySPaLj;@I@cV^3vs*dx+-+9%g?EBd z-_R)7-loDGiM0V;g#)!kA38}o0Blb1H%y7;6DKBJRBe*zHk@gT(dCQ?Ub%2?nrifb zEcA16+@t3yv2tp4kpz79ri#4=5GD)K)%)^Z;~=)Sx}QtWfyhU!hrh z`oF!dMg9hmB?bqO-XPj`FcVxvxKp@J9jR=8$+nE`!gl6Q4BI9|bgF-?R2Sy*Q+qO(*5uEZTg0zrz3T(zF2)Uk2=TSdtaiG+DeY-E*F|gLB4AZr^g$F z|2#k1@beRMoUfHXW+HXvJ;3{KjxH$vUI!-lUQ}2e6I>ZO-;r_u*%!W|QkybOoL5;j zks_Ao-6UBE?r_o8{H*QgL{n%paugpxX9+KRDX4hie|$!iS(5r5#>Gl+PH z-Ud0Fl!TlBz*Tw>dxEYs!eOj;^x=!>Z^oMhTG96d4lD&f5`9=Qzb)BpY4hi9-P4*C zaK?^^g%)#!1oR|SOT|@xOWOG2US1i%$jQ`Te^qj@!7mG&8FS@PHot0^@Pmlu)Z@ik zLY2Sq=zu{cahb5Z881@b-g-Un!3~NBvs2zHz1MxRd@VHDT{p%(|17T&ajY=>!tEQB zRrQv2O7JI;g0rm_S zvC3W=qlqj@Sm7PyKiLf!Y)GdfWmdmvE%8YJwBwe_iF!6lZ=-3tUa*sPI}^4`ILC9! zXy=NvYJ~fpQ<6(-z{(weI9z*S3Ik9wHU{! zOPW*fQveF8X8}9b&jFS{Q@Hq#8j8IMEnHgSsoOOoyY^533U>yj1owfb$OM$cg&iS5 z4uilw*Q&3LXgJoa{;V~ABz9s*i?-hM}Oox@2K#S<1P=I zhr-;XN^cukf=0O&Gk;a|L}`hO)rOH$B7;yhok(<*dQZW_kW%d6J)vHqKgM(FM)8VI z=k6P9!{6~3HM_|vzgEM;OQ8OoYz1Q3f2L>?W8|4Zv_7*qIOIoAeMO5ZC&6?}T;tI# zf9`ntgst}>hJtCITHi4QYO4IiE9m4-T33Af%81aI9)`4V+*vDDRgbjKSEOV0EOv|t ziFoz^v}1YQf2qCNFjy#5*ut+{Od84AL7oDcn6UAY zHuqW(O{>8xIMc=G5SdsCawFyH6k50)y#=POl)cJ{g?tMpk0iR)X6VCguT;=-#CZq+ zE+a_D*Rxy?g#fX=RroYh>EGTxn9VzYB!+0g-J!R8&!#6yA%kwN1^%pO8lzSwAz7J@6 zZdbY4KYY^FiLZ+>svI*|qNB?o!H_llL7Y{fRtZ?t~LZ z+va_QNHtE|(sY(3jLBbRM=0wi>YFvZFIErW-S-fRuU5qV_bgi^lR;V7`$SG zIsk)wx2Q=SN{7Q&O`u7T#n$3w4y)jE$E2b+nc!gy73cGqPp&Hw2L?J+qG=VM@J;=k zOIR@RX1xU;cz;}|y{7J`&r?TXZuVAPZ#})>e>}AKy!Ot@eRIjL(R9jihdA5|f1^$n zwul-eXpfFr7uHRS??*}8ck)8b=g+`7IZ>=e;IUtxA9yPu>G}! 
z9=3Er^}O1!XnC3-hUPI}2owlALX1qY+4%$r5oT-1P_QzDSxj<2W%+0p66R?!gE=(K z{PB*{AyQFQO4A#8dtl*^ym|3SuW0Oy@+29`(WO(wHMftohVFecjDv|!qY)M$qYcBn zb6OP3{P!s*vMe|0AQ8cWLJ-#0NUYc^<(RltVYmm?5wWC+0A=M%K^j6p(+i5}Z}5(E zKc63(^#?MA1p-v_4=drS8R&t2p7eaPMI$CHzwGL|1_Jr03XYZ{wLfgS(`ne&^cvzD z=Lc~aG?$~PSP4dJiTja;gPiy3tic=uY^&@2Aci_xPCcb=(kz(D`8sVP07smD@qu-X z<9h}ig>ggDrF4?L=EX`a;SpSaBn||JRJ{4p3ccTu6SGT~K=sP=6EEU3CX3lijd>gk zL!DkCmkG;BchBE^00s-pI?sV2`tp{-6*LtL-HThPZ;|ew$6_hpMeiTKxhI*aJ7R4@ ziC#xah}q4w92qj6zS+y+9?a&d=ISH;U@>u7-0ObsLBc7%citB#t4~d zqpf3>&vntcSJe%pvR9jbG0{2qy+FP z3wk7|+p1Xj^}H~CrbTwDWCbP>(@*cfPP6tWgl~5?Xw!PVD4z~erdu1_G&VCc2|ySh zuXew4ci4Wv#hp&zH6`a0|NSNi-dav`7j+Nm^=7;dr|dgd-=iT9HN@D+5V>_HL>5bIN1tD|WAlled zUdOqN%RXdjYjA5cX9U5EvvF+7YxH_g^h$aRm)!1c5#~J=WIdes!X&3<=4Cbk&07>6 zy_&l_jQxca;R#tI{Mv2C>X|%LwBO5M8(Xq1D0-~c;)tSd3BVXK?&SBECfn2-&nJ#) z)lCD~MHtGP=)5%?`m5oc&~j&8S8tRWbzkHjmr$=Q^{04h48@6CniSL0>YKYP9_v8{ zKmd@Lqt@C2E1ox)@P;68aoC7C*&Xd60bz{2E_n8thtQa?Ia|Kk`$R0u#o}i#0#lH% z|Fba@b>C5*)zi@5KS{`$Fi*xjjJfOLhcC&%UC=n6TSTv1(k#~m(Cw~^ny|GQJac#W z6GX(`d5lv?y4|Uru*i&O2lR+#*46h~x2_Ws%4oNro@$2~#zb#DA2a_6COd>rG4SNz zb;q6>yIho_yf(uWy8I z5u&$bnw)>#cShM!H^I?yT$cCQ6vJ(6Mh%BrYf$q{;&*q?(L4am4SvUjw;zCT5%cSs zrGy}R4hb57)3ea~f+c+1AW6J~w7V|L{@dp(4o4KXVh5#0R^BWz0{w};BQFcv@P_C92)zi zc0G(i*#z@z;v1O9w&5|QN=WiKJ6$e2I7b6l>x8!X-7)3Fi@CdTt}W2vy|mY(0hCl>sIdDdL>;WfvX*GRo=u8`CArIJ`}CIADcTzI@0HUIOT3C8ZNIA z&3Bga2V`m=kdIRHiRl4dFg{S8)snnmg5aXRL@x;!SZ6n(d7?b?^;t|}#qUzh)Gn#i zb*^AgqZ4K3qq&c)gjgMl;sP;rxKAyhutWu;iurroEguavCx*}@_vW9H8&Y!0WNWw1 zf#s-|cf`~|->9j-WHs?hkRoq^+VYPsjYwl!`T^>$)b9$N7r9>(7sDe+^6wkR01fAS zXlL+=U;gwj2A7 zS!qd((5SBnIRFX%$AfiuJ0*psdfl09f;QPVqKi-dBIh3lGhRV^h4Mss+47J zYOO#x3RQ1)H~UdR*KzWaM02R-B9Suik*SkUVnkqO^=ZK&B8^D>A{k_(r>arj z=`h)Gx#;mmMkcN!9z}bt(*kN%VKmn~%I6K^L7am^B=6i)48T!x1FvtuE1z%r{VDfKE< zUu3r2Nz=Ji)oBi=^xW9V{M2liHhcL9`zCHps(YpfYIpX?HxruC<2PdbX(;D_X!Zxq>dcZK2~1xBvApAC#VZEc!+^>U9DDH5XYBO>gXbH2Z3v@(Z`>-Leu_~_3Lfv{03GZ!@fXYs z?9|`%2Z>2;+HQ`j@fdB*t_VH!Er9(uWY4a;W1c@-9(;|uHHnjXhnw1S2p;<)Cxjd4 
zleq@IU3rK$#@>wn^j{! zgKKc8-}^4>oM$+MEc4D~rYueyG%~4CMvFixH3!6&h(R9^ES@A^%ZmM(O6;{-zi&w` z?D`WOV?Y(utzwFc6}#1D-aoq)GEyr+(@0djPQ)T`9EB2{R45o=7NeA$gf1z%pRPYW zrbIUqr33&L^2WqkM~+OjN(uv|8tKvy6^WIIGDA+q-UWg<`aCK?zIFoN7(^EF6(**( z-Io1;gu>EcClOSU^=V~bd$a>z=#f>z67O#X6DA(h!mn%NWg;;bc`D|#&YC*_Ge!So zdi5_BW^usgqto;t+4u7XadMm2BaNtZyaAVHp4dnzG<*HdQZo$3=JgHt&+pOP8r<}7 zxvKrpj|h|Yl!+zY@gK*w5J(w{XpE6apvGK2Zv5^H>8g_Q`2v@M--^|i_PJnGI{G)_ z_8=`;rim(&SOLs2H*85y1i+O!(k@)R5I%n~Tn$70ts2Mi<+yo5wSg3|0otnZMRrH? zM8gFE>OMUUC5j}X3Fs@;2FtUUd*JF+1XC8%EQ5cHGgZ-{iR8nJM{f5HjLK=?DE5vB zAuZS+pYT-%yS9#>sbWRHS2)3q`IdA+BB`p!PnC3u}l zXn+YMRn24A5K^oz>Lo3v{Wz-lA;qg3dMyV!7pa}nBc@K2D@X?he+BN@Q}|^Tk`#=_ zqKaY<01C23+WItHHgHA0dk~1qRg+W{si(YY-GVzJa-orMuP_Q?ykb+I?7u}Q;9cOr zSjkA?K18NyD21jp44(1w*Gaft1>4Xx9nYJ7Ty`j6cYph+Jw#{L-=R}!8C$PK+>N{W z=b!gTEW1bH#}JiS5YdM>cpFJXhocYsL389q{>IS^DYK>v_j_0|C=N=<#Cet>Rr_34 z+kTW@Dq9{L_gXg_bS=k*))QoTO+zucfdk!3aC7)lja-V-CEgSCiTsf_MTvB4jxQ9f?)~L)-42OtMR>;b3r#U!C-rsf)ZE?yHBS}0R=>_)8%cq; z-E&biZF4`Zc_5-VY=|kJ%`JIu7|T=EyXt%|SicUk70doUf z2lC-fX^F}9T#`r8D9^jO`VHcESlC4cRUv28#eW;qo^`P5T*EgGEBdvB^r!dm_^5Q& zA3?{g{zu5P;lvcm>gaN(?MVcE7fA5oyy&Ft7Tg`>z&?nAV7HmLdfyz${d(9~FJPd- z5N=P%Ki=4Dr4mKJIW!PchO}MzUEnJ)!@LHxtMc_`JguqOzF^E1y20}HJMKPXl>B%% zK~|@sETG(ZW|WMo^Kwipo0h9UJfk(Tzl*t%HKP-dv=uIwAUCM%d7+!-V)RBzy>l_e zf(#_jY(@h5Z4?S)`D3h%x;$pm^1iFD2k)AWiC8`yNOj)Wv}4>gzE}0+>W`cT6X3n0 ziw)WX^3?7!r0^-(TAymhsr};mtTi6HViJez#s~WOZ7@z8kg>Mb4Lj zPMtlt!yZ$38}%Y_F+Ls8$^dBp4ob%I>ASX`vb}WPl~4UNbN`4&4NkDlzuPQUC58!o zgoI||nD}|4te&hW?yiKDLb+#1qy;^JQAS*!2j>(m1uw5?e|_RsV)gMr^XJ96>9)yD zR4F3u-5=V5#XLQC(3IW}@nZ$f;n$MPFBm}b$MF_iBHu9S-vWZM&)mZICwaS6KK>3h z{#a|dtN2Dr#Cc1!p^h4)-+hxvbJE(x66=>yk{*BUM1$oa$H5cL%IACcG3r;!^J4?v zaX$?wTgAkPMf0d!+**3HjYf>+c#FZ9yX0RgRPz(dke_X@qtv{bszYcG@dDGGAl6M|d_{Nk%V z#4a8-YWWw81+brjqVt=#Z?S4x9_bU4j+9E(8F(i>c^IW_OlA%Fs0Rr}8+uTbRk@fv!)6DYBHDmXh#>EJ@61cGxM8xTAR1SU zvAOTM9k)3SmC`Q%L$Dn1E~)pn?cw^vn) zEq3@d`=uw|=WfWDxwKrQ{QgsWdH0CxT9h%^J;lz^ro8hVCsj06vgJxU4Yojg9OM_K 
zIs>l4e&vVIU{vQ2)S=1ySF}KUU$~)|&(x?d_zn(-<}f$46#63zDMk9&G=!7yY;m?A zS6t%bZpw+*LAf1T?56UQ%VD}=vH;(X|6nQ+o~}l{o={!mhsY%ImiR~OMO7vYbg*a? z;U&woJxveOf$~*i{%-yw4}|6|d$*Sx%-b$|t_hXAam>aOajZ2>xBYApwHI7FFy;SZ zWCC(W75KF^3+OBzzhwX{)a?PUI_ov-T|M6LVB8N|6-GP@x`?|VDjbnVmGHNGkw`q# z|0<)xZ4946i(+Jy%a#_cCy<6ZlgAl8`OI63*{%HftQI2Wd{jR;4D3`A%!Xmc2>(J4 zbN6d9^_X(anOfbJrpy8!s_t|mg?AUFnEViAy(=xgoM z_2@4A8z3n%0fl@3--DI5%K3=@c3^GAMdtxZDQh58BDT$Yx_rfR5n6-Sn_*H>6bH0^ z#Ve8V@`1*R)XD_5C#o)eLW7_wz4A)%ipKR*4$gpSxVCNmHLlm!FqS%GNg2yc48}E< zyl4$xTxFcKehoTOf3D!TJbJg)D0*$jhB{IdOzqlQa(@TbORKN}@BOLPuxx0AFjPWP zV8J&G=o)`af2Se8E4G(}Uj=dv)WeNQ14h#t6crP^PskmfYXN&HHG4mER0xHz7CXbF z-W3*8%c)J%`H47e`{KhwW~KX3tOzyDv~W-;OoSX~#(H8R&e{rwE^YgJPYT1j-rUHV z26pBa^D3|~VU-15kqPjQBqz~zVK)kr!?s}h!qZdW zJfoA19W6Nb7UDck;kT+J$|%&fA5}{uThz)zhf??k!5PQVSN*cimE^U1+@2Fsj!_8k@O=MntAeunP^$G z7EMpY2}!pycmTv4Z~}lSs#LzR`k%$Zjf6ufoEM5y#%Wz4l19W!JA0c-2`NiF!_Ku2 znsYZX*z{vzlSAL#TyA4e-c}hZh0ay5J~)tu`ZVg;^be!(2;qVP7X}Q|u~8j_aW2RO zVxk|Xnuk>d#xH4)3nX2Es+U_()PNj9cnf)q>Y16y=p*@1W*&CXiFNLXV8Tkl2eQjn z5(-YpIc-XaWWfX!=R~leXKU?AsmSFzXP2bk{7~6QDpBjEC)>zxGBX=9TH=UUk$7&{ z$}POUF}Tn)YFbEfA;lNS5$CsXaq%;TYWZU&OaJXSHQEV|we!V|E;R6&{V^oE1*Smk z;cmS9$AOdJj+jC}SRy>Wbw;WZRRpVB*5h^G6^q(S2#oaIX`T#ni&Tjqg&@iQ34Rde zqz-VgmZ0KI+=@;RWivjp)Qr}aHv>l^j2~J>@Z47Q9eK{Cg>@SUm zXLvU5q`vC;5OyIh=T--!dBSigt+#%$ezvF>lNVxAby{j;91oQEQEHe}yJ{^dG`Y4-fGdph$ zXOL~dc{Y!kr+JI8mwXg?e~RlEyEfPfZ1wGoNC4tQ&kh6*d6Q@k6NES7XCIUNttLA_ z#B;=h#~^5?^y{M@NNS473*ouctTW9Lk*Z4alBxIjJQ_8`y~lr)Y+#hiZc8}UN;mUlqekc~fg+!D<}}`RM{O_D^5%^HH*WKxegy-KSuRZI>X-GSUCzryTnu$$mE}P4 z9N(TF5spD^{PQ44fP>SJDHwxF^65FMv2m>c!UeY2I#G55Cc}*kN#$n;h0ON&I@?P(d+U-K zZxy8MslXQ1-Tja7prj@AkS{$~48+Wzolqst|;2N%U|*XFh_ zw+8YCTw(ZT_`rfN2ahk^XMVV0v^~{aRV{h(sZKk8d7uu!%5&!YxR&$4c9G}`Y zq@rTDeoN;lVJO5xdGqU_O+t`pye&$)IUcCqkkfrsa6iSTA#;ljHSH`=ZWOvH30@}S ztbW6Vu-3)?hJG3y$$Dgay$C?k5BM%5wn}f8wPF9$@*QciTgw_zY=S=iU|h9g1% zE3^rSm-okfz$=?Ms`II-LSj$MUs_5LDw({Yi@CvjID{l%FsWAxY>lxmU3D$p9j&UQL=zT>yi z+lNG#6Z|?d`XjK~WCA}&ZB%m|mD)D%TQohW>GGKidc4H 
zyw71g{blRPAQg(17UuNx8S&#>f}t<*=V=iyPc=r(aa@X-XMXKz$)sUUrO*lC+J#46 zBwAb;n$HWYv}#VrEtg9WwzGtpQUb%b0kTen6d?h1)*wiR*~Cm_<=<^5qL4{5Rur75 z^NmhU&8P=fsrXIVfc)II5+*J<<2A97muH~X#Ni8g8$6mrD*?dQwFa(=g{Chpvd&y`E3w0Nca~E( zPN=nniSk%~=tYK+f(SRtZ$Rf`%Wv5Y~OC*1%_H^kPu%00bRe7WS;=l{2`Y!v{Du(DSr?t~QJlLOD zh)aznYL7w-YiF~IPD9Ehf`kJ+ZrNvVJ8+=!XYjpk`R$x0EIMU5?I^4QFK(^M5BSXd zI4JdTB}wXcQ{Stn)Q8xd*Yv@J4?0}}{1*wv>+51K=$*y37}O&HAa}Zgqy7HKl#RIx zd2Ssu4cfUzSN!PdYv`7eL`3XnFH$<_Ays&cw;-zVJqqtg&Mq$t(ccky&^g(k`%mtF z3^N1^6gcrhj|bX6L7Nyk-B_49GFchV@HB>bY0O7|f-qkKs%rEcZo^~~a_Dip75IIE zsQg?B;<^QlahBThgEfv9;+3~rZQ)NZ_uXc6);Kbg%TDTV(tF4E6*2q*3I>|W1e282 z#A@(dIL(whhX)fjWi|2DdEVv@oLgO?UGz26OOQ&k`_Xns;~G|rmi_rTPmpIuNMRaL z!93hQkaqrAAqq7o!OJr8B(k1)XnW&>Ui{&}-lQ=>Ds`6;@xrFbj z7S-6DADI$s6D|H&TD1ZHT7SoFX~>m;Iq^pM=csEKdMtlxykZ{nbuhhoOjbB3-)Svg zdHyZ5A4hU2R-{2XXuFjnd|{%4Jz1?z{&OTRP+afI1Z4lCc+gA}9G8emwdk$^az{%s zchUl-(kEF=bX051<5ElDZ1&!*j$FJ)KccAk2n9~5RtriT@!b$@-?A(qi)#?@aK0tX zBr%clm%Ydo(_{e#e-=A_9N8)|JBYC}Tt(qopfd94wlqFtAba^Ghg6;dPnD{0D9n{Cd?f3%pipzH!s zI&-m>W*$jaPWX=F<{)vATk^xyk~5igz{S2dVNmx6Lq>`PCx$sJ&cPSqLS66#bpd`) zBAhhWpG&24+)-bHigBUNnjU*vgKpqN+?+CI>2i1C&Dfe=y*D|S@bP9f+=eUSGe3`f zgX=|QPs5gmylVR)u458g)Dmzt7L^wfaXM1wUeTfxGfY&N=na^+FEyrThpi}5IDg3B7k=~SE%AQ;72c4K3#wvR*Tnm^-I_vKWTK}frBfszh z_nrvZAkb_R_}vNowJyQ@Za~esu*kY_+W%Mv!ooiC*76+12psVo4p8#{PTK%;U2Y!V z385d!{Xa~7gLfp{6Xhh6Ol*5%bnHx=Osokzwryi#O>En?ZQHhOZ-2kD=j{Fi-Tht{ zZdKj74^N(xD61V|uwQ<(y%EH*tbkZuL6$xXJ=WC{`MU`SBEsHn1;np|Bd-KbQ{S_) zu8jvuYjo=N4Mjd3C&l)65|MIw3u(MPb?v#f$IQXaMqXixX)<4N>zcM-a0M)a%AHRG zqak~P6=Rwj`Wy3XV*1@WD6H>mAt!NFdvTP)GpP^SOn7C`s@e!1DS=Q>=E?AA4+VTJ z#}G6er*XU<*IbUaS-~_1V!tj0GLia{pFhl$!*>1oQN0IFZ~t6V=u|^6!=>7h-Neqc_+D;%6Wl)*yB&D;$qUq;YEJCH?INDO>(eSbD~9#3Au~ zO+U1A65zZJ3!74Z$-X%oCnG1)rya%8JkaF31+Q6d_QMlh@f7*UVZMV8Vjd|++Z zpsl82psJ{LlnLzIhqrPHcuCy{5_x1WenjLli@fR{BZx5y&x=KY`VCJ!- zy_%>72KuduAYC;e`!=G1w#vBm39@~^UAfws|M;xkzjipQir04X{Qxg0>jI$qu#u7t z5udMIqDglqR0nOBKi<1Y3i3vXWvBfn*2_-dOn9YC&ugp1W}vd 
zhdOsXH4X?UcPcRRAXGD`cxLo?w%n4>ZVE$ocklFs?X6XGZ?s3x#vP$7!5}!!gt&a6 zo2XeS6r1F&gR&K&O~=1wwI1-+_$+J09Es_clfKi-cPj?mU4@b@_F(H9W;XSG!Zf1$ z86pqunuxf6y!KB(QP`d#?nKT7Bafq<&W)&d35fJNYcxdQc%927wS66x(cX)XC1fqR*R1g8g?|&l zMzeWr?}-B&z{GQ6cb;j^jKJg1LpCC04N~|bTK<9&uG1>rwWEHVYj=DqxYpC!tOKaBwBwAofo=`l;aD+P3hPZj!rt($EF6aw=8Wlc&gD3oT2}h z#8<@x5-Pkb6YfBM1v#j*gS7kM89-`_#!$Q8%H~~}G4kF^l=cMw2?_i@nxWM?Sis<4 zxNBO)dNMG~5JtLr@`e8>v0SDreQ*B9^2XtRmx4gfZy-GSJkG7BQynFzD_?Y;B=hzX|ImYoxJ&I(v^EnknyMzVsC5)uWD-bURM{JPBO!3 zJ){#Pi)!#;epu_IqoNcoGj6i#VZ|Tbn-+&52`Akzg+|>9;ePP^tb312^NVL zsTYZHx9TcySQeQMrF(9uv3YYpto2~@der$TCuD54;tjC`=M>AU+t*vuP)F10g^u0< zTt89$YWeqG?xKJWkf=Dt8!YJVW*lH|#2`iZMC%rPp&>d(|KtgSj zwLq}l`*ou;QkJ8lesMGc%G-D~!#vM8R0{#=0*%Oj`*Lfw-3EW((|MY7&0M@`SB_(# zcu}SAdCCJj&>zFAzaTuYEXiMQl|XdV>+tkV=%kJt$)rrAe;fOHx;-W&H?P^COiS68 z!5vaEEtOn7CpUFXjDEz;Xn*8nN>S5O*sbRdIOn^M!5g%5j|Heu;uL{Or+~3g3TxP$ zxO?GzVogVk~W~3S}i+6f1u@qc7i9kP}@BQY6O8FU*3O%I7$SZ^l!zm z02}LeZ@_ZKA7ntM35TH#b6I2xV<`{(#sg+UWnwY3wbDUFdDO0-`((~6G<~&zQ)LM0 ztBgOu_BC&SxRn7r*#Iw!uGG}i`6GO(jAgW30i-)qZVGB&S%ae^*PwW05R@b3ekUzo zCT82|GjuaBbU;oEb51H=c&QWI<;9<(-VX>KSvzc5m3wO0ee;mIpWU&=2zY)jA0Twi z#O$qe!ohWSZxlrn3%An-EevPd5!U>$t!6*f+TIg*7BFF7+!!~%|5tRN<$X;SrMRv3 zm%9D0=a&`s07hKJ^?#g_vK~jSa}3XR_*b_CuGEP`N`nmJkIYij9q)8Zh>r_N2DK~u z`>0Yc11O=OKJ?8NUZ1M?czKiKjJ1#h(K4dv=}tL#VuS7c1v*(B+wxdl?#nY&$J{AK zUf3dhCOx(kZ~gIXG_-ab&vrTY8=52^2Otiy@@MrPQu|UjRW;RbH@RLc(4G`1?tqv5 z7k8+@ioxMREBbEP&sbOukzy|zChIM~VaImomZ9MZowfN5Lr{FuZdCUiU{%_JLEYVXTa-hrfny?aC(meU6345GHmijo=piYvbjpbhhRKjydK;| z!6~E}v*zo9U+fT(NLrY^CK<|SjP}-~^@T60impt8Uqn$qlaHm?^>#HQ`{4){)3`#k z93GntsNBEbYISxWxEAW)JZIJ~a|_f2j2h_AEES{55yB{zrv7aDS&R^=Tf9lmk*jT` zsTb$4y6*69H~kOVurMQf*9BR-Jis3=Syn)f316M=vQLNTubm$g3}|t}oB>qsJ>Du` zW(vfAujB`NUYW093g`OPvBA}bCJ;VC&*ILy;Xi0Va_TOGTB#ky zNG=Q8sow1l&0qFu*W`&B-ex?_%sMK|G^r*0_^yV~Fd4^tjCRB*u#f9$|NT6i(XHy6V99^Hy45jYi-(OgJkq9tfaEb;-&Q=l zTY@~~%a|KGxrMzLo-p38$uwRQZ=EQ_DbwjN6jpO|nFtsJ?Ow!IWaITBdLA59!&Ewi zZ2mMew+T=mMwg$$nAaG;%l|t+2IA)F^|lo;&oZdEx)mtjRH5vZPgAQ#WaJMcarH(E 
z_-7Sav4H#s{f6{<@lSv}C({h~ziOvmC||`tGZv$uQqA&4{e^|VYvAOjEn)mSp6Axj z>S^k-?Q}itFta?EyzW>mi}=*d244{O1}~dtj0r+@Xc~99{ZG>}!e>5p5LsZ-AnxB4 zWI}w0(VZx&kn?w3TppfTQr`}7Mvx=_NugFLpD->Y(o2 zeb=c3+`q;>7=i#V_VkpIxNGmya95+2_N~_!_~&LD$*5QvtiOHrQvIxpQG=~$XNsEa z-h(Vw>o%n(n|=TO4Q|s7AHT)mt1diPNUFPQH4+o^Ry?Q?%KGn#_w13^ zIIHBVXC}KTB{%?qixTTV30Hbxo0LkFW3j8VXO?#hJZRX`73_ zV*cG^O8KgpMgHKlS&F#~7QK)8*fLOTulj39Vk<8(FPoNU_(uqbFgc1T2G}CyqeI`Z z3|SD=o>ydD5{unwxG}HlVt|IVcdgtwqee9tLyZk_5lk`$sX-r%^9D%}%uG@ZAw@q= z9lKVb!#P}-jteLSXoKGj&TZyVJ+(;wlq@>?whfENJAD09^AVGfEMb|*uo89Fttk^k z-=}19zdj-LR*>QA9Oio8;UhpazUlq%##NhIzP@7;-VfQaKZv*W`76>rXs%AR2ScOT zM^-7bXJ~S{;@Ch0NgblXW}J zdk^BFdE@t69Cg)X%JPqyj-IdS_)@SJg+;*#ump92Yt=kcfj&@~Anyuj^bZ zv_%!%6ruDBfT~%DCSGeg#XMfTrVK{{^oh=gK4e z4u~c2{?qBF9FQ=}A<=nxC(xn8yME`rL$QRAkk%nlMhYIuI(Q}xw{P)LFplAT=q_v< zg}O}y8cBcAd*DY}{_5z4+IP-WWbJ`{p%9(OTWPeg^M-0b3plkq>BoN?otP+~stiTk zh$RSQ5a%`nj&U8BkVO{5mcNm>zom7DU1V8-gjGx?9$rz1cwyQuxeZRn{4uWbKhmgm2#(zq;JKLw3 z!NJJpY5A)ZV@Ggtm1=g=bhAuS?}3-Miiu}f_K0YIVD%<3pBY`TkJBaDz#Xf&Euls(E=o>`r zKK}}AUhJ`v>+Oo19Qqc>>u5%v;%S%tIW;NGBtw)}@5kAe$kuJ{R)<&Y$(bTDa4XOZ zZ2_~~;g{mu1U9s0 z!%K(KAsdDF6^Ax3&YF~0Eq6gw)>9ub8S`x{WhR}Dd zT{PHO3|J&Ti6aVFopCM;F99`4SX=(#`+}V5nn}k?u3|$iv7eiat(Ds}wnL~PUEoXE z0ZAHPW#AuG%VI{}AZedVBLa_y)*4+Vo@Y&LUe&U<=uf_ML`W5YLc7jpFsW{-$7gD`-LR+TJ+m4R1lujVx=2rN(Ag|Hsx z*65wLuYi&p$YERtgP=MfZc)$Th&|Y|E5wJ)6Crk{I&c?&2u|E65Xv8F z#k56T23nwiQ{eV242NQx2s%d{M8j7KwC!ytUCx*CY2r4H)kltho3)&eG0PY0MPk*<2nMc0b=}WW$8CBsSnrLHpNAWZI7vZ;K&Cf zew@pNn@Hh#LT(XGE~%XWCdx=MUp7kmyZQ{ncEOA&(n^F&7akv5uvfnAPYnKy#@%Cq zy2Nj1s3v=aJ^Q&+YsZ4eg!f&yPUmPl?Hc4B%g@XmV1qm|_2fwv^T&b;CyD(4u__ZH z2+T|xU-Y#u1XmwI;LnEA$>;U^Uqvweun<|1-B8xg|l8<4l#FP0&c*j zTD`zR73Lr&(FyMf0YOmw1h+PUPhn~UjC`dfPN^E={KTlr|DvPQ&SklH->`pc#Sh zdmv*=v0Fm$x7CD5?695 z(?`zZAZmu)ax;y0Sgna}!Ej7aE;k4JZFEBdjs@bxksV!(lc#$Gq|L@YX6e4aT z?!3^kMkfb|VoCriMiy(*&$!qmi{L8!+ib4W>ku!|Rol}*W*i4`y3->F&I;3V7o~9)LoJr0ALUcf9_Rn(cNthSVksglF*BH6 z<=4?~U-9}0d^{JFnfOM#kvxqQPO#wx8;c{CaZUY63-4o246gW}$2&Q%-i<>rG9ioZ 
zKc^;8jLq~A)KNC&w{YLx)H~CoKyL!@c$)z{xJ3c%v*8(Ca8&9}Iu`i6$pdP;ab-Z# z^9b)ip2A-L4d5G9$D6HOZxnyNDvWCr7Z!!FUEDwQP{6&=5I;|1=q%lk`> z^qfwomx!r%L+^%?r;s9efvarQ zXSBh3b5@}dgsf31t9pC0$_&lM?auErO%R(-VbuIQeoG!QoETcm--N;s|64d6lJkf{ z4T-?rrBqcLxHegD2L)t<8kB%{)u#%1O?M76@OwXfh-AFM{oJ;`W5Mx4(2n5Eiho6G z(SYZ2OUJm{T8gJDvQM=RE!-%s4T;ttC5#$> znEo})@aDnS0tnxO*Szjt$_Ndo>yctp0?EI)1e<^0kgOZ7LC{k_J@~SmAwWSahv+kX z*6(Q`r~2s3QmvO2S^UI3f)HM_)kSXugV8m5)2VOj!K;t7_B0BuKoKIw52^7Yd z$Q^BXBzGi*0p)T6!>ux{xY4t9yx>T#OLt4>iiHD3HbkQP9l^6MhfDZ-GY(_{rD*+? zqGvQlSq4&Hj1kinbYFJ_3mPntwp!*lydrYq-^tA)&w(K?#vlkl4GtRHbPRc_c-cn0 zUT=R{(p+I(;p45nUCVc!?crM^1DEvAfAA81CNpeGS+s_KBS$BC8SlLY?s7Nhbky3x|UQ#z61J7tqSxb$srtv zOkqTgP@%{BW{t137mfB>vNakmU^V~&qw0$66p{yJ;9AWwAyj-xglBKI=&=x;;6eA9eD!+q zm`t`r&9e+`(%9ZKB);l@e5N0^3HTHTX|5#xmnvWexw0fI*QqMqg|mOih4{aD)svp2 z2a|nM{Mnrf;vd5NBlKhrfO3FCvM-#e3czK+WA_d;Uuu+TEd$Br`Z+b|>mLLCYgmPnK}TR;Y1XgF zd9&+e$b@zAM7?$CD?RO+NJ#efGe_X)i?zmzNUz0J>&4#u6! zk>;7XJgn0Ii=su*1UWI(>GFe^(kqaXuCRbyQ%Ko4|ho`a0zbRb)%k8<-(cBfMZaD7x zq*xGpVpy?cn|Sv;WRkyg6hmA|2>yUA&Xs7t4$4jWs`j74P^h%j$tpP?kEx>G3Z=tc z;8MQ4q56l-JF4z?>%RVl#h#JR)AOVm``UZj7Eu)EM~t)5SqWXIg@e=Q-a(CW2CA>H zrV3fw)&wMm_6h~~S&d#i>%8T#gUB7(ppxZf`;iY(n`t{Kx+0;U%u&)SE>-`iSP~mZ zzh}DWs5|ws!Im@hI7=Tenydux2*T2UdPjB}OaV~xM6m@9p7*-xPvQQ`jE&u$q{XiW z{g^p}XM<h*`}^R*HM8m;Hv z*M3{1ge9kHCnw4&vS>*LZcs$CzAnV)zc2=x@JJXKtJskjUiy@+@S~X>!tu!(baiD_ zOU+-0`>pvuo#Y|^rr*cphMf&NA}eYsThI?RP&E>4!=N~FDHX`Er4ui5Watw<{VWP( z=TIm}rC1uc)}4EC(fQGiDE;nz?V(WeGK9Xvz3VFTOrZA>1r8Zkee0pOh2$xB{?_vW zSPpV?YMs8_(of_}=iK`LPQVLNmpbLBL4m}?liB=xVJw3&3r5Y3kG~V5vpUeDNWbMV zkI@#@vdcBDG4f+a)66JKioV$k8L zyucpTSz8JY;21`Fzuyn*P_p@Hp#zT*}=$x8E5jC2RsiHl;- zEfu!g2w^wY3dVquMVEZ#+w4VHM}Hm_SG@MA)XKWmMZ>^2R{Y%*l$eKVl|!EdU`z4z zme%?~gJ~gMGO45GcyVVhTfOJK?|w~E1 zx%>-V%G)^w;*hVYrlLGP`DUI9Zk3L=0}dOYi-a>odPMf#+Ob7V&SKX%k`LZ8?FLr8 z=)F!Tk}7-R*pEkct(T>#ks1HF!hEcU?ST}R-TE;!o?7&gUV#x5?3>Dtmcgw(4dMdr z-%KVG|K9lP)lCJGR|R3|=nbG_wo5WawcTt6Ly6svXYvk*+N(LQT6Sfk1JU`Q=MJw` 
z7CIXzTCkR{wiFPS^24T>h6sM(Tzgn)KBJ<(nZ}^pQC)u&qqp-N>~gnhVI*$4#f-66 z;D6^}DAd%C#L z>|T!e7`EPmn*S)jylI}1RJ>Uf^h`~L5UzzxF|vzV9RSTPHtXmTwmi!_Ei{m7+oS*; zL78XWypJno}%5Cqfq$LVKgSUsIz^p{P zUpIIk*uLHMr~*Byi|GV1t(xD6u&_E1^UAX|1xv^JI(ah~*7MBOI|M5^H|Ug9R?J?x zUq9^v@i6rHzFlCUecdF$Ua1t^Ow}jlh0^3SDlsSv97LEu5GBi95Ot|01W*3DkW$mM zYvZ0)=@WAx;&I&yeM=Wa85tjcir|Kuw$5#aedZVph*;q9Xs^*@(B^ zzF<-#@T>GX2^NW(Cn>!u?n){1GtB@eRE42QoB#fPhlU_m*2~ZB~uSeV&b%_bBs&(eSp!1OO5MSlRuR*{gxh zS1?_s*^+&M@QS-|(Xsla5eUHGtF*wFX|)-_Ewo7Du#w$pJ7_|^)Oa0Gh)=&cjjeDd z63?jG%o?pK)^7ZYs337xT`C&(w{DP9`U>ytQrp1|)KRe&`TW}}bU*|2enEU4F$=Fc za8~y?ZRR93XOnw*KdK#!y(h@0N*L~u_ zc8n@twZ+_t@Jn!Gd@2bpZr*Ea85C8rmD$e6HL0xazq!u&>rMQl_vp&Yi)V}%g|OFh z^&{P;${XyU%o6Y}rr;;f*HwaWmm2<#5=v4Jm#|uy<1UB96sE^JvsZC~*}A3FQO!V$ zru<9Yyxr{|-y2}ykOuOK&Kt`e5W%@zcfxBj7QEE^mfb4{IFHL+ScLAkj>X7-&y1&) zquY*A9fGs{u!ISjJCoFi5_lqBsK{yuC`Xw&9$JotB^+u+3#p49&^hQ_I~;tJ!EPxh z0~wD3-C|bGl(v4S{-#ph?BcClO}L7_yqK}c+^)aejAWT>F)RL+HL9^drOK^w!fThb zU5TD9ZsGs=bO)(3gb_wi)s??ZUwH8f(#weOQ zNx0HFYK-XldC9*8 zeETI53t2?STP*D;U8pwCrg}L`HJO%wMsK6ha{Fd+FbZlXD?L~GW!dqcc^_t7bqpV6 z_PUl_4^$fHD?l=0Pe z_w(%Zf30q!StOnjJA^&BUz&_c>>%S>o2+4tNMY~H7%K4&M$sCa4VmC|G7oCi-G26X zri|g#`K0t-;YRrZbomKB@yF|lf}S%Q6VdDvd!|dHI#Vu#F{FoBWLEL=tZ{=Thx75?(5%jk0fLZq~Q^@0S_V8N5jf zbVMOn4F{trXGxqJD!89fer-aUu0s!F9tWfLN)?7tn)sOhA>GmJmFNWm?yy0&+RlL; z#)b3n=dxW1Wo;Pi2qr^Lz}-nWz)oI%42A`NSrg6hqC$nV_Sv746Ys~&UgHi1;=>jvb z^^cZvqdtUhrv#~Kq?jbQuDl4>WlCKx9p%*p)1AQ%sb{w z{Te4(mRgstBDcuK;oCar#xY0OyqIke9At4Ouzm>5|x56uwV#va$<&ZBN2w1 zPD9DI>>3tM#Y=a4t$2~`r3uSw^H|W^JfkUB|LG{ogGHcbSNAJc44rR*DpB4YxTfr@ z`U1kkbf}2{3?gUKE)R>_S*jdOUNn|Q^_JKC-YKeP)CLhU4fY2++Wr@#@KlEbe(El7 zF20Ma@lkLr44qyoTY+~97O-aL|Hs>5- zT_E3ZyeK4wgw#ebTf_pv=?^W8yy^6n%HLXQU2UO$31#O=OZd2H834thrLaXJnkhc? 
zs8elfHIp?4iMfJ}Th)z$vr!-1QceEY)7nZxtJW!tvxEd97aNUJ5YBw^NIOfrI_-5K zZ$TD6ETel{psIa~Gm37ix{7WBsXm}>*-xH|AdK8tcGmiLpP+0D{^GTMx3B}JCwJk% zh|w6LhON;{NU7vGZ*E=s97UBg$1!ppN6bvfdR^CTeAqZWaCR=PtR^oiXCEOtkDiv6 zBDZpWbckbVP7;7Q89v=X(HD75?3DAfKQMh88OMYJsCQMFJQ>*CpS+~p=eAT5JhtKO zhg|3&o^dT!{2);wy@wyHYu5O6#q3pLGVMaK*l9ZC22TR}@=@*fa;TbVt_)8+FRc+K z*LZ8}WP;4Z=`3SZSY8F!|DBO`KAa7z(M0?&)CoM;?Nb~yk6Z6wk1DQ1aEIxHI5-pj z3G;;29rauZ9M}pgtb&IOK1PU}34Aygw)LfT2J|*bZ%HPa3NFr?wnB?(%;4Kb87Jn& z2|~%>wy}hvwBH}Rzf22^(&>OWKHkg(Tbh`kt+Tpt>8Y?{a8W_`tDXsm;i-w0y@Tre zPo!N5NK_5h392JJ5@*vj-l1EN9KUFv`Wq`mIyRtd{0>|mRO5Hgbve8#EkcEahzcY~ zk&lT$iR=CWMgd`fV4@`h&rW!j;ejo};0CL4kz# zZso%wA4MF!Y-ojTDYVqv#4vMFWqqF6R6FRolG)GLMhy&2vqL^2FMJET(sYwZn~cbK zzU(PH^m=%wi{GY{pV$be_j1km(lXI#bu|*P80D{S1k$)(7v;JOSB5C(W2f2I!F~sL zNX)Q9Xoo$FL;VQB?h_(cELc9GKW$e*#xILU;B!tqtwF+va$wMA9VSYASl4HsvRaj! zxkfQGl84LIPG^V>Y0^1Vd6us2@=zLEWgF4}~Fmdvc-036Iv!&%= z^!dv9h$>gCZw8p8fqdKb{6jsXX>Ft-d$pmudBF5Sr`D62A5Tq8DN!M<#Cv7PvhPe4 zCgQCv>qtd(sEa++4PQR5W==kpwJ(j$e8RO(=D$%4ldT)^3##zyGqptiAg=8w7udrA>YEu<$ec`FWj{BEOPjTSE=2TTI_s;sSl{S)Hd+Dkvo z>?ugTe~C|r1tmX1wJK|Q-u%hBdY8?xkcH!|c@@P)xqz(%gLhAT9pbTI3_{>*m};4v z&N`bq{}@-=N1a66Ys@**K!uqC4(y_@f~ztr(y%z5?kknc zqjwyrHmW3R$Dm(yT56CLTu{sp7nC>

z}$o{YF3K;~D-7apA0t;y(7?C{8c>Yh6| z+Lv>;A}B(M&WVwkI?|zf0A|^`=eb~Z`F48P*dm0ZO6#Zxz-dhFsBMvGHyE%7sQPz$ zIShGUrMti~*j9s z%$W0nGy2b=UW40!nixp^4CeG{MCs^t-uA3D47<2s_h5Z6sLK74fdDy{4W+8ue_#~) zaI5^F>DD5zdmwo@5)I=epSE0whX=wl25}gQXeiv!HsRKS06cDGxfVt|Ly!pE%9B)L zpnk=fdnhH&1MXkyj*T>8aqq0kxej`jkF(x3WzAZeKtBwv0^yK>zaT>?sR6HyTbf8b z5ucaZnyF~Vk0b}CKQxVO{o?hGF8}s!M|Na7-==)vR>R|4neZxsDh9j~uMujUHxKN4 zRp=sOSV&o7lc>pMqZ%1aYa`qG$v+W~IG@L#D~}#QmVv{nvOy`T(Av&fNnWm12jn zmnVl$M%|I>&EmIq;*5f&g7^c=k1(ZjM|9d3%7-Rj%MaK?lz>PEJUfR5rZ9(*GXwunU-Td!w7Qh`K;O;pOXS2GxR#n@Q!mX9=PWPM;^UMank*K#bl z+zl(EqSsDQKQcEbAF`N+HRe~W+g6Vdwk_bwQP1I7k}Bg_ey0Vd!+o-0=+xmpq=?I? zl_@W%$PgTa(oizV75u56T=?0^R$F>`fj;qOS2Fu#Arpy^t38arRp`;qFma0Mao9Jm zS)LTQ1UQ86>%}wr9p$$$>)X0NI%V(wmjGfiz5AzjK1&n^F!?t)ec|ohy(!sG27h9) zC)FnaON;F)g*okDy599CKt82W!>NZsf9B4O`rhVvVP1gCDss3 z6;^Dh!x+0o2=uD;P8L_@Z5LDFJxO3nwe6m&w_XO?8Hi~uR-7^18EL2^QWf3k9KWTZ zztej3E_jDxPy=?nW-{<2g{L|EV-<%WagGW`aQ5SlNl>jk;ERD3RTb5M%o$CtfU(*r>C>{?8-dF`{) z2Nd7Iu!GAs4#2y&7h~gx>!+4kbA9LU9iKtvHIWh#M-cjJ)2CQ*U$`!B^f>ne*}J|3 zPwyqWPxL-dRhP`x;%FXS<{E}K_K1ZgAt5ohZJFh|Ei&FqlMW5wit7|%Juk3g%3N)v zR`H7nkY1)6;a>TKK5WF%~ zC5B`zUk%?HP95ls^(QIPuEEkGrl$FJR zD4w_uB-!lWf%R@W1b2Bkx-HDDRT7f;$=!w>(Nr2eweylj^1sP7FB(vcg$6K5*@RYY zI%;JFHZp9;&u9V}Xv*a|vvmv39Je%G($?&aUcLuD0mh3~RJXYrHa*qMGm0uDIRt{p zY4G%aJb9s1c?bLqf(Xn85f+~ugSTxUXxSfJd7uyB^_)j5Swl- zul4md-`E)pKyFZ3tLprHVb?Hww9vp(_PxPVscl{BSaJa4xTF)VQ&eTta->CeM-~z& ztu^tyaSM@N(d8xLsQBYMz%!1fGWu3%0g%urYj|T!ZUT(UH@hU%Ot#=ZZm@Bc8C-3$ z6}kh4`JDAOlsqJknd_9-nS2mFd%61ePQWq%t1(ZcpICfoU>G0VIDJZB4W zRAibJeQYGQU9B~0za;!Sk&-=_Z51NQAL`lA+D!CF9YbV5oj_QT;qXyFzK;-~XMA%Oq_5A1X|5lgn*WfWU)%sQs(;+TKn^rRH}{81 zNGNwuDNO15I(_w4sWn!fZG;74S1CM4`iX^XWN*_Wh)pU_=?ezWiNJno_9R?%Rh8i( zIAzh5Yl#A3)?Vjz%v%c2Ea4M;8M++sK8oW_`N|ovS^ho>L+9Q;# z`30agnaNJ4n-~O$SFvuoKM>#_B-^{A36*ebP#F6ggrvcdg_KWI%Bdf%sylPS=zw31 zh8ayX6YXzoB@T1nsd-W8mQ#v<<8RfvC+)t_51i^j0yM2#Y*en1Di}u&3 zNO(1X;r!LxzJjIt#m*arif(MlN)2tF<(OJ#$sh}!F zeo}%CqQ{Qt0V*?`kHNt(|HA-EUu 
zU%v&cf13Hh0`zXYG)H3g&z&k8CJ6*xTFn@zm3R_yTY+74OeQ6ImGtemvvrDYQ;$l# z&+(F4P6&&WYnSZtEmCW}J|Ux^<%?K;bpS=@TSiI4Z@Pz>*2~y=_mh_g1)|H_>iGoe z54YJSS+5RFO`EsP^u%0F|GX|%)U2#~v!6Rdyq-T6JiG_EN?fNb6knz%l{RaYUn+yd zKfJ}^;B;s^U}k=Xa)=BWp2P|%zp=FJ*p+KjSMo75=ui_?-2s;nqV$~M9uLC}N~u() zRK*Z)-Yi>48!dk_eghxU4gk|klcJXacdAh?`hYSY=;W?QPm#FGkn~nfr4lUifW6Bh zx%Dy-Z>EWPIMZi8yG{KlS-o|nX%Z(|_ny8I=DpdjQ={0gQqZiS$55D7R|hXrrAtEl zT7*-N%2nnw{acwVE4!p{f~zUcVM@!x$+ndzlQZ88(3xteP^$n;N*%Dvm^Hb%loGD& zc~;?JH83~0&N8USs3&E_sVAAHqH`4UNO@R$roYqDc}I8?Hjtdxm*o86wfdglKjyk1 zxkvZlYl3quSmj6C_+E2uhfnl*s7kH? z_)W(!XV-I@96ygHrsAxbSWr>hyML-AVvSwLY7z~|fG*qjBh5XVPoqo~ZSGTdoQaSibVn3+_1_MpA*@>|hm*)*YG z`t8qi$29k>dc}u*4s3y_XqVob#)0@j$6m<_vHoY7mUBz&MX4681v;%!>Yk~&eJNt;h$u&RYJJItJv{4bQ-877BQR`;3Stxi~lP31q@W2>97B0%CuKZS8%OYmTQB~%C?B-F}p zzN<$A4GHlf(%ip1F!~PK&M8hCa zA+n979wtk6Lbf4{8Cw$BCCk{$64@zcMz$$gV(bai*oG{bkTu&RG-R(QjIF$P&vAUe z@Bi=~-9OzwoyU27uFv^7&-=R0`?`iR2l6?8Cu=`%XRa@K(QZrgGAW3Lilvy;&AjX; zwfMTVRiFv#uj^g9@7{(^9t&VpG^9nN?jtOt=uz2jaen#}HqL=t+> ziQO$x)46p3a%e6A~GPFPRqp>Dg-dRa5A5RI?QH98uc_ ztMIr#SEA!A&6{o_CfN(cR7clObtx9uKl_(%zVoPb@a(uzQ6ebsjJ`IX_56z+geaGZ zBH9fzLSq-?=U=eAGb)dYdfRAO+^(XTRpVER`4$r7cPKpVx&u%{Jud-lyRO{Rwc+PMtRg4jLC7{0stxS#cmGia% ze*ERUPM<7BE(!!MDD4BIlF&Nc3uD>+^nN03%RpR1Y5Gno+6xhKE!!{zGH`r-n5iVk)Gx*1r+=76>py``MDr5AYFCK5-u%MIw(ene^T-O={{;prd8oEN?4kQfzrd?2 zYaUYfN^yT>HtF~jHoB6b>P#4s_`RzF*w+nCuJpCJA; z;fF+YEbQ~X;!54?tz`U_h&K;Ojlq5T_i8|S>r}HuVXS{@U9<02HQA`&vcO|Nj^WP&lCaoQ!-;|G=b()*^UbIX*+A#dz# z9YwVG$8S#!mO%P)xNCoZamW@%taZYI5gQonm6wo$e*VfPnMe~Ii2_&nf~QA=YPY%J zMB(ppO_TRNHsKFaCSYqY<(z4@!m4u|KF`mR%)UBrUWn`;&YDv1zX+FIT^?UhFWqM2 z^*O1-z~T+ICf-$uaflAntR#)T*xVU5Yc^T??df)iEG+f+OiYzybk88Mz3FlIk-JlV zK;9)b`0aX3D{fW4J}uSU$u4%_PG9I_Othx(%f1C=wF=9I@sT= zP0wyDBt`h^LG~-PGeYbVsqg;uTufHxBBO&_`_52$3EwmWTS8<)Wlk*9i(_^@(@gp7SHe9~g9aWf`9KqFQAju^%Y({%SV) zxVFs`Jjw@Sy(!wx{(m&97Yh}Z%;c!wTKVWGXcih}r)V`5bPw|2HjDR&?}gq`wF3Ms z%>_C!YwFc{)H&7~f*V1-Q3~_9bgS~N50zKKG0?h{7N9*7rS#S?C9oLo(keKY>X!|r 
z*~#FantWkIRbf3Xdgi}fXb1?t1qn0G|1r>Ao^+OdV$79>lzjJcaQ)bsb+kstp~erm z9H2jdGFmke|8GX4O|+k`=-E5(5tV^%-Qfk1b( zD42S?V8XSgrV86?c-=Y(gZ-FV=SnN08gF>Z8`HuFu*FhVm-n%-sv;%ZdgN5UT_qN- z1xZPNXb#W_e{y*C4wO)usLCUTquPdALBf=G9r#7T0#b~;l`#vn1r6qa81(D(s9RKH zWe@S(r4T%vRvlH|+H+zgGf70mLKHDY^Ib1MwHh>ovQOOcB=xDN;tH&vQ4q(ZO@zP@vYB9~>Es>@~Z zR+Ct+eomKhQXMz|XpH}x`a6fpi>?~smvMRnqG+(d zDF95eKdmAKqEgh=nCoZVY)QtvZnqu3J7DE08N$p2j_%;NzBsAWIUiK4n^$m6~3kPx^siH8JNm+I|i=W*SJ=xT@5^(%4Mc|oHE`^9{fDv zkYLGP)|oZ8od#s_Ig5Z7R^rZ(D75-7>!W4oPw8cDVRVEXM-@3CO`%)bDFd&=z+{rE zP1Qi2SAW_stm(&d-fSTA@ug$8xSk2%XNRK)d)A145IpXAmDcfm_%)HWkv3S^Z%O!rPHY^A(gP*M-kA*A!RNy~)QUi`= zV2+Irg4)+Q+_lczm8iO}hTjc|49e1_h7$N>jk6ef?&bjUlyD<2Vu$B@HYbI5({@ic z|9cS`jee*tH;lAUPOpz)U>5QJhml~~=yV>lvr0a%xm7~c*hrx%vS&y)r&CUNZ*#|V zeE;K91}sGFSu~#vMOZ3y4E|)$%VspkxtIT8Z32MQ+(Mj5V&+;S;3z~CdGu;u0N-a@n@@5hIpUYs zobO{fnMD|4a-$1oH0r*^%}3C%IT&cla_d`ra{OaC+W$wI=y^KELF%{pxjY=7Hv)3< zx4)rpckZy*Dk}HC>1t)RR(iCBk^Nln6fC%gN#Kz<47O9xJh^7`+*y$wAoR=UgBlTR zlgV!FNj1`uF!RYQT`wE@gEXmk$$UnWz*K24cZS=Pj=W6wo+CKmqAcg`S$2fG{nKZ} z_VsZ<@slr0CWYFKE>W^ZK^0i+kaUIchAB-RX5>z_?oNL570<(V(;V2z}zbT*?8Ogm<P4KI^2#CeaOU`r9D%VV}O*j1E{K0mlNcw7^e$NNCXF&<-AeLZ~D-x5H=0*E@on&X@cvKi=tBvFi4jVVXVcLxJ(n4B4@}<(q$@VM_HoB zOw|B(-WVdUWV9F9Dj-5zr^^gS90&!C8wx(}Wd?5*B+~>jWvJ1KwN_Iqw(;(UmICFobk%i z=WbAD0#{|EMk;_N=xeqm6Y<_koknSPP&>b!KfH70sfA6SK&j=HN6jdfAI2Z91<7Uw z1dC@w?Q2@5yWlXkZMw>Eq&w^XLQxnWT~!&-Vzde|RhDkK+7`iUsSy`w=@9t93%CQ! 
zt7`P$JLqV-)kdtKn~4Y*l&TtYRDBSoA=B+ekFowt(y$Ryn!^Yo-ER=pG+%rR310VQ z-{cAEf=Y-@T!`tIiW0s8UE4cVg%=C^w8t!LDkiY4#%-aK2iP=qOOUYNDc zZ`E~t9C)*__|ZK64agjzzbWX^&G8$U{7EPjaoSUcAAZq`kS)ohQB15>v%5;b!oBG~ z*{@nf^q`NbnHm4n=u&2f=XdvCX*Tnsb?i$pzGr&mea_Do1>#9Q}5fQM!m6#*>18T6|d&g+Fl=>^6!HUkeD}!8rxRNPk zt(loQl`@K6*Rh<}$vpV$4KN36hp%MKfAO9gi3@iYPSr4JnLtZq`%uRSe z5r{q@e9tk;6edyrx!3|?)apePS6UEl*8NVt{fon268sH@ rze?~|5&x!xziILRPEDY9>H9I`09f3GMVFe#06aIYo9kEWI>-JG=7v}j literal 0 HcmV?d00001 diff --git a/recipes_source/distributed_rpc_profiling.rst b/recipes_source/distributed_rpc_profiling.rst new file mode 100644 index 00000000000..da9c003d1c7 --- /dev/null +++ b/recipes_source/distributed_rpc_profiling.rst @@ -0,0 +1,314 @@ +Profiling PyTorch RPC-Based Workloads +====================================== + +In this recipe, you will learn: + +- An overview of the `Distributed RPC Framework`_ +- An overview of the `PyTorch Profiler`_ +- How to use the profiler to profile RPC-based workloads + +Requirements +------------ + +- PyTorch 1.6 + +The instructions for installing PyTorch are +available at `pytorch.org`_. + +What is the Distributed RPC Framework? +--------------------------------------- + +The **Distributed RPC Framework** provides mechanisms for multi-machine model +training through a set of primitives to allow for remote communication, and a +higher-level API to automatically differentiate models split across several machines. +For this recipe, it would be helpful to be familiar with the `Distributed RPC Framework`_ +as well as the `RPC Tutorials`_. + +What is the PyTorch Profiler? +--------------------------------------- +The profiler is a context manager based API that allows for on-demand profiling of +operators in a model's workload. The profiler can be used to analyze various aspects +of a model including execution time, operators invoked, and memory consumption. For a +detailed tutorial on using the profiler to profile a single-node model, please see the +`Profiler Recipe`_. 
+ + + +How to use the Profiler for RPC-based workloads +----------------------------------------------- + +The profiler supports profiling of calls made of RPC and allows the user to have a +detailed view into the operations that take place on different nodes. To demonstrate an +example of this, let's first set up the RPC framework. The below code snippet will initialize +two RPC workers on the same host, named ``worker0`` and ``worker1`` respectively. The workers will +be spawned as subprocesses, and we set some environment variables required for proper +initialization. + +:: + + import torch + import torch.distributed.rpc as rpc + import torch.autograd.profiler as profiler + import torch.multiprocessing as mp + import os + import logging + import sys + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) + logger = logging.getLogger() + + def random_tensor(): + return torch.rand((3, 3), requires_grad=True) + + + def worker(rank, world_size): + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "29500" + worker_name = f"worker{rank}" + + # Initialize RPC framework. + rpc.init_rpc( + name=worker_name, + rank=rank, + world_size=world_size + ) + logger.debug(f"{worker_name} successfully initialized RPC.") + + pass # to be continued below + + logger.debug(f"Rank {rank} waiting for workers and shutting down RPC") + rpc.shutdown() + logger.debug(f"Rank {rank} shutdown RPC") + + + if __name__ == '__main__': + # Run 2 RPC workers. + world_size = 2 + mp.spawn(worker, args=(world_size,), nprocs=world_size) + +Running the above program should present you with the following output: + +:: + + DEBUG:root:worker1 successfully initialized RPC. + DEBUG:root:worker0 successfully initialized RPC. 
+  DEBUG:root:Rank 0 waiting for workers and shutting down RPC
+  DEBUG:root:Rank 1 waiting for workers and shutting down RPC
+  DEBUG:root:Rank 1 shutdown RPC
+  DEBUG:root:Rank 0 shutdown RPC
+
+Now that we have a skeleton setup of our RPC framework, we can move on to
+sending RPCs back and forth and using the profiler to obtain a view of what's
+happening under the hood. Let's add to the above ``worker`` function:
+
+::
+
+  def worker(rank, world_size):
+      # Above code omitted...
+      if rank == 0:
+          dst_worker_rank = (rank + 1) % world_size
+          dst_worker_name = f"worker{dst_worker_rank}"
+          t1, t2 = random_tensor(), random_tensor()
+          # Send and wait RPC completion under profiling scope.
+          with profiler.profile() as prof:
+              fut1 = rpc.rpc_async(dst_worker_name, torch.add, args=(t1, t2))
+              fut2 = rpc.rpc_async(dst_worker_name, torch.mul, args=(t1, t2))
+              # RPCs must be awaited within profiling scope.
+              fut1.wait()
+              fut2.wait()
+
+          print(prof.key_averages().table())
+
+The aforementioned code creates 2 RPCs, specifying ``torch.add`` and ``torch.mul``, respectively,
+to be run with two random input tensors on worker 1. Since we use the ``rpc_async`` API,
+we are returned a ``torch.futures.Future`` object, which must be awaited for the result
+of the computation. Note that this wait must take place within the scope created by
+the profiling context manager in order for the RPC to be accurately profiled. Running
+the code with this new worker function should result in the following output:
+
+::
+
+  # Some columns are omitted for brevity, exact output subject to randomness
+  ----------------------------------------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------
+  Name                                                              Self CPU total %  Self CPU total   CPU total %      CPU total        CPU time avg     Number of Calls  Node ID
+  ----------------------------------------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------
+  rpc_async#aten::add(worker0 -> worker1)                           0.00%            0.000us          0                20.462ms         20.462ms         1                0
+  rpc_async#aten::mul(worker0 -> worker1)                           0.00%            0.000us          0                5.712ms          5.712ms          1                0
+  rpc_async#aten::mul(worker0 -> worker1)#remote_op: mul            1.84%            206.864us        2.69%            302.162us        151.081us        2                1
+  rpc_async#aten::add(worker0 -> worker1)#remote_op: add            1.41%            158.501us        1.57%            176.924us        176.924us        1                1
+  rpc_async#aten::mul(worker0 -> worker1)#remote_op: output_nr      0.04%            4.980us          0.04%            4.980us          2.490us          2                1
+  rpc_async#aten::mul(worker0 -> worker1)#remote_op: is_leaf        0.07%            7.806us          0.07%            7.806us          1.952us          4                1
+  rpc_async#aten::add(worker0 -> worker1)#remote_op: empty          0.16%            18.423us         0.16%            18.423us         18.423us         1                1
+  rpc_async#aten::mul(worker0 -> worker1)#remote_op: empty          0.14%            15.712us         0.14%            15.712us         15.712us         1                1
+  ----------------------------------------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------
+  Self CPU time total: 11.237ms
+
+Here we can see that the profiler has profiled our ``rpc_async`` calls made to ``worker1``
+from ``worker0``. In particular, the first 2 entries in the table show details (such as
+the operator name, originating worker, and destination worker) about each RPC call made
+and the ``CPU total`` column indicates the end-to-end latency of the RPC call.
+
+We also have visibility into the actual operators invoked remotely on worker 1 due to RPC.
+We can see operations that took place on ``worker1`` by checking the ``Node ID`` column. For
+example, we can interpret the row with name ``rpc_async#aten::mul(worker0 -> worker1)#remote_op: mul``
+as a ``mul`` operation taking place on the remote node, as a result of the RPC sent to ``worker1``
+from ``worker0``, specifying ``worker1`` to run the builtin ``mul`` operator on the input tensors.
+Note that names of remote operations are prefixed with the name of the RPC event that resulted
+in them. For example, remote operations corresponding to the ``rpc.rpc_async(dst_worker_name, torch.add, args=(t1, t2))``
+call are prefixed with ``rpc_async#aten::add(worker0 -> worker1)``.
+
+We can also use the profiler to gain insight into user-defined functions that are executed over RPC.
+For example, let's add the following to the above ``worker`` function:
+
+::
+
+  # Define somewhere outside of worker() func.
+  def udf_with_ops():
+      import time
+      time.sleep(1)
+      t1, t2 = random_tensor(), random_tensor()
+      torch.add(t1, t2)
+      torch.mul(t1, t2)
+
+  def worker(rank, world_size):
+      # Above code omitted
+      with profiler.profile() as p:
+          fut = rpc.rpc_async(dst_worker_name, udf_with_ops)
+          fut.wait()
+      print(p.key_averages().table())
+
+The above code creates a user-defined function that sleeps for 1 second, and then executes various
+operators. Similar to what we've done above, we send an RPC to the remote worker, specifying it to
+run our user-defined function.
Running this code should result in the following output: + +:: + + # Exact output subject to randomness + -------------------------------------------------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- + Name Self CPU total % Self CPU total CPU total % CPU total CPU time avg Number of Calls Node ID + -------------------------------------------------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- + rpc_async#udf_with_ops(worker0 -> worker1) 0.00% 0.000us 0 1.008s 1.008s 1 0 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: rand 12.58% 80.037us 47.09% 299.589us 149.795us 2 1 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: empty 15.40% 98.013us 15.40% 98.013us 24.503us 4 1 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: uniform_ 22.85% 145.358us 23.87% 151.870us 75.935us 2 1 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: is_complex 1.02% 6.512us 1.02% 6.512us 3.256us 2 1 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: add 25.80% 164.179us 28.43% 180.867us 180.867us 1 1 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: mul 20.48% 130.293us 31.43% 199.949us 99.975us 2 1 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: output_nr 0.71% 4.506us 0.71% 4.506us 2.253us 2 1 + rpc_async#udf_with_ops(worker0 -> worker1)#remote_op: is_leaf 1.16% 7.367us 1.16% 7.367us 1.842us 4 1 + -------------------------------------------------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- + +Here we can see that the user-defined function has successfully been profiled with its name +``(rpc_async#udf_with_ops(worker0 -> worker1))``, and has the CPU total time we would roughly expect +(slightly greater than 1s given the ``sleep``). 
Similar to the above profiling output, we can see the +remote operators that have been executed on worker 1 as part of executing this RPC request. + +Lastly, we can visualize remote execution using the tracing functionality provided by the profiler. +Let's add the following code to the above ``worker`` function: + +:: + + def worker(rank, world_size): + # Above code omitted + # Will generate trace for above profiling output + trace_file = "/tmp/trace.json" + prof.export_chrome_trace(trace_file) + logger.debug(f"Wrote trace to {trace_file}") + +Now, we can load the trace file in Chrome (``chrome://tracing``). We should see output similar to +the following: + +.. image:: ../_static/img/rpc_trace_img.png + :scale: 25 % + +As we can see, we have traced our RPC requests and can also visualize traces of the remote operations, +in this case, given in the trace row for ``node_id: 1``. + +Putting it all together, we have the following code for this recipe: + +:: + + import torch + import torch.distributed.rpc as rpc + import torch.autograd.profiler as profiler + import torch.multiprocessing as mp + import os + import logging + import sys + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) + logger = logging.getLogger() + + def random_tensor(): + return torch.rand((3, 3), requires_grad=True) + + def udf_with_ops(): + import time + time.sleep(1) + t1, t2 = random_tensor(), random_tensor() + torch.add(t1, t2) + torch.mul(t1, t2) + + def worker(rank, world_size): + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "29500" + worker_name = f"worker{rank}" + + # Initialize RPC framework. + rpc.init_rpc( + name=worker_name, + rank=rank, + world_size=world_size + ) + logger.debug(f"{worker_name} successfully initialized RPC.") + + if rank == 0: + dst_worker_rank = (rank + 1) % world_size + dst_worker_name = f"worker{dst_worker_rank}" + t1, t2 = random_tensor(), random_tensor() + # Send and wait RPC completion under profiling scope. 
+ with profiler.profile() as prof: + fut1 = rpc.rpc_async(dst_worker_name, torch.add, args=(t1, t2)) + fut2 = rpc.rpc_async(dst_worker_name, torch.mul, args=(t1, t2)) + # RPCs must be awaited within profiling scope. + fut1.wait() + fut2.wait() + print(prof.key_averages().table()) + + with profiler.profile() as p: + fut = rpc.rpc_async(dst_worker_name, udf_with_ops) + fut.wait() + + print(p.key_averages().table()) + + trace_file = "/tmp/trace.json" + prof.export_chrome_trace(trace_file) + logger.debug(f"Wrote trace to {trace_file}") + + + logger.debug(f"Rank {rank} waiting for workers and shutting down RPC") + rpc.shutdown() + logger.debug(f"Rank {rank} shutdown RPC") + + + + if __name__ == '__main__': + # Run 2 RPC workers. + world_size = 2 + mp.spawn(worker, args=(world_size,), nprocs=world_size) + + +Learn More +------------------- + +- `pytorch.org`_ for installation instructions, and more documentation + and tutorials. +- `Distributed RPC Framework`_ for RPC framework and API reference. +- `Full profiler documentation`_ for profiler documentation. + +.. _pytorch.org: https://pytorch.org/ +.. _Full profiler documentation: https://pytorch.org/docs/stable/autograd.html#profiler +.. _Pytorch Profiler: https://pytorch.org/docs/stable/autograd.html#profiler +.. _Distributed RPC Framework: https://pytorch.org/docs/stable/rpc.html +.. _RPC Tutorials: https://pytorch.org/tutorials/intermediate/rpc_tutorial.html +.. _Profiler Recipe: https://pytorch.org/tutorials/recipes/recipes/profiler.html diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst index f8986363092..3a05d729b67 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_source/recipes_index.rst @@ -166,6 +166,13 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu :image: ../_static/img/thumbnails/cropped/android.png :link: ../recipes/android_native_app_with_custom_op.html :tags: Mobile + +.. 
customcarditem:: + :header: Profiling PyTorch RPC-Based Workloads + :card_description: How to use the PyTorch profiler to profile RPC-based workloads. + :image: ../_static/img/thumbnails/cropped/profile.png + :link: ../recipes/distributed_rpc_profiling.html + :tags: Production .. Automatic Mixed Precision From f1e682ea90391bf1d35f011736b55b62d5b864b4 Mon Sep 17 00:00:00 2001 From: Peter Whidden Date: Mon, 21 Sep 2020 13:28:33 -0700 Subject: [PATCH 18/21] Fix typo "asynchronizely" -> "asynchronously" (#1154) --- intermediate_source/model_parallel_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/model_parallel_tutorial.py b/intermediate_source/model_parallel_tutorial.py index df43f6fa5f9..62ca66db2bc 100644 --- a/intermediate_source/model_parallel_tutorial.py +++ b/intermediate_source/model_parallel_tutorial.py @@ -245,7 +245,7 @@ def plot(means, stds, labels, fig_name): # ----------------------------- # # In the following experiments, we further divide each 120-image batch into -# 20-image splits. As PyTorch launches CUDA operations asynchronizely, the +# 20-image splits. As PyTorch launches CUDA operations asynchronously, the # implementation does not need to spawn multiple threads to achieve # concurrency. From 468124c8d030990ea6e12e5456bb63330e08a3cf Mon Sep 17 00:00:00 2001 From: Pritam Damania <9958665+pritamdamania87@users.noreply.github.com> Date: Mon, 21 Sep 2020 16:38:08 -0700 Subject: [PATCH 19/21] Update dist_overview with additional information. (#1155) Summary: 1) Added DDP + RPC tutorial. 2) Added a pointer to PT Distributed CONTRIBUTING.md. Test Plan: Verified by loading the page locally. 
Reviewers: sentinel Subscribers: Tasks: Tags: Co-authored-by: pritam --- beginner_source/dist_overview.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/beginner_source/dist_overview.rst b/beginner_source/dist_overview.rst index bc9f7fe6bf0..f2878fb3ba9 100644 --- a/beginner_source/dist_overview.rst +++ b/beginner_source/dist_overview.rst @@ -195,3 +195,13 @@ RPC Tutorials are listed below: `@rpc.functions.async_execution `__ decorator, which can help speed up inference and training. It uses similar RL and PS examples employed in the above tutorials 1 and 2. +5. The `Combining Distributed DataParallel with Distributed RPC Framework <../advanced/rpc_ddp_tutorial.html>`__ + tutorial demonstrates how to combine DDP with RPC to train a model using + distributed data parallelism combined with distributed model parallelism. + + +PyTorch Distributed Developers +------------------------------ + +If you'd like to contribute to PyTorch Distributed, please refer to our +`Developer Guide `_. From 96d42011de8904236b6467a7cf914e80635d846d Mon Sep 17 00:00:00 2001 From: Szymon Migacz <1934379+szmigacz@users.noreply.github.com> Date: Mon, 21 Sep 2020 17:51:03 -0700 Subject: [PATCH 20/21] Add Performance Tuning guide recipe (#1161) * Performance Tuning Guide - initial commit * Minor tweaks * Switched profiling guide thumbnail to pytorch logo * Converted Tuning Guide to 80 chars/line * Split tuning guide into general, GPU-specific and distributed optimizations. 
* WAR to fix generation of header for 1st section * Minor fixes * Implemented changes suggested during initial review * Changed 'addition assignment' to 'addition' * Removed sentences about 1 CPU core for DataParallel training * Reordering of layers is recommended only for DDP(find_unused_parameters=True) * Fixed formatting * s/constructors/model constructors and s/match/roughly match * Fixed typos --- recipes_source/recipes/tuning_guide.py | 370 +++++++++++++++++++++++++ recipes_source/recipes_index.rst | 10 + 2 files changed, 380 insertions(+) create mode 100644 recipes_source/recipes/tuning_guide.py diff --git a/recipes_source/recipes/tuning_guide.py b/recipes_source/recipes/tuning_guide.py new file mode 100644 index 00000000000..9d9726ae7e1 --- /dev/null +++ b/recipes_source/recipes/tuning_guide.py @@ -0,0 +1,370 @@ +""" +Performance Tuning Guide +************************* +**Author**: `Szymon Migacz `_ + +Performance Tuning Guide is a set of optimizations and best practices which can +accelerate training and inference of deep learning models in PyTorch. Presented +techniques often can be implemented by changing only a few lines of code and can +be applied to a wide range of deep learning models across all domains. + +General optimizations +--------------------- +""" + +############################################################################### +# Enable async data loading and augmentation +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# `torch.utils.data.DataLoader `_ +# supports asynchronous data loading and data augmentation in separate worker +# subprocesses. The default setting for ``DataLoader`` is ``num_workers=0``, +# which means that the data loading is synchronous and done in the main process. +# As a result the main training process has to wait for the data to be available +# to continue the execution. +# +# Setting ``num_workers > 0`` enables asynchronous data loading and overlap +# between the training and data loading. 
``num_workers`` should be tuned +# depending on the workload, CPU, GPU, and location of training data. +# +# ``DataLoader`` accepts ``pin_memory`` argument, which defaults to ``False``. +# When using a GPU it's better to set ``pin_memory=True``, this instructs +# ``DataLoader`` to use pinned memory and enables faster and asynchronous memory +# copy from the host to the GPU. + +############################################################################### +# Disable gradient calculation for validation or inference +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# PyTorch saves intermediate buffers from all operations which involve tensors +# that require gradients. Typically gradients aren't needed for validation or +# inference. +# `torch.no_grad() `_ +# context manager can be applied to disable gradient calculation within a +# specified block of code, this accelerates execution and reduces the amount of +# required memory. +# `torch.no_grad() `_ +# can also be used as a function decorator. + +############################################################################### +# Disable bias for convolutions directly followed by a batch norm +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# `torch.nn.Conv2d() `_ +# has ``bias`` parameter which defaults to ``True`` (the same is true for +# `Conv1d `_ +# and +# `Conv3d `_ +# ). +# +# If a ``nn.Conv2d`` layer is directly followed by a ``nn.BatchNorm2d`` layer, +# then the bias in the convolution is not needed, instead use +# ``nn.Conv2d(..., bias=False, ....)``. Bias is not needed because in the first +# step ``BatchNorm`` subtracts the mean, which effectively cancels out the +# effect of bias. +# +# This is also applicable to 1d and 3d convolutions as long as ``BatchNorm`` (or +# other normalization layer) normalizes on the same dimension as convolution's +# bias. +# +# Models available from `torchvision `_ +# already implement this optimization. 
+ +############################################################################### +# Use parameter.grad = None instead of model.zero_grad() or optimizer.zero_grad() +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Instead of calling: +model.zero_grad() +# or +optimizer.zero_grad() + +############################################################################### +# to zero out gradients, use the following method instead: + +for param in model.parameters(): + param.grad = None + +############################################################################### +# The second code snippet does not zero the memory of each individual parameter, +# also the subsequent backward pass uses assignment instead of addition to store +# gradients, this reduces the number of memory operations. +# +# Setting gradient to ``None`` has a slightly different numerical behavior than +# setting it to zero, for more details refer to the +# `documentation `_. +# +# Alternatively, starting from PyTorch 1.7, call ``model`` or +# ``optimizer.zero_grad(set_to_none=True)``. + +############################################################################### +# Fuse pointwise operations +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# Pointwise operations (elementwise addition, multiplication, math functions - +# ``sin()``, ``cos()``, ``sigmoid()`` etc.) can be fused into a single kernel +# to amortize memory access time and kernel launch time. +# +# `PyTorch JIT `_ can fuse kernels +# automatically, although there could be additional fusion opportunities not yet +# implemented in the compiler, and not all device types are supported equally. +# +# Pointwise operations are memory-bound, for each operation PyTorch launches a +# separate kernel. Each kernel loads data from the memory, performs computation +# (this step is usually inexpensive) and stores results back into the memory. 
+# +# Fused operator launches only one kernel for multiple fused pointwise ops and +# loads/stores data only once to the memory. This makes JIT very useful for +# activation functions, optimizers, custom RNN cells etc. +# +# In the simplest case fusion can be enabled by applying +# `torch.jit.script `_ +# decorator to the function definition, for example: + +@torch.jit.script +def fused_gelu(x): + return x * 0.5 * (1.0 + torch.erf(x / 1.41421)) + +############################################################################### +# Refer to +# `TorchScript documentation `_ +# for more advanced use cases. + +############################################################################### +# Enable channels_last memory format for computer vision models +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# PyTorch 1.5 introduced support for ``channels_last`` memory format for +# convolutional networks. This format is meant to be used in conjunction with +# `AMP `_ to further accelerate +# convolutional neural networks with +# `Tensor Cores `_. +# +# Support for ``channels_last`` is experimental, but it's expected to work for +# standard computer vision models (e.g. ResNet-50, SSD). To convert models to +# ``channels_last`` format follow +# `Channels Last Memory Format Tutorial `_. +# The tutorial includes a section on +# `converting existing models `_. + +############################################################################### +# Checkpoint intermediate buffers +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Buffer checkpointing is a technique to mitigate the memory capacity burden of +# model training. Instead of storing inputs of all layers to compute upstream +# gradients in backward propagation, it stores the inputs of a few layers and +# the others are recomputed during backward pass. The reduced memory +# requirements enables increasing the batch size that can improve utilization. +# +# Checkpointing targets should be selected carefully. 
The best practice is to avoid storing
+# large layer outputs that have a small re-computation cost. The example target
+# layers are activation functions (e.g. ``ReLU``, ``Sigmoid``, ``Tanh``),
+# up/down sampling and matrix-vector operations with small accumulation depth.
+#
+# PyTorch supports a native
+# `torch.utils.checkpoint `_
+# API to automatically perform checkpointing and recomputation.
+
+###############################################################################
+# Disable debugging APIs
+# ~~~~~~~~~~~~~~~~~~~~~~
+# Many PyTorch APIs are intended for debugging and should be disabled for
+# regular training runs:
+#
+# * anomaly detection:
+#   `torch.autograd.detect_anomaly `_
+#   or
+#   `torch.autograd.set_detect_anomaly(True) `_
+# * profiler related:
+#   `torch.autograd.profiler.emit_nvtx `_,
+#   `torch.autograd.profiler.profile `_
+# * autograd gradcheck:
+#   `torch.autograd.gradcheck `_
+#   or
+#   `torch.autograd.gradgradcheck `_
+#
+
+###############################################################################
+# GPU specific optimizations
+# --------------------------
+
+###############################################################################
+# Enable cuDNN auto-tuner
+# ~~~~~~~~~~~~~~~~~~~~~~~
+# `NVIDIA cuDNN `_ supports many algorithms
+# to compute a convolution. Autotuner runs a short benchmark and selects the
+# kernel with the best performance on a given hardware for a given input size.
+#
+# For convolutional networks (other types currently not supported), enable cuDNN
+# autotuner before launching the training loop by setting:
+
+torch.backends.cudnn.benchmark = True
+###############################################################################
+#
+# * the auto-tuner decisions may be non-deterministic; a different algorithm may
+#   be selected for different runs. 
For more details see +# `PyTorch: Reproducibility `_ +# * in some rare cases, such as with highly variable input sizes, it's better +# to run convolutional networks with autotuner disabled to avoid the overhead +# associated with algorithm selection for each input size. +# + +############################################################################### +# Avoid unnecessary CPU-GPU synchronization +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Avoid unnecessary synchronizations, to let the CPU run ahead of the +# accelerator as much as possible to make sure that the accelerator work queue +# contains many operations. +# +# When possible, avoid operations which require synchronizations, for example: +# +# * ``print(cuda_tensor)`` +# * ``cuda_tensor.item()`` +# * memory copies: ``tensor.cuda()``, ``cuda_tensor.cpu()`` and equivalent +# ``tensor.to(device)`` calls +# * ``cuda_tensor.nonzero()`` +# * python control flow which depends on results of operations performed on cuda +# tensors e.g. ``if (cuda_tensor != 0).all()`` +# + +############################################################################### +# Create tensors directly on the target device +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Instead of calling ``torch.rand(size).cuda()`` to generate a random tensor, +# produce the output directly on the target device: +# ``torch.rand(size, device=torch.device('cuda'))``. +# +# This is applicable to all functions which create new tensors and accept +# ``device`` argument: +# `torch.rand() `_, +# `torch.zeros() `_, +# `torch.full() `_ +# and similar. + +############################################################################### +# Use mixed precision and AMP +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Mixed precision leverages +# `Tensor Cores `_ +# and offers up to 3x overall speedup on Volta and newer GPU architectures. 
To
+# use Tensor Cores, AMP should be enabled and matrix/tensor dimensions should
+# satisfy requirements for calling kernels that use Tensor Cores.
+#
+# To use Tensor Cores:
+#
+# * set sizes to multiples of 8 (to map onto dimensions of Tensor Cores)
+#
+#   * see
+#     `Deep Learning Performance Documentation
+#     `_
+#     for more details and guidelines specific to layer type
+#   * if layer size is derived from other parameters rather than fixed, it can
+#     still be explicitly padded e.g. vocabulary size in NLP models
+#
+# * enable AMP
+#
+#   * Introduction to Mixed Precision Training and AMP:
+#     `video `_,
+#     `slides `_
+#   * native PyTorch AMP is available starting from PyTorch 1.6:
+#     `documentation `_,
+#     `examples `_,
+#     `tutorial `_
+#
+#
+
+###############################################################################
+# Pre-allocate memory in case of variable input length
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Models for speech recognition or for NLP are often trained on input tensors
+# with variable sequence length. Variable length can be problematic for PyTorch
+# caching allocator and can lead to reduced performance or to unexpected
+# out-of-memory errors. If a batch with a short sequence length is followed by
+# another batch with a longer sequence length, then PyTorch is forced to
+# release intermediate buffers from previous iteration and to re-allocate new
+# buffers. This process is time consuming and causes fragmentation in the
+# caching allocator which may result in out-of-memory errors.
+#
+# A typical solution is to implement pre-allocation. It consists of the
+# following steps:
+#
+# #. generate a (usually random) batch of inputs with maximum sequence length
+#    (either corresponding to max length in the training dataset or to some
+#    predefined threshold)
+# #. 
execute a forward and a backward pass with the generated batch, do not
+#    execute an optimizer or a learning rate scheduler; this step pre-allocates
+#    buffers of maximum size, which can be reused in subsequent
+#    training iterations
+# #. zero out gradients
+# #. proceed to regular training
+#
+
+###############################################################################
+# Distributed optimizations
+# -------------------------
+
+###############################################################################
+# Use efficient data-parallel backend
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# PyTorch has two ways to implement data-parallel training:
+#
+# * `torch.nn.DataParallel `_
+# * `torch.nn.parallel.DistributedDataParallel `_
+#
+# ``DistributedDataParallel`` offers much better performance and scaling to
+# multiple GPUs. For more information refer to the
+# `relevant section of CUDA Best Practices `_
+# from PyTorch documentation.
+
+###############################################################################
+# Skip unnecessary all-reduce if training with DistributedDataParallel and gradient accumulation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# By default
+# `torch.nn.parallel.DistributedDataParallel `_
+# executes gradient all-reduce after every backward pass to compute the average
+# gradient over all workers participating in the training. If training uses
+# gradient accumulation over N steps, then all-reduce is not necessary after
+# every training step; it's only required to perform all-reduce after the last
+# call to backward, just before the execution of the optimizer.
+#
+# ``DistributedDataParallel`` provides
+# `no_sync() `_
+# context manager which disables gradient all-reduce for a particular iteration. 
+# ``no_sync()`` should be applied to first ``N-1`` iterations of gradient +# accumulation, the last iteration should follow the default execution and +# perform the required gradient all-reduce. + +############################################################################### +# Match the order of layers in constructors and during the execution if using DistributedDataParallel(find_unused_parameters=True) +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# `torch.nn.parallel.DistributedDataParallel `_ +# with ``find_unused_parameters=True`` uses the order of layers and parameters +# from model constructors to build buckets for ``DistributedDataParallel`` +# gradient all-reduce. ``DistributedDataParallel`` overlaps all-reduce with the +# backward pass. All-reduce for a particular bucket is asynchronously triggered +# only when all gradients for parameters in a given bucket are available. +# +# To maximize the amount of overlap, the order in model constructors should +# roughly match the order during the execution. If the order doesn't match, then +# all-reduce for the entire bucket waits for the gradient which is the last to +# arrive, this may reduce the overlap between backward pass and all-reduce, +# all-reduce may end up being exposed, which slows down the training. +# +# ``DistributedDataParallel`` with ``find_unused_parameters=False`` (which is +# the default setting) relies on automatic bucket formation based on order of +# operations encountered during the backward pass. With +# ``find_unused_parameters=False`` it's not necessary to reorder layers or +# parameters to achieve optimal performance. 
+ +############################################################################### +# Load-balance workload in a distributed setting +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Load imbalance typically may happen for models processing sequential data +# (speech recognition, translation, language models etc.). If one device +# receives a batch of data with sequence length longer than sequence lengths for +# the remaining devices, then all devices wait for the worker which finishes +# last. Backward pass functions as an implicit synchronization point in a +# distributed setting with +# `DistributedDataParallel `_ +# backend. +# +# There are multiple ways to solve the load balancing problem. The core idea is +# to distribute workload over all workers as uniformly as possible within each +# global batch. For example Transformer solves imbalance by forming batches with +# approximately constant number of tokens (and variable number of sequences in a +# batch), other models solve imbalance by bucketing samples with similar +# sequence length or even by sorting dataset by sequence length. diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst index 3a05d729b67..6d78ff4ec3c 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_source/recipes_index.rst @@ -183,6 +183,15 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu :link: ../recipes/recipes/amp_recipe.html :tags: Model-Optimization +.. Performance + +.. customcarditem:: + :header: Performance Tuning Guide + :card_description: Tips for achieving optimal performance. + :image: ../_static/img/thumbnails/cropped/profiler.png + :link: ../recipes/recipes/tuning_guide.html + :tags: Model-Optimization + .. End of tutorial card section .. 
raw:: html @@ -216,6 +225,7 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu /recipes/recipes/tensorboard_with_pytorch /recipes/recipes/dynamic_quantization /recipes/recipes/amp_recipe + /recipes/recipes/tuning_guide /recipes/torchscript_inference /recipes/deployment_with_flask /recipes/distributed_rpc_profiling From 258f422fd3c31ca1daa08538dd542f0b82108f44 Mon Sep 17 00:00:00 2001 From: Jinlin Zhang Date: Thu, 24 Sep 2020 10:47:47 -0700 Subject: [PATCH 21/21] A fix for one line comment when removing runnable code. (#1165) Co-authored-by: v-jizhang <66389669+buck-bot@users.noreply.github.com> --- .jenkins/remove_runnable_code.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.jenkins/remove_runnable_code.py b/.jenkins/remove_runnable_code.py index 6a61cb656bc..bd62f0c5156 100644 --- a/.jenkins/remove_runnable_code.py +++ b/.jenkins/remove_runnable_code.py @@ -16,9 +16,17 @@ if line.startswith('#'): ret_lines.append(line) state = STATE_NORMAL + elif ((line.startswith('"""') or line.startswith('r"""')) and + line.endswith('"""')): + ret_lines.append(line) + state = STATE_NORMAL elif line.startswith('"""') or line.startswith('r"""'): ret_lines.append(line) state = STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE + elif ((line.startswith("'''") or line.startswith("r'''")) and + line.endswith("'''")): + ret_lines.append(line) + state = STATE_NORMAL elif line.startswith("'''") or line.startswith("r'''"): ret_lines.append(line) state = STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE